Documentation Index
Fetch the complete documentation index at: https://docs.camel-ai.org/llms.txt
Use this file to discover all available pages before exploring further.
class ActionExtractor(BaseExtractorStrategy):
A strategy for extracting RLCard actions from text.
init
def __init__(self, action_pattern: str = '<Action>\\s*(.+)'):
Initialize the action extractor with a regex pattern.
Parameters:
- action_pattern (str): The regex pattern used to extract actions. (default: :obj:`"<Action>\\s*(.+)"`)
RLCardsEnv
class RLCardsEnv(MultiStepEnv):
A base environment for RLCard games.
This environment implements a wrapper around RLCard environments for
reinforcement learning with LLMs. It handles the conversion between
RLCard states and actions and the CAMEL environment interface.
init
def __init__(
self,
game_name: str,
extractor: Optional[BaseExtractor] = None,
max_steps: Optional[int] = None,
num_players: int = 2,
**kwargs
):
Initialize the RLCard environment.
Parameters:
- game_name (str): The name of the RLCard game to play.
- extractor (Optional[BaseExtractor]): Extractor to process LLM responses. If None, a default extractor with ActionExtractor will be used. (default: :obj:`None`)
- max_steps (Optional[int]): Maximum steps per episode. (default: :obj:`None`)
- num_players (int): Number of players in the game. (default: :obj:`2`)
- **kwargs: Additional environment parameters.
_get_initial_state
def _get_initial_state(self):
Returns:
Dict[str, Any]: A dictionary containing the initial state with
game state, player info, and game status flags.
_get_next_observation
def _get_next_observation(self):
Returns:
Observation: An Observation object containing the game state
description.
_get_terminal_observation
def _get_terminal_observation(self):
Returns:
Observation: An Observation object containing the final game state
description.
_is_done
def _is_done(self):
Returns:
bool: True if the game is over, False otherwise.
_convert_to_rlcard_action
def _convert_to_rlcard_action(self, action_str: str):
Convert a string action to the format expected by RLCard.
This method must be implemented by subclasses to handle the specific
action format of each game.
Parameters:
- action_str (str): The string representation of the action.
Returns:
Any: The action in the format expected by the RLCard environment.
def _format_state_for_observation(self, state: Dict[str, Any]):
Format the RLCard state for human-readable observation.
This method must be implemented by subclasses to create a
human-readable representation of the game state.
Parameters:
- state (Dict[str, Any]): The RLCard state dictionary.
Returns:
str: A human-readable representation of the state.
def _format_legal_actions(self, legal_actions: List[Any]):
Format the legal actions for human-readable observation.
This method must be implemented by subclasses to create a
human-readable representation of the legal actions.
Parameters:
- legal_actions (List[Any]): The list of legal actions.
Returns:
str: A human-readable representation of the legal actions.
BlackjackEnv
class BlackjackEnv(RLCardsEnv):
A Blackjack environment for reinforcement learning with LLMs.
This environment implements a standard Blackjack game where the LLM agent
plays against a dealer.
init
def __init__(
self,
extractor: Optional[BaseExtractor] = None,
max_steps: Optional[int] = None,
**kwargs
):
Initialize the Blackjack environment.
Parameters:
- extractor (Optional[BaseExtractor]): Extractor to process LLM responses. If None, a default extractor will be used. (default: :obj:`None`)
- max_steps (Optional[int]): Maximum steps per episode. (default: :obj:`None`)
- **kwargs: Additional environment parameters.
_convert_to_rlcard_action
def _convert_to_rlcard_action(self, action_str: str):
Convert a string action to the format expected by RLCard Blackjack.
Parameters:
- action_str (str): The string representation of the action. Expected to be 'hit' or 'stand'.
Returns:
int: 0 for 'hit', 1 for 'stand'.
def _format_state_for_observation(self, state: Dict[str, Any]):
Format the Blackjack state for human-readable observation.
Parameters:
- state (Dict[str, Any]): The RLCard state dictionary.
Returns:
str: A human-readable representation of the state.
def _format_legal_actions(self, legal_actions: List[int]):
Format the legal actions for Blackjack.
Parameters:
- legal_actions (List[int]): The list of legal actions.
Returns:
str: A human-readable representation of the legal actions.
def _format_cards(self, cards: List[str]):
Format a list of cards for display.
Parameters:
- cards (List[str]): List of card strings.
Returns:
str: Formatted card string.
_calculate_hand_value
def _calculate_hand_value(self, cards: List[str]):
Calculate the value of a hand in Blackjack.
Parameters:
- cards (List[str]): List of card strings.
Returns:
int: The value of the hand.
LeducHoldemEnv
class LeducHoldemEnv(RLCardsEnv):
A Leduc Hold’em environment for reinforcement learning with LLMs.
This environment implements a Leduc Hold’em poker game where the LLM agent
plays against one or more opponents.
init
def __init__(
self,
extractor: Optional[BaseExtractor] = None,
max_steps: Optional[int] = None,
num_players: int = 2,
**kwargs
):
Initialize the Leduc Hold’em environment.
Parameters:
- extractor (Optional[BaseExtractor]): Extractor to process LLM responses. If None, a default extractor will be used. (default: :obj:`None`)
- max_steps (Optional[int]): Maximum steps per episode. (default: :obj:`None`)
- num_players (int): Number of players in the game. (default: :obj:`2`)
- **kwargs: Additional environment parameters.
_convert_to_rlcard_action
def _convert_to_rlcard_action(self, action_str: str):
Convert a string action to the format expected by RLCard
Leduc Hold’em.
Parameters:
- action_str (str): The string representation of the action. Expected to be 'fold', 'check', 'call', or 'raise'.
Returns:
int: 0 for 'fold', 1 for 'check/call', 2 for 'raise'.
def _format_state_for_observation(self, state: Dict[str, Any]):
Format the Leduc Hold’em state for human-readable observation.
Parameters:
- state (Dict[str, Any]): The RLCard state dictionary.
Returns:
str: A human-readable representation of the state.
def _format_legal_actions(self, legal_actions: List[int]):
Format the legal actions for Leduc Hold’em.
Parameters:
- legal_actions (List[int]): The list of legal actions.
Returns:
str: A human-readable representation of the legal actions.
DoudizhuEnv
class DoudizhuEnv(RLCardsEnv):
A Doudizhu environment for reinforcement learning with LLMs.
This environment implements a standard Doudizhu game where the LLM agent
plays against two AI opponents.
init
def __init__(
self,
extractor: Optional[BaseExtractor] = None,
max_steps: Optional[int] = None,
**kwargs
):
Initialize the Doudizhu environment.
Parameters:
- extractor (Optional[BaseExtractor]): Extractor to process LLM responses. If None, a default extractor will be used. (default: :obj:`None`)
- max_steps (Optional[int]): Maximum steps per episode. (default: :obj:`None`)
- **kwargs: Additional environment parameters.
_convert_to_rlcard_action
def _convert_to_rlcard_action(self, action_str: str):
Convert a string action to the format expected by RLCard Doudizhu.
Parameters:
- action_str (str): The string representation of the action. Expected to be a card combination or 'pass'.
Returns:
str: The action string in the format expected by RLCard.
def _format_state_for_observation(self, state: Dict[str, Any]):
Format the Doudizhu state for human-readable observation.
Parameters:
- state (Dict[str, Any]): The RLCard state dictionary.
Returns:
str: A human-readable representation of the state.
def _format_legal_actions(self, legal_actions: List[str]):
Format the legal actions for Doudizhu.
Parameters:
- legal_actions (List[str]): The list of legal actions.
Returns:
str: A human-readable representation of the legal actions.
def _format_cards(self, cards: List[str]):
Format a list of cards for display.
Parameters:
- cards (List[str]): List of card strings.
Returns:
str: Formatted card string.