ActionExtractor

class ActionExtractor(BaseExtractorStrategy):

A strategy for extracting RLCard actions from text.

__init__

def __init__(self, action_pattern: str = '<Action>\\s*(.+)'):

Initialize the action extractor with a regex pattern.

Parameters:

  • action_pattern (str): The regex pattern to extract actions. (default: :obj:"<Action>\\s*(.+)").

RLCardsEnv

class RLCardsEnv(MultiStepEnv):

A base environment for RLCard games.

This environment implements a wrapper around RLCard environments for reinforcement learning with LLMs. It converts between RLCard's native states and actions on one side and the CAMEL environment interface on the other.

__init__

def __init__(
    self,
    game_name: str,
    extractor: Optional[BaseExtractor] = None,
    max_steps: Optional[int] = None,
    num_players: int = 2,
    **kwargs
):

Initialize the RLCard environment.

Parameters:

  • game_name (str): The name of the RLCard game to play.
  • extractor (Optional[BaseExtractor]): Extractor to process LLM responses. If None, a default extractor with ActionExtractor will be used. (default: :obj:None)
  • max_steps (Optional[int]): Maximum steps per episode. (default: :obj:None)
  • num_players (int): Number of players in the game. (default: :obj:2)
  • **kwargs: Additional environment parameters.

_get_initial_state

def _get_initial_state(self):

Returns:

Dict[str, Any]: A dictionary containing the initial state with game state, player info, and game status flags.

_get_next_observation

def _get_next_observation(self):

Returns:

Observation: An Observation object containing the game state description.

_get_terminal_observation

def _get_terminal_observation(self):

Returns:

Observation: An Observation object containing the final game state description.

_is_done

def _is_done(self):

Returns:

bool: True if the game is over, False otherwise.

_convert_to_rlcard_action

def _convert_to_rlcard_action(self, action_str: str):

Convert a string action to the format expected by RLCard.

This method must be implemented by subclasses to handle the specific action format of each game.

Parameters:

  • action_str (str): The string representation of the action.

Returns:

Any: The action in the format expected by the RLCard environment.

_format_state_for_observation

def _format_state_for_observation(self, state: Dict[str, Any]):

Format the RLCard state for human-readable observation.

This method must be implemented by subclasses to create a human-readable representation of the game state.

Parameters:

  • state (Dict[str, Any]): The RLCard state dictionary.

Returns:

str: A human-readable representation of the state.

_format_legal_actions

def _format_legal_actions(self, legal_actions: List[Any]):

Format the legal actions for human-readable observation.

This method must be implemented by subclasses to create a human-readable representation of the legal actions.

Parameters:

  • legal_actions (List[Any]): The list of legal actions.

Returns:

str: A human-readable representation of the legal actions.

BlackjackEnv

class BlackjackEnv(RLCardsEnv):

A Blackjack environment for reinforcement learning with LLMs.

This environment implements a standard Blackjack game where the LLM agent plays against a dealer.

__init__

def __init__(
    self,
    extractor: Optional[BaseExtractor] = None,
    max_steps: Optional[int] = None,
    **kwargs
):

Initialize the Blackjack environment.

Parameters:

  • extractor (Optional[BaseExtractor]): Extractor to process LLM responses. If None, a default extractor will be used. (default: :obj:None)
  • max_steps (Optional[int]): Maximum steps per episode. (default: :obj:None)
  • **kwargs: Additional environment parameters.

_convert_to_rlcard_action

def _convert_to_rlcard_action(self, action_str: str):

Convert a string action to the format expected by RLCard Blackjack.

Parameters:

  • action_str (str): The string representation of the action. Expected to be ‘hit’ or ‘stand’.

Returns:

int: 0 for ‘hit’, 1 for ‘stand’.

_format_state_for_observation

def _format_state_for_observation(self, state: Dict[str, Any]):

Format the Blackjack state for human-readable observation.

Parameters:

  • state (Dict[str, Any]): The RLCard state dictionary.

Returns:

str: A human-readable representation of the state.

_format_legal_actions

def _format_legal_actions(self, legal_actions: List[int]):

Format the legal actions for Blackjack.

Parameters:

  • legal_actions (List[int]): The list of legal actions.

Returns:

str: A human-readable representation of the legal actions.

_format_cards

def _format_cards(self, cards: List[str]):

Format a list of cards for display.

Parameters:

  • cards (List[str]): List of card strings.

Returns:

str: Formatted card string.

_calculate_hand_value

def _calculate_hand_value(self, cards: List[str]):

Calculate the value of a hand in Blackjack.

Parameters:

  • cards (List[str]): List of card strings.

Returns:

int: The value of the hand.

LeducHoldemEnv

class LeducHoldemEnv(RLCardsEnv):

A Leduc Hold’em environment for reinforcement learning with LLMs.

This environment implements a Leduc Hold’em poker game where the LLM agent plays against one or more opponents.

__init__

def __init__(
    self,
    extractor: Optional[BaseExtractor] = None,
    max_steps: Optional[int] = None,
    num_players: int = 2,
    **kwargs
):

Initialize the Leduc Hold’em environment.

Parameters:

  • extractor (Optional[BaseExtractor]): Extractor to process LLM responses. If None, a default extractor will be used. (default: :obj:None)
  • max_steps (Optional[int]): Maximum steps per episode. (default: :obj:None)
  • num_players (int): Number of players in the game. (default: :obj:2)
  • **kwargs: Additional environment parameters.

_convert_to_rlcard_action

def _convert_to_rlcard_action(self, action_str: str):

Convert a string action to the format expected by RLCard Leduc Hold’em.

Parameters:

  • action_str (str): The string representation of the action. Expected to be ‘fold’, ‘check’, ‘call’, or ‘raise’.

Returns:

int: 0 for ‘fold’, 1 for ‘check/call’, 2 for ‘raise’.

_format_state_for_observation

def _format_state_for_observation(self, state: Dict[str, Any]):

Format the Leduc Hold’em state for human-readable observation.

Parameters:

  • state (Dict[str, Any]): The RLCard state dictionary.

Returns:

str: A human-readable representation of the state.

_format_legal_actions

def _format_legal_actions(self, legal_actions: List[int]):

Format the legal actions for Leduc Hold’em.

Parameters:

  • legal_actions (List[int]): The list of legal actions.

Returns:

str: A human-readable representation of the legal actions.

DoudizhuEnv

class DoudizhuEnv(RLCardsEnv):

A Doudizhu environment for reinforcement learning with LLMs.

This environment implements a standard Doudizhu game where the LLM agent plays against two AI opponents.

__init__

def __init__(
    self,
    extractor: Optional[BaseExtractor] = None,
    max_steps: Optional[int] = None,
    **kwargs
):

Initialize the Doudizhu environment.

Parameters:

  • extractor (Optional[BaseExtractor]): Extractor to process LLM responses. If None, a default extractor will be used. (default: :obj:None)
  • max_steps (Optional[int]): Maximum steps per episode. (default: :obj:None)
  • **kwargs: Additional environment parameters.

_convert_to_rlcard_action

def _convert_to_rlcard_action(self, action_str: str):

Convert a string action to the format expected by RLCard Doudizhu.

Parameters:

  • action_str (str): The string representation of the action. Expected to be a card combination or ‘pass’.

Returns:

str: The action string in the format expected by RLCard.

_format_state_for_observation

def _format_state_for_observation(self, state: Dict[str, Any]):

Format the Doudizhu state for human-readable observation.

Parameters:

  • state (Dict[str, Any]): The RLCard state dictionary.

Returns:

str: A human-readable representation of the state.

_format_legal_actions

def _format_legal_actions(self, legal_actions: List[str]):

Format the legal actions for Doudizhu.

Parameters:

  • legal_actions (List[str]): The list of legal actions.

Returns:

str: A human-readable representation of the legal actions.

_format_cards

def _format_cards(self, cards: List[str]):

Format a list of cards for display.

Parameters:

  • cards (List[str]): List of card strings.

Returns:

str: Formatted card string.