MultiStepEnv

class MultiStepEnv(ABC):

A multi-step environment for reinforcement learning with LLMs.

__init__

def __init__(
    self,
    extractor: BaseExtractor,
    max_steps: Optional[int] = None,
    **kwargs
):

Initialize the environment.

Parameters:

  • extractor: Extractor to process LLM responses.
  • max_steps: Maximum steps per episode.
  • **kwargs: Additional environment parameters.

_get_initial_state

def _get_initial_state(self):

_get_next_observation

def _get_next_observation(self):

_get_terminal_observation

def _get_terminal_observation(self):

is_done

def is_done(self):

Returns:

bool: A boolean flag indicating whether the episode is done.

_is_done

def _is_done(self):

metadata

def metadata(self):

Returns:

Dict[str, Any]: A copy of the environment’s metadata.

current_step

def current_step(self):

Returns:

int: The number of the step we are currently in.