Source code for camel.environments.models

# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

from datetime import datetime, timezone
from typing import Any, Dict, Optional, Protocol, Tuple

from pydantic import BaseModel, Field


class Action(BaseModel):
    r"""An action submitted to an environment.

    Bundles the text produced by the LLM with free-form metadata and a
    creation timestamp.

    Attributes:
        index (Optional[int]): Optional integer index attached to the
            action. NOTE(review): the field description in this module is
            only a placeholder, so the exact meaning of the index should
            be confirmed against the callers. (default: :obj:`None`)
        llm_response (str): The response generated by the LLM. This field
            has no default and must be supplied.
        metadata (Dict[str, Any]): Additional metadata about the
            generation. A new empty dict is created for each instance
            when the field is omitted.
        timestamp (datetime): When the response was generated. When
            omitted, it is set to the current timezone-aware UTC time at
            instantiation.
    """

    index: Optional[int] = Field(description="...", default=None)
    llm_response: str = Field(description="Generated response from the LLM")
    metadata: Dict[str, Any] = Field(
        description="Additional metadata about the generation",
        default_factory=dict,
    )
    timestamp: datetime = Field(
        description="When the response was generated (UTC)",
        # Evaluated per instance so every action gets its own timestamp.
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )
class Observation(BaseModel):
    r"""What an environment presents to the LLM at a given step.

    Attributes:
        question (str): The question posed to the LLM. This field has no
            default and must be supplied.
        context (Dict[str, Any]): Additional context for the question. A
            new empty dict is created for each instance when the field is
            omitted.
        metadata (Optional[Dict[str, Any]]): Optional metadata about the
            observation. (default: :obj:`None`)
    """

    question: str = Field(..., description="The question posed to the LLM")
    context: Dict[str, Any] = Field(
        description="Additional context for the question",
        default_factory=dict,
    )
    metadata: Optional[Dict[str, Any]] = Field(
        description="Optional metadata about the observation",
        default=None,
    )
class StepResult(BaseModel):
    r"""Result of an environment step.

    Attributes:
        observation (Observation): The next observation.
        reward (float): Total reward of the action.
        rewards_dict (Dict[str, float]): Dictionary of reward scores for
            different aspects. A new empty dict is created for each
            instance when the field is omitted.
        done (bool): Whether the episode is complete.
        info (Dict[str, Any]): Additional information about the step. A
            new empty dict is created for each instance when the field is
            omitted.
    """

    observation: Observation = Field(..., description="The next observation")
    reward: float = Field(..., description="Total reward of the action")
    rewards_dict: Dict[str, float] = Field(
        default_factory=dict,
        description="Dictionary of reward scores for different aspects",
    )
    done: bool = Field(..., description="Whether the episode is complete")
    info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional information about the step",
    )

    def as_tuple(
        self,
    ) -> Tuple[Observation, float, bool, Dict[str, Any]]:
        r"""Return the step as an ``(observation, reward, done, info)``
        tuple.

        ``rewards_dict`` is not a separate tuple element. It is exposed
        under the ``"rewards_dict"`` key of the returned info dict,
        replacing any value already stored under that key.

        Returns:
            Tuple[Observation, float, bool, Dict[str, Any]]: The
                observation, the total reward, the done flag, and a
                shallow copy of :obj:`info` extended with
                ``"rewards_dict"``.
        """
        # Build a new dict instead of writing into ``self.info``: converting
        # to a tuple should not change the model's stored state (and with
        # it ``model_dump()`` output or equality comparisons).
        info = {**self.info, "rewards_dict": self.rewards_dict}
        return (self.observation, self.reward, self.done, info)
class Environment(Protocol):
    r"""Structural interface for an asynchronous environment.

    A class conforms to this protocol by providing ``reset``, ``step`` and
    ``close`` coroutines with the signatures below; the method bodies here
    are placeholders only.
    """

    async def reset(self) -> Observation:
        r"""Put the environment back into an initial state.

        Returns:
            Observation: The initial observation for the episode.
        """
        ...

    async def step(self, action: Action) -> StepResult:
        r"""Advance the environment by one step.

        Args:
            action (Action): Action containing everything that is needed
                to progress in the environment.

        Returns:
            StepResult: The next observation, the reward, the done flag,
                and additional info.
        """
        ...

    async def close(self) -> None:
        r"""Perform a full cleanup of all environment resources."""
        ...