# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
from datetime import datetime, timezone
from typing import Any, Dict, Optional, Protocol, Tuple
from pydantic import BaseModel, Field
[docs]
class Action(BaseModel):
    r"""An action submitted to an environment.

    Bundles the text produced by the LLM with free-form metadata, an
    optional index and a creation timestamp, so that the action can be
    verified and tracked within an RL framework.

    Attributes:
        index (Optional[int]): Optional integer index attached to the
            action. (default: :obj:`None`)
        llm_response (str): The response generated by the LLM. This
            field is required.
        metadata (Dict[str, Any]): Additional metadata about the
            generation, such as model parameters, prompt details, or
            response confidence scores. Each instance gets its own
            empty dict when none is supplied.
        timestamp (datetime): The timestamp when the action was
            generated. When not supplied, it is set to the current
            time in UTC at instantiation.
    """

    index: Optional[int] = Field(None, description="...")
    llm_response: str = Field(
        ...,
        description="Generated response from the LLM",
    )
    metadata: Dict[str, Any] = Field(
        description="Additional metadata about the generation",
        default_factory=dict,
    )
    timestamp: datetime = Field(
        description="When the response was generated (UTC)",
        # Timezone-aware "now", evaluated once per instance.
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )
[docs]
class Observation(BaseModel):
    r"""What the environment presents to the LLM.

    Attributes:
        question (str): The question posed to the LLM. This field is
            required.
        context (Dict[str, Any]): Additional context for the question.
            Each instance gets its own empty dict when none is
            supplied.
        metadata (Optional[Dict[str, Any]]): Optional metadata about
            the observation. (default: :obj:`None`)
    """

    question: str = Field(description="The question posed to the LLM")
    context: Dict[str, Any] = Field(
        description="Additional context for the question",
        default_factory=dict,
    )
    metadata: Optional[Dict[str, Any]] = Field(
        None,
        description="Optional metadata about the observation",
    )
[docs]
class StepResult(BaseModel):
    r"""Result of an environment step.

    Attributes:
        observation (Observation): The next observation.
        reward (float): Total reward of the action.
        rewards_dict (Dict[str, float]): Dictionary of reward scores for
            different aspects. Each instance gets its own empty dict
            when none is supplied.
        done (bool): Whether the episode is complete.
        info (Dict[str, Any]): Additional information about the step.
            Each instance gets its own empty dict when none is
            supplied.
    """

    observation: Observation = Field(..., description="The next observation")
    reward: float = Field(..., description="Total reward of the action")
    rewards_dict: Dict[str, float] = Field(
        default_factory=dict,
        description="Dictionary of reward scores for different aspects",
    )
    done: bool = Field(..., description="Whether the episode is complete")
    info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional information about the step",
    )

    def as_tuple(self) -> Tuple[Observation, float, bool, Dict[str, Any]]:
        r"""Return the step as an ``(observation, reward, done, info)``
        tuple.

        ``rewards_dict`` has no slot of its own in the tuple; it is
        exposed under the ``"rewards_dict"`` key of the returned info
        dict, overriding any existing entry with that key.

        Returns:
            Tuple[Observation, float, bool, Dict[str, Any]]: The
                observation, the total reward, the done flag, and a new
                info dict that contains the entries of :obj:`info` plus
                ``"rewards_dict"``.
        """
        # Build a fresh dict instead of writing into ``self.info`` so that
        # calling this accessor never changes the model (and therefore
        # never changes what a later serialization of it contains).
        info = {**self.info, "rewards_dict": self.rewards_dict}
        return (self.observation, self.reward, self.done, info)
[docs]
class Environment(Protocol):
    r"""Structural interface of an asynchronous environment.

    The protocol consists of three coroutine methods: :meth:`reset`,
    :meth:`step` and :meth:`close`.
    """

    async def reset(self) -> Observation:
        r"""Put the environment back into an initial state.

        Returns:
            Observation: The initial observation for the episode.
        """
        ...

    async def step(self, action: Action) -> StepResult:
        r"""Advance the environment by one step.

        Args:
            action (Action): Action containing everything that is
                needed to progress in the environment.

        Returns:
            StepResult: The next observation, reward, done flag, and
                info.
        """
        ...

    async def close(self) -> None:
        r"""Release every resource held by the environment."""
        ...