Source code for camel.data_collector.alpaca_collector

# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

from typing import Any, Dict, List, Optional, Union

from typing_extensions import Self

from camel.agents import ChatAgent
from camel.data_collector.base import BaseDataCollector
from camel.messages import AlpacaItem, BaseMessage
from camel.schemas import OpenAISchemaConverter

# ruff: noqa: E501
# Fallback prompt used by `AlpacaDataCollector.llm_convert` when the caller
# does not supply one. It shows the converter a single worked example: the
# "Instruction" and "User" lines are merged into the "instruction" field,
# "input" is left empty, and the assistant reply becomes "output".
DEFAULT_CONVERTER_PROMPTS = """
    Extract key entities and attributes from the conversations
    and convert them into a structured JSON format.
    For example:
    Instruction: You are a helpful assistant. 
    User: When is the release date of the video game Portal?
    Assistant: The release date of the video game Portal is October 9.
    Your output should be:
    {
        "instruction": "You are a helpful assistant. When is the release date of the video game Portal?",
        "input": "",
        "output": "The release date of the video game Portal is October 9."
    }
"""


class AlpacaDataCollector(BaseDataCollector):
    r"""Data collector that exports one message pair as an Alpaca-style item.

    The collector remembers the role name and system message of the first
    agent passed to :meth:`record`, and later turns that agent's history
    into a dictionary with ``instruction``, ``input`` and ``output`` keys,
    either directly (:meth:`convert`) or through an LLM schema converter
    (:meth:`llm_convert`).
    """

    def __init__(self) -> None:
        super().__init__()
        # Both stay ``None`` until the first call to ``record``.
        self.system_message: Optional[BaseMessage] = None
        self.agent_name: Optional[str] = None

    def record(
        self,
        agent: Union[List[ChatAgent], ChatAgent],
    ) -> Self:
        r"""Inject an agent into the data collector.

        Args:
            agent (Union[List[ChatAgent], ChatAgent]): The agent to inject.
                When a list is given, its first element determines the
                tracked agent name and system message.

        Returns:
            Self: This collector, so calls can be chained.
        """
        # Only the first recorded agent defines which history is converted;
        # later calls keep the already stored name and system message.
        if not self.agent_name:
            _agent = agent if isinstance(agent, ChatAgent) else agent[0]
            self.agent_name = _agent.role_name
            self.system_message = _agent._system_message
        super().record(agent)
        return self

    def convert(self) -> Dict[str, Any]:
        r"""Convert the collected data into a dictionary.

        Returns:
            Dict[str, Any]: A dictionary with the keys ``instruction``
                (system message content followed by the input message),
                ``input`` (always an empty string) and ``output``. The
                same dictionary is also appended to ``self.data``.

        Raises:
            ValueError: If no agent is injected, if no data was collected,
                or if the history is not exactly one message pair
                (optionally preceded by a system message).
        """
        if self.agent_name is None:
            raise ValueError("No agent injected")

        history = self.get_agent_history(self.agent_name)
        if not history:
            raise ValueError("No data collected.")

        # Validate and process history
        if len(history) == 3 and history[0].role == "system":
            history = history[1:]  # Ignore the system message.
        elif len(history) != 2:
            raise ValueError(
                f"AlpacaDataCollector only supports one message pair, but "
                f"got {len(history)}"
            )

        input_message, output_message = history
        instruction = (
            self.system_message.content if self.system_message else ""
        ) + str(input_message.message)

        data = {
            "instruction": instruction,
            "input": "",
            "output": output_message.message,
        }
        self.data.append(data)
        return data

    def llm_convert(
        self,
        converter: Optional[OpenAISchemaConverter] = None,
        prompt: Optional[str] = None,
    ) -> Dict[str, str]:
        r"""Convert collected data using an LLM schema converter.

        Args:
            converter (Optional[OpenAISchemaConverter], optional):
                The converter to use. (default: :obj:`OpenAISchemaConverter`)
            prompt (Optional[str], optional): Prompt to guide the conversion.
                (default: :obj:`DEFAULT_CONVERTER_PROMPTS`)

        Returns:
            Dict[str, str]: The converted data.

        Raises:
            ValueError: If no agent is injected or data cannot be collected.
        """
        # Fail early, mirroring `convert`. Without these guards a missing
        # agent was looked up under the literal name "None" and the
        # converter was invoked on a context with no conversation in it.
        if self.agent_name is None:
            raise ValueError("No agent injected")

        history = self.get_agent_history(self.agent_name)
        if not history:
            raise ValueError("No data collected.")

        prompt = prompt or DEFAULT_CONVERTER_PROMPTS
        converter = converter or OpenAISchemaConverter()

        system = self.system_message.content if self.system_message else ""
        context = [f"Instruction: {system}\n"]

        # User turns get a fixed "User" label; every other turn is labelled
        # with the name stored on the history entry.
        for message in history:
            if message.role == "user":
                context.append(f"User: {message.message}\n")
            else:
                context.append(f"{message.name}: {message.message}\n")

        return converter.convert(
            "\n".join(context), AlpacaItem, prompt=prompt
        ).model_dump()