Source code for camel.runtime.llm_guard_runtime

# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
import json
import logging
from functools import wraps
from typing import List, Optional, Union

from camel.agents import ChatAgent
from camel.configs import ChatGPTConfig
from camel.models import BaseModelBackend, ModelFactory
from camel.runtime import BaseRuntime
from camel.runtime.utils import FunctionRiskToolkit, IgnoreRiskToolkit
from camel.toolkits import FunctionTool
from camel.types import ModelPlatformType, ModelType

logger = logging.getLogger(__name__)

GUARDPROMPT = """You are a function safety evaluator tasked with assessing the 
potential risk level of a given function based on both its description 
and parameters. Your goal is to determine if the function may pose any 
harm to the user's environment, such as deleting files, executing 
arbitrary code, or accessing sensitive information. Pay special attention 
to the provided parameters — even if a function has the potential to be 
dangerous, its actual parameters may indicate harmless behavior, and the 
risk level should be adjusted accordingly. Use the `function_risk` tool to 
assign a risk score to the function, based on the following criteria:

- **Score 1**: No harm. This includes simple operations like mathematical 
    calculations, content searches, or data retrievals that do not impact 
    the user's environment or access sensitive resources. This also 
    includes functions with potentially dangerous capabilities that have 
    harmless, controlled parameters that ensure safe execution.
- **Score 2**: Minimal harm. The function might read user files, retrieve 
    non-sensitive data, or access other low-risk resources, 
    posing little risk to the user.
- **Score 3**: Risk present. The function might delete files, modify the 
    file system, execute arbitrary code, or access sensitive data, which 
    could negatively impact the user's environment. However, if the 
    actual parameters provided clearly indicate safe and restricted 
    usage, this risk should be downgraded accordingly.

When evaluating, always consider both the function's description and its 
specific parameters. If the function appears risky due to its design but 
the provided parameters indicate a safe and non-impactful operation, 
adjust the risk score to reflect this. Assign an appropriate risk score 
and provide a brief explanation of your reasoning based on the function's 
description and the actual parameters given.
YOU MUST USE THE `function_risk` TOOL TO ASSESS THE RISK 
LEVEL OF EACH FUNCTION.
"""


class LLMGuardRuntime(BaseRuntime):
    r"""A runtime that evaluates the risk level of functions using
    a language model.

    Arguments:
        prompt (str): The prompt to use for the language model.
            (default: :obj:`GUARDPROMPT`)
        model (BaseModelBackend): The language model to use.
            (default: :obj:`None`)
        verbose (bool): Whether to print verbose output.
            (default: :obj:`False`)
    """

    def __init__(
        self,
        prompt: str = GUARDPROMPT,
        model: Optional[BaseModelBackend] = None,
        verbose: bool = False,
    ):
        super().__init__()
        self.prompt = prompt
        self.model = model
        self.verbose = verbose

        if not self.model:
            self.model = ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
                model_config_dict=ChatGPTConfig().as_dict(),
            )

        self.ignore_toolkit = IgnoreRiskToolkit(verbose=verbose)
        self.ignore_tool = self.ignore_toolkit.get_tools()[0]
        self.tools_map[self.ignore_tool.get_function_name()] = (
            self.ignore_tool
        )

        self.agent = ChatAgent(
            system_message=self.prompt,
            model=self.model,
            external_tools=[
                *FunctionRiskToolkit(verbose=verbose).get_tools(),
            ],
        )
    def add(  # type: ignore[override]
        self,
        funcs: Union[FunctionTool, List[FunctionTool]],
        threshold: int = 2,
    ) -> "LLMGuardRuntime":
        r"""Add a function or list of functions to the runtime.

        Args:
            funcs (FunctionTool or List[FunctionTool]): The function or
                list of functions to add.
            threshold (int): The risk threshold for functions.
                (default: :obj:`2`)

        Returns:
            LLMGuardRuntime: The current runtime.
        """
        if not isinstance(funcs, list):
            funcs = [funcs]

        for func in funcs:
            inner_func = func.func

            # Create a wrapper that explicitly binds `func`
            @wraps(inner_func)
            def wrapper(
                *args,
                func=func,
                inner_func=inner_func,
                threshold=threshold,
                **kwargs,
            ):
                function_name = func.get_function_name()
                if function_name in self.ignore_toolkit.ignored_risks:
                    reason = self.ignore_toolkit.ignored_risks.pop(
                        function_name
                    )
                    logger.info(
                        f"Ignored risk for function {function_name}: {reason}"
                    )
                    return inner_func(*args, **kwargs)

                self.agent.init_messages()
                resp = self.agent.step(
                    f"""
                    Function is: {function_name}
                    Function description: {func.get_function_description()}
                    Args: {args}
                    Kwargs: {kwargs}
                    """
                )
                tool_call = resp.info.get("external_tool_request", None)
                if not tool_call:
                    logger.error("No tool call found in response.")
                    return {
                        "error": "Risk assessment failed. Disabling function."
                    }

                data = tool_call.function.arguments
                data = json.loads(data)

                if threshold < data["score"]:
                    message = (
                        f"Risk assessment not passed for {function_name}."
                        f"Score: {data['score']} > Threshold: {threshold}"
                        f"\nReason: {data['reason']}"
                    )
                    logger.warning(message)
                    return {"error": message}

                logger.info(
                    f"Function {function_name} passed risk assessment."
                    f"Score: {data['score']}, Reason: {data['reason']}"
                )
                if self.verbose:
                    print(
                        f"Function {function_name} passed risk assessment."
                        f"Score: {data['score']}, Reason: {data['reason']}"
                    )
                return inner_func(*args, **kwargs)

            func.func = wrapper
            self.tools_map[func.get_function_name()] = func
            self.ignore_toolkit.add(func.get_function_name())

        return self
    def reset(self) -> "LLMGuardRuntime":
        r"""Resets the runtime to its initial state."""
        self.ignore_toolkit.ignored_risks = dict()
        self.agent.reset()
        return self
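

# A minimal usage sketch: `add_numbers` is a hypothetical tool function, and
# running this requires a model backend configured for the default platform
# so the guard agent can perform the risk assessment.
if __name__ == "__main__":

    def add_numbers(a: int, b: int) -> int:
        r"""Add two integers."""
        return a + b

    runtime = LLMGuardRuntime(verbose=True)
    runtime.add(FunctionTool(add_numbers), threshold=2)

    # The wrapped tool is stored in the runtime's tools map; calling it runs
    # the risk assessment before (potentially) executing the original
    # function.
    guarded_tool = runtime.tools_map["add_numbers"]
    print(guarded_tool.func(1, 2))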