# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
import json
import logging
from functools import wraps
from typing import List, Optional, Union

from camel.agents import ChatAgent
from camel.configs import ChatGPTConfig
from camel.models import BaseModelBackend, ModelFactory
from camel.runtime import BaseRuntime
from camel.runtime.utils import FunctionRiskToolkit, IgnoreRiskToolkit
from camel.toolkits import FunctionTool
from camel.types import ModelPlatformType, ModelType

logger = logging.getLogger(__name__)

GUARDPROMPT = """You are a function safety evaluator tasked with assessing the
potential risk level of a given function based on both its description
and parameters. Your goal is to determine if the function may pose any
harm to the user's environment, such as deleting files, executing
arbitrary code, or accessing sensitive information. Pay special attention
to the provided parameters — even if a function has the potential to be
dangerous, its actual parameters may indicate harmless behavior, and the
risk level should be adjusted accordingly. Use the `function_risk` tool to
assign a risk score to the function, based on the following criteria:
- **Score 1**: No harm. This includes simple operations like mathematical
calculations, content searches, or data retrievals that do not impact
the user's environment or access sensitive resources. This also
includes functions with potentially dangerous capabilities that have
harmless, controlled parameters that ensure safe execution.
- **Score 2**: Minimal harm. The function might read user files, retrieve
non-sensitive data, or access other low-risk resources,
posing little risk to the user.
- **Score 3**: Risk present. The function might delete files, modify the
file system, execute arbitrary code, or access sensitive data, which
could negatively impact the user's environment. However, if the
actual parameters provided clearly indicate safe and restricted
usage, this risk should be downgraded accordingly.
When evaluating, always consider both the function's description and its
specific parameters. If the function appears risky due to its design but
the provided parameters indicate a safe and non-impactful operation,
adjust the risk score to reflect this. Assign an appropriate risk score
and provide a brief explanation of your reasoning based on the function's
description and the actual parameters given.
YOU MUST USE THE `function_risk` TOOL TO ASSESS THE RISK
LEVEL OF EACH FUNCTION.
"""


class LLMGuardRuntime(BaseRuntime):
    r"""A runtime that evaluates the risk level of functions using
    a language model.

    Args:
        prompt (str): The prompt to use for the language model.
            (default: :obj:`GUARDPROMPT`)
        model (BaseModelBackend): The language model to use.
            (default: :obj:`None`)
        verbose (bool): Whether to print verbose output.
            (default: :obj:`False`)
    """

    def __init__(
        self,
        prompt: str = GUARDPROMPT,
        model: Optional[BaseModelBackend] = None,
        verbose: bool = False,
    ):
        super().__init__()
        self.prompt = prompt
        self.model = model
        self.verbose = verbose

        if not self.model:
            self.model = ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
                model_config_dict=ChatGPTConfig().as_dict(),
            )

        self.ignore_toolkit = IgnoreRiskToolkit(verbose=verbose)
        self.ignore_tool = self.ignore_toolkit.get_tools()[0]
        self.tools_map[self.ignore_tool.get_function_name()] = (
            self.ignore_tool
        )

        self.agent = ChatAgent(
            system_message=self.prompt,
            model=self.model,
            external_tools=[
                *FunctionRiskToolkit(verbose=verbose).get_tools(),
            ],
        )

    def add(  # type: ignore[override]
        self,
        funcs: Union[FunctionTool, List[FunctionTool]],
        threshold: int = 2,
    ) -> "LLMGuardRuntime":
        r"""Add a function or list of functions to the runtime.

        Args:
            funcs (FunctionTool or List[FunctionTool]): The function or
                list of functions to add.
            threshold (int): The risk threshold for functions.
                (default: :obj:`2`)

        Returns:
            LLMGuardRuntime: The current runtime.
        """
        if not isinstance(funcs, list):
            funcs = [funcs]

        for func in funcs:
            inner_func = func.func

            # Create a wrapper that explicitly binds `func`, `inner_func`,
            # and `threshold` as defaults so each iteration captures its
            # own values rather than the last loop variable.
            @wraps(inner_func)
            def wrapper(
                *args,
                func=func,
                inner_func=inner_func,
                threshold=threshold,
                **kwargs,
            ):
                function_name = func.get_function_name()

                # Skip the assessment if the risk for this function was
                # explicitly waived via the ignore tool.
                if function_name in self.ignore_toolkit.ignored_risks:
                    reason = self.ignore_toolkit.ignored_risks.pop(
                        function_name
                    )
                    logger.info(
                        f"Ignored risk for function {function_name}: {reason}"
                    )
                    return inner_func(*args, **kwargs)

                self.agent.init_messages()
                resp = self.agent.step(
                    f"""
                    Function is: {function_name}
                    Function description: {func.get_function_description()}
                    Args: {args}
                    Kwargs: {kwargs}
                    """
                )

                tool_call = resp.info.get("external_tool_request", None)
                if not tool_call:
                    logger.error("No tool call found in response.")
                    return {
                        "error": "Risk assessment failed. Disabling function."
                    }

                data = json.loads(tool_call.function.arguments)

                if threshold < data["score"]:
                    message = (
                        f"Risk assessment not passed for {function_name}. "
                        f"Score: {data['score']} > Threshold: {threshold}"
                        f"\nReason: {data['reason']}"
                    )
                    logger.warning(message)
                    return {"error": message}

                logger.info(
                    f"Function {function_name} passed risk assessment. "
                    f"Score: {data['score']}, Reason: {data['reason']}"
                )
                if self.verbose:
                    print(
                        f"Function {function_name} passed risk assessment. "
                        f"Score: {data['score']}, Reason: {data['reason']}"
                    )

                return inner_func(*args, **kwargs)

            func.func = wrapper
            self.tools_map[func.get_function_name()] = func
            self.ignore_toolkit.add(func.get_function_name())

        return self

    def reset(self) -> "LLMGuardRuntime":
        r"""Resets the runtime to its initial state."""
        self.ignore_toolkit.ignored_risks = dict()
        self.agent.reset()
        return self
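

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the runtime). It assumes the
# default model backend is configured (e.g. via environment variables) and
# uses a hypothetical `add_numbers` helper for demonstration:
#
#     def add_numbers(a: int, b: int) -> int:
#         r"""Add two integers and return the sum."""
#         return a + b
#
#     runtime = LLMGuardRuntime(verbose=True).add(
#         FunctionTool(add_numbers), threshold=2
#     )
#     guarded = runtime.tools_map["add_numbers"]
#
#     # The wrapped tool executes only if the guard agent scores the call
#     # at or below the threshold; otherwise a dict with an "error" key is
#     # returned instead of the original result.
#     print(guarded.func(1, 2))
# ---------------------------------------------------------------------------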