Source code for camel.toolkits.searxng_toolkit
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
from typing import ClassVar, Dict, List, Optional, Union
from urllib.parse import urlparse
import requests
from camel.logger import get_logger
from camel.toolkits.base import BaseToolkit
from camel.toolkits.function_tool import FunctionTool
from camel.utils import MCPServer
# Module-level logger shared by everything in this file; it is obtained from
# camel.logger.get_logger and named after this module via ``__name__``.
logger = get_logger(__name__)
[docs]
@MCPServer()
class SearxNGToolkit(BaseToolkit):
    r"""A toolkit for performing web searches using SearxNG search engine.

    This toolkit provides methods to search the web using SearxNG,
    a privacy-respecting metasearch engine. It supports customizable
    search parameters and safe search levels.

    Args:
        searxng_host (str): The URL of the SearxNG instance to use for
            searches. Must be a valid HTTP/HTTPS URL. A trailing slash is
            stripped before the URL is stored.
        language (str, optional): Search language code for results.
            (default: :obj:`"en"`)
        categories (List[str], optional): List of search categories to use.
            If empty or not provided, ``["general"]`` is used.
            (default: :obj:`None`)
        time_range (str, optional): Time range filter for search results.
            Valid values are "day", "week", "month", "year".
            (default: :obj:`None`)
        safe_search (int, optional): Safe search level (0: Disabled,
            1: Moderate, 2: Strict). (default: :obj:`1`)
        timeout (Optional[float]): The timeout value for API requests
            in seconds. It is forwarded to the base toolkit and is also
            applied to the HTTP request sent to the SearxNG instance. If
            None, the HTTP request is sent without a timeout.
            (default: :obj:`None`)

    Raises:
        ValueError: If searxng_host is not a valid HTTP/HTTPS URL.
        ValueError: If safe_search is not in the valid range [0, 2].
        ValueError: If time_range is provided but not in valid options.
    """

    # Constants for validation
    _SAFE_SEARCH_LEVELS: ClassVar[Dict[int, str]] = {
        0: "Disabled",
        1: "Moderate",
        2: "Strict",
    }
    _VALID_TIME_RANGES: ClassVar[List[str]] = ["day", "week", "month", "year"]
    _DEFAULT_CATEGORY: ClassVar[str] = "general"

    def __init__(
        self,
        searxng_host: str,
        language: str = "en",
        categories: Optional[List[str]] = None,
        time_range: Optional[str] = None,
        safe_search: int = 1,
        timeout: Optional[float] = None,
    ) -> None:
        r"""Validate the configuration and store it on the instance.

        See the class docstring for the meaning of each argument.

        Raises:
            ValueError: If any of searxng_host, safe_search or time_range
                fails validation.
        """
        super().__init__(timeout=timeout)

        # Validate everything before any attribute is assigned, so an
        # invalid configuration never yields a half-initialized toolkit.
        self._validate_searxng_host(searxng_host)
        self._validate_safe_search(safe_search)
        if time_range is not None:
            self._validate_time_range(time_range)

        self.searxng_host = searxng_host.rstrip('/')
        self.language = language
        self.categories = categories or [self._DEFAULT_CATEGORY]
        self.time_range = time_range
        self.safe_search = safe_search
        # Kept separately so `search` can bound the HTTP request itself;
        # without it a stalled server would block the call indefinitely.
        self._request_timeout = timeout

        logger.info(
            f"Initialized SearxNG toolkit with host: {searxng_host}, "
            f"safe_search: {self._SAFE_SEARCH_LEVELS[safe_search]}"
        )

    def _validate_searxng_host(self, url: str) -> None:
        r"""Validate if the given URL is a proper HTTP/HTTPS URL.

        The URL is accepted when its scheme is ``http`` or ``https`` and it
        has a non-empty network location.

        Args:
            url (str): The URL to validate.

        Raises:
            ValueError: If the URL is not valid, or cannot be parsed at all.
        """
        error_message = (
            "Invalid searxng_host URL. Must be a valid HTTP/HTTPS URL."
        )
        try:
            parsed = urlparse(url)
        except Exception as parse_error:
            # Any parsing failure (malformed URL, wrong argument type) is
            # reported uniformly as a ValueError, as callers expect.
            raise ValueError(error_message) from parse_error

        if parsed.scheme not in ('http', 'https') or not parsed.netloc:
            raise ValueError(error_message)

    def _validate_safe_search(self, level: int) -> None:
        r"""Validate if the safe search level is valid.

        Args:
            level (int): The safe search level to validate.

        Raises:
            ValueError: If the safe search level is not one of the keys of
                ``_SAFE_SEARCH_LEVELS``.
        """
        if level not in self._SAFE_SEARCH_LEVELS:
            raise ValueError(
                f"Invalid safe_search level: {level}. Must be one of: "
                f"{list(self._SAFE_SEARCH_LEVELS.keys())}"
            )

    def _validate_time_range(self, time_range: str) -> None:
        r"""Validate if the time range is valid.

        Args:
            time_range (str): The time range to validate.

        Raises:
            ValueError: If the time range is not listed in
                ``_VALID_TIME_RANGES``.
        """
        if time_range not in self._VALID_TIME_RANGES:
            raise ValueError(
                f"Invalid time_range: {time_range}. Must be one of: "
                f"{self._VALID_TIME_RANGES}"
            )

    def search(
        self,
        query: str,
        num_results: int = 10,
        category: Optional[str] = None,
    ) -> List[Dict[str, str]]:
        r"""Perform a web search using the configured SearxNG instance.

        Only the first page of results is requested.

        Args:
            query (str): The search query string to execute.
            num_results (int, optional): Maximum number of results to return.
                Negative values are treated as 0. (default: :obj:`10`)
            category (str, optional): Specific search category to use. If not
                provided, uses the first category from self.categories.
                (default: :obj:`None`)

        Returns:
            List[Dict[str, str]]: List of search results, where each result is
                a dictionary containing 'title', 'link', and 'snippet' keys.
                An empty list is returned when the request or the handling
                of its response fails; the error is logged, not raised.
        """
        params: Dict[str, Union[str, int]] = {
            "q": query,
            "format": "json",
            "language": self.language,
            "categories": category or self.categories[0],
            "pageno": 1,
            "safe": self.safe_search,
        }
        if self.time_range:
            params["time_range"] = self.time_range

        # A negative slice bound would drop items from the END of the list
        # rather than cap the count, so clamp it to "no results".
        limit = num_results
        if isinstance(limit, int) and limit < 0:
            limit = 0

        try:
            logger.debug(f"Sending search request with query: {query}")
            response = requests.get(
                f"{self.searxng_host}/search",
                params=params,  # type: ignore[arg-type]
                headers={"User-Agent": "camel-ai/searxng-toolkit"},
                # None keeps the request unbounded, as when no timeout
                # was configured.
                timeout=self._request_timeout,
            )
            response.raise_for_status()
            results = response.json().get("results", [])

            # Map SearxNG's field names onto the toolkit's result schema.
            formatted_results = [
                {
                    "title": result.get("title", ""),
                    "link": result.get("url", ""),
                    "snippet": result.get("content", ""),
                }
                for result in results[:limit]
            ]
            logger.debug(f"Retrieved {len(formatted_results)} results")
            return formatted_results
        except Exception as error:
            # Deliberate best-effort: any failure (network, HTTP status,
            # malformed payload) is logged and reported as "no results".
            logger.error(f"Search failed: {error!s}")
            return []

    def get_tools(self) -> List[FunctionTool]:
        r"""Get the list of available tools in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects representing the
                available functions in the toolkit. Currently this is only
                the :obj:`search` method.
        """
        return [
            FunctionTool(self.search),
        ]