Source code for camel.toolkits.search_toolkit

# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import os
from typing import Any, Dict, List

from camel.toolkits.base import BaseToolkit
from camel.toolkits.openai_function import OpenAIFunction


class SearchToolkit(BaseToolkit):
    r"""A class representing a toolkit for web search.

    This class provides methods for searching information on the web using
    search engines like Google, DuckDuckGo, Wikipedia and Wolfram Alpha.
    """

    def search_wiki(self, entity: str) -> str:
        r"""Search the entity in Wikipedia and return the summary of the
        required page, containing factual information about the given entity.

        Args:
            entity (str): The entity to be searched.

        Returns:
            str: The search result. If the page corresponding to the entity
                exists, return the summary of this entity in a string.
                Otherwise return a message describing why no summary could
                be produced.

        Raises:
            ImportError: If the `wikipedia` package is not installed.
        """
        try:
            import wikipedia
        except ImportError as e:
            raise ImportError(
                "Please install `wikipedia` first. You can install it "
                "by running `pip install wikipedia`."
            ) from e

        def summarize(title: str) -> str:
            return wikipedia.summary(title, sentences=5, auto_suggest=False)

        # The disambiguation fallback lives in an inner `try` so that any
        # error raised by the *second* lookup is still translated into a
        # message by the outer handlers instead of escaping to the caller.
        try:
            try:
                return summarize(entity)
            except wikipedia.exceptions.DisambiguationError as e:
                if not e.options:
                    # Nothing to fall back to; let the outer handler report.
                    raise
                return summarize(e.options[0])
        except wikipedia.exceptions.PageError:
            return (
                "There is no page in Wikipedia corresponding to entity "
                f"{entity}, please specify another word to describe the"
                " entity to be searched."
            )
        except wikipedia.exceptions.WikipediaException as e:
            return f"An exception occurred during the search: {e}"

    def search_duckduckgo(
        self, query: str, source: str = "text", max_results: int = 5
    ) -> List[Dict[str, Any]]:
        r"""Use DuckDuckGo search engine to search information for the given
        query.

        This function queries the DuckDuckGo API for related topics to the
        given search term. The results are formatted into a list of
        dictionaries, each representing a search result.

        Args:
            query (str): The query to be searched.
            source (str): The type of information to query (e.g., "text",
                "images", "videos"). Defaults to "text".
            max_results (int): Max number of results, defaults to `5`.

        Returns:
            List[Dict[str, Any]]: A list of dictionaries where each
                dictionary represents a search result. If the request
                fails, the list holds a single dictionary with an
                `error` key. An unrecognised `source` yields an empty
                list.

        Raises:
            ImportError: If the `duckduckgo_search` package is not
                installed.
        """
        try:
            from duckduckgo_search import DDGS
        except ImportError as e:
            raise ImportError(
                "Please install `duckduckgo_search` first. You can install "
                "it by running `pip install duckduckgo-search`."
            ) from e
        from requests.exceptions import RequestException

        # For every supported source: (output key, key in the raw result),
        # in the order the keys appear in each response dictionary.
        field_maps = {
            "text": (
                ("title", "title"),
                ("description", "body"),
                ("url", "href"),
            ),
            "images": (
                ("title", "title"),
                ("image", "image"),
                ("url", "url"),
                ("source", "source"),
            ),
            "videos": (
                ("title", "title"),
                ("description", "description"),
                ("embed_url", "embed_url"),
                ("publisher", "publisher"),
                ("duration", "duration"),
                ("published", "published"),
            ),
        }
        if source not in field_maps:
            return []

        ddgs = DDGS()
        searchers = {
            "text": ddgs.text,
            "images": ddgs.images,
            "videos": ddgs.videos,
        }

        try:
            results = searchers[source](
                keywords=query, max_results=max_results
            )
        except RequestException as e:
            # Return right away: there are no results to iterate over.
            # NOTE(review): some `duckduckgo_search` releases may raise
            # their own exception type instead -- confirm against the
            # pinned version.
            return [{"error": f"duckduckgo search failed.{e}"}]

        responses: List[Dict[str, Any]] = []
        for i, result in enumerate(results or [], start=1):
            response: Dict[str, Any] = {"result_id": i}
            for out_key, raw_key in field_maps[source]:
                response[out_key] = result[raw_key]
            responses.append(response)
        return responses

    def search_google(
        self, query: str, num_result_pages: int = 5
    ) -> List[Dict[str, Any]]:
        r"""Use Google search engine to search information for the given
        query.

        Args:
            query (str): The query to be searched.
            num_result_pages (int): The number of result pages to retrieve.
                It is sent as the `num` parameter of the request.

        Returns:
            List[Dict[str, Any]]: A list of dictionaries where each
                dictionary represents a website, containing the title,
                description and url of the website.
                Each dictionary contains the following keys:
                - 'result_id': A number in order.
                - 'title': The title of the website.
                - 'description': A brief description of the website.
                - 'long_description': More detail of the website.
                - 'url': The URL of the website.

                Example:
                {
                    'result_id': 1,
                    'title': 'OpenAI',
                    'description': 'An organization focused on ensuring that
                    artificial general intelligence benefits all of humanity.',
                    'long_description': 'OpenAI is a non-profit artificial
                    intelligence research company. Our goal is to advance
                    digital intelligence in the way that is most likely to
                    benefit humanity as a whole',
                    'url': 'https://www.openai.com'
                }

                If the request fails or the response carries no `items`,
                the list holds a single dictionary with an `error` key.
        """
        import requests

        # Doc: https://developers.google.com/custom-search/v1/using_rest
        # Passing the values through `params` lets `requests` URL-encode
        # them, so queries containing `&`, `#`, spaces or non-ASCII text
        # reach the API intact.
        params = {
            # https://developers.google.com/custom-search/v1/overview
            "key": os.getenv("GOOGLE_API_KEY"),
            # https://cse.google.com/cse/all
            "cx": os.getenv("SEARCH_ENGINE_ID"),
            "q": query,
            # Using the first page
            "start": 1,
            # Different language may get different result
            # NOTE(review): the API reference shows `lr` values such as
            # `lang_en`; the historical value is kept -- confirm.
            "lr": "en",
            "num": num_result_pages,
        }

        try:
            data = requests.get(
                "https://www.googleapis.com/customsearch/v1", params=params
            ).json()
        except (requests.RequestException, ValueError):
            # ValueError covers a body that is not valid JSON.
            return [{"error": "google search failed."}]

        if "items" not in data:
            return [{"error": "google search failed."}]

        responses: List[Dict[str, Any]] = []
        for i, search_item in enumerate(data.get("items"), start=1):
            # Not every item carries `pagemap`/`metatags`; fall back to
            # "N/A" instead of raising KeyError.
            metatags = search_item.get("pagemap", {}).get("metatags") or [{}]
            responses.append(
                {
                    "result_id": i,
                    "title": search_item.get("title"),
                    "description": search_item.get("snippet"),
                    "long_description": metatags[0].get(
                        "og:description", "N/A"
                    ),
                    "url": search_item.get("link"),
                }
            )
        return responses

    def query_wolfram_alpha(self, query: str, is_detailed: bool) -> str:
        r"""Queries Wolfram|Alpha and returns the result. Wolfram|Alpha is an
        answer engine developed by Wolfram Research. It is offered as an
        online service that answers factual queries by computing answers
        from externally sourced data.

        Args:
            query (str): The query to send to Wolfram Alpha.
            is_detailed (bool): Whether to include additional details in the
                result.

        Returns:
            str: The result from Wolfram Alpha, formatted as a string, or a
                message saying that the query could not be answered.

        Raises:
            ImportError: If the `wolframalpha` package is not installed.
            ValueError: If `WOLFRAMALPHA_APP_ID` is not set in the
                environment.
        """
        try:
            import wolframalpha
        except ImportError as e:
            raise ImportError(
                "Please install `wolframalpha` first. You can install it by"
                " running `pip install wolframalpha`."
            ) from e

        WOLFRAMALPHA_APP_ID = os.environ.get('WOLFRAMALPHA_APP_ID')
        if not WOLFRAMALPHA_APP_ID:
            raise ValueError(
                "`WOLFRAMALPHA_APP_ID` not found in environment "
                "variables. Get `WOLFRAMALPHA_APP_ID` here: "
                "`https://products.wolframalpha.com/api/`."
            )

        try:
            client = wolframalpha.Client(WOLFRAMALPHA_APP_ID)
            res = client.query(query)
            assumption = next(res.pods).text or "No assumption made."
            answer = next(res.results).text or "No answer found."
        except StopIteration:
            # No pods / no primary result were returned for the query.
            return "Wolfram Alpha wasn't able to answer it"
        except Exception as e:
            # Deliberate best-effort boundary: any client failure is
            # reported back as text rather than raised.
            return f"Wolfram Alpha wasn't able to answer it: {e!s}."

        result = f"Assumption:\n{assumption}\n\nAnswer:\n{answer}"

        # Add additional details in the result
        if is_detailed:
            parts = [result, '\n']
            for pod in res.pods:
                parts.append('\n' + pod['@title'] + ':\n')
                parts.extend(
                    (sub.plaintext or "None") + '\n' for sub in pod.subpods
                )
            result = "".join(parts)

        return result.rstrip()  # Remove trailing whitespace

    def get_tools(self) -> List[OpenAIFunction]:
        r"""Returns a list of OpenAIFunction objects representing the
        functions in the toolkit.

        Returns:
            List[OpenAIFunction]: A list of OpenAIFunction objects
                representing the functions in the toolkit.
        """
        return [
            OpenAIFunction(self.search_wiki),
            OpenAIFunction(self.search_google),
            OpenAIFunction(self.search_duckduckgo),
            OpenAIFunction(self.query_wolfram_alpha),
        ]
# Module-level list of the toolkit's tools, built once at import time by
# instantiating SearchToolkit and calling its get_tools() method.
SEARCH_FUNCS: List[OpenAIFunction] = SearchToolkit().get_tools()