Source code for camel.loaders.scrapegraph_reader
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
import os
from typing import Any, Dict, Optional
class ScrapeGraphAI:
r"""ScrapeGraphAI allows you to perform AI-powered web scraping and
searching.
Args:
api_key (Optional[str]): API key for authenticating with the
ScrapeGraphAI API.
References:
https://scrapegraph.ai/
"""
def __init__(
self,
api_key: Optional[str] = None,
) -> None:
        # Imported lazily so that the scrapegraph-py dependency is only
        # required when this loader is actually used.
        from scrapegraph_py import Client
        from scrapegraph_py.logger import sgai_logger

        # Prefer an explicitly provided key; otherwise fall back to the
        # SCRAPEGRAPH_API_KEY environment variable.
        self._api_key = api_key or os.environ.get("SCRAPEGRAPH_API_KEY")
        sgai_logger.set_logging(level="INFO")
        self.client = Client(api_key=self._api_key)
def search(
self,
user_prompt: str,
) -> Dict[str, Any]:
r"""Perform an AI-powered web search using ScrapeGraphAI.
Args:
user_prompt (str): The search query or instructions.
Returns:
Dict[str, Any]: The search results including answer and reference
URLs.
Raises:
RuntimeError: If the search process fails.
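
        Example:
            An illustrative call; the prompt below is a placeholder and a
            valid ScrapeGraphAI API key is assumed to be configured::

                scraper = ScrapeGraphAI()
                results = scraper.search(
                    user_prompt="What is the CAMEL-AI framework?"
                )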
"""
try:
response = self.client.searchscraper(user_prompt=user_prompt)
return response
        except Exception as e:
            raise RuntimeError(f"Failed to perform search: {e}") from e
def scrape(
self,
website_url: str,
user_prompt: str,
website_html: Optional[str] = None,
) -> Dict[str, Any]:
r"""Perform AI-powered web scraping using ScrapeGraphAI.
Args:
website_url (str): The URL to scrape.
user_prompt (str): Instructions for what data to extract.
website_html (Optional[str]): Optional HTML content to use instead
of fetching from the URL.
Returns:
Dict[str, Any]: The scraped data including request ID and result.
Raises:
RuntimeError: If the scrape process fails.
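
        Example:
            An illustrative call; the URL and prompt are placeholders and a
            valid ScrapeGraphAI API key is assumed to be configured::

                scraper = ScrapeGraphAI()
                data = scraper.scrape(
                    website_url="https://www.camel-ai.org",
                    user_prompt="Extract the page title and description.",
                )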
"""
try:
response = self.client.smartscraper(
website_url=website_url,
user_prompt=user_prompt,
website_html=website_html,
)
return response
        except Exception as e:
            raise RuntimeError(f"Failed to perform scrape: {e}") from e
def close(self) -> None:
r"""Close the ScrapeGraphAI client connection."""
self.client.close()
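

# A minimal end-to-end usage sketch (illustrative only, not part of the
# loader itself). It assumes a valid SCRAPEGRAPH_API_KEY is set in the
# environment; the URL and prompts below are placeholders.
if __name__ == "__main__":
    scraper = ScrapeGraphAI()
    try:
        # AI-powered search: returns an answer plus reference URLs.
        print(scraper.search(user_prompt="What is the CAMEL-AI framework?"))

        # AI-powered scraping: extracts the requested data from a page.
        print(
            scraper.scrape(
                website_url="https://www.camel-ai.org",
                user_prompt="Extract the page title and main description.",
            )
        )
    finally:
        # Always close the client to release its resources.
        scraper.close()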