Camel.toolkits.browser toolkit - CAMEL-AI Documentation

_get_str

def _get_str(d: Any, k: str):

Safely retrieve a string value from a dictionary.

_get_number

def _get_number(d: Any, k: str):

Safely retrieve a number (int or float) from a dictionary.

_get_bool

def _get_bool(d: Any, k: str):

Safely retrieve a boolean value from a dictionary.

BaseBrowser

class BaseBrowser:

__init__

def __init__(
    self,
    headless = True,
    cache_dir: Optional[str] = None,
    channel: Literal['chrome', 'msedge', 'chromium'] = 'chromium',
    cookie_json_path: Optional[str] = None
):

Initialize the BaseBrowser instance.

Parameters:

headless (bool): Whether to run the browser in headless mode.
cache_dir (Union[str, None]): The directory to store cache files.
channel (Literal["chrome", "msedge", "chromium"]): The browser channel to use. Must be one of “chrome”, “msedge”, or “chromium”.
cookie_json_path (Optional[str]): Path to a JSON file containing authentication cookies and browser storage state. If provided and the file exists, the browser will load this state to maintain authenticated sessions without requiring manual login.

Returns:

None

init

def init(self):

Initialize the browser.

clean_cache

def clean_cache(self):

Delete the cache directory and its contents.

_wait_for_load

def _wait_for_load(self, timeout: int = 20):

Wait for a certain amount of time for the page to load.

click_blank_area

def click_blank_area(self):

Click a blank area of the page to unfocus the current element.

visit_page

def visit_page(self, url: str):

Visit a page with the given URL.

ask_question_about_video

def ask_question_about_video(self, question: str):

Ask a question about the video on the current page, such as a YouTube video.

Parameters:

question (str): The question to ask.

Returns:

str: The answer to the question.

get_screenshot

def get_screenshot(self, save_image: bool = False):

Get a screenshot of the current page.

Parameters:

save_image (bool): Whether to save the image to the cache directory.

Returns:

Tuple[Image.Image, Union[str, None]]: A tuple containing the screenshot image and the path to the image file if saved, otherwise :obj:`None`.

capture_full_page_screenshots

def capture_full_page_screenshots(self, scroll_ratio: float = 0.8):

Capture full page screenshots by scrolling the page with a buffer zone.

Parameters:

scroll_ratio (float): The ratio of viewport height to scroll each step. (default: :obj:`0.8`)

Returns:

List[str]: A list of paths to the screenshot files.

get_visual_viewport

def get_visual_viewport(self):

Returns:

VisualViewport: The visual viewport of the current page.

get_interactive_elements

def get_interactive_elements(self):

Returns:

Dict[str, InteractiveRegion]: A dictionary of interactive elements.

get_som_screenshot

def get_som_screenshot(self, save_image: bool = False):

Get a screenshot of the current viewport with interactive elements marked.

Parameters:

save_image (bool): Whether to save the image to the cache directory.

Returns:

Tuple[Image.Image, Union[str, None]]: A tuple containing the screenshot image and the path to the image file if saved, otherwise :obj:`None`.

scroll_up

def scroll_up(self):

Scroll up the page.

scroll_down

def scroll_down(self):

Scroll down the page.

get_url

def get_url(self):

Get the URL of the current page.

click_id

def click_id(self, identifier: Union[str, int]):

Click an element with the given identifier.

extract_url_content

def extract_url_content(self):

Extract the content of the current page.

download_file_id

def download_file_id(self, identifier: Union[str, int]):

Download a file with the given identifier.

Parameters:

identifier (Union[str, int]): The identifier of the file to download.

Returns:

str: The result of the action.

fill_input_id

def fill_input_id(self, identifier: Union[str, int], text: str):

Fill an input field with the given text, and then press Enter.

Parameters:

identifier (Union[str, int]): The identifier of the input field.
text (str): The text to fill.

Returns:

str: The result of the action.

scroll_to_bottom

def scroll_to_bottom(self):

scroll_to_top

def scroll_to_top(self):

hover_id

def hover_id(self, identifier: Union[str, int]):

Hover over an element with the given identifier.

Parameters:

identifier (Union[str, int]): The identifier of the element to hover over.

Returns:

str: The result of the action.

find_text_on_page

def find_text_on_page(self, search_text: str):

Find the next occurrence of the given text on the page, and scroll the page to it. This is equivalent to pressing Ctrl + F and searching for the text.

back

def back(self):

Navigate back to the previous page.

close

def close(self):

show_interactive_elements

def show_interactive_elements(self):

Show simple interactive elements on the current page.

get_webpage_content

def get_webpage_content(self):

_ensure_browser_installed

def _ensure_browser_installed(self):

Ensure the browser is installed.

BrowserToolkit

class BrowserToolkit(BaseToolkit):

A class for browsing the web and interacting with web pages.

This class provides methods for browsing the web and interacting with web pages.

__init__

def __init__(
    self,
    headless: bool = False,
    cache_dir: Optional[str] = None,
    channel: Literal['chrome', 'msedge', 'chromium'] = 'chromium',
    history_window: int = 5,
    web_agent_model: Optional[BaseModelBackend] = None,
    planning_agent_model: Optional[BaseModelBackend] = None,
    output_language: str = 'en',
    cookie_json_path: Optional[str] = None
):

Initialize the BrowserToolkit instance.

Parameters:

headless (bool): Whether to run the browser in headless mode.
cache_dir (Union[str, None]): The directory to store cache files.
channel (Literal["chrome", "msedge", "chromium"]): The browser channel to use. Must be one of “chrome”, “msedge”, or “chromium”.
history_window (int): The window size for storing the history of actions.
web_agent_model (Optional[BaseModelBackend]): The model backend for the web agent.
planning_agent_model (Optional[BaseModelBackend]): The model backend for the planning agent.
output_language (str): The language to use for output. (default: :obj:`"en"`)
cookie_json_path (Optional[str]): Path to a JSON file containing authentication cookies and browser storage state. If provided and the file exists, the browser will load this state to maintain authenticated sessions without requiring manual login. (default: :obj:`None`)

_reset

def _reset(self):

_initialize_agent

def _initialize_agent(
    self,
    web_agent_model_backend: Optional[BaseModelBackend],
    planning_agent_model_backend: Optional[BaseModelBackend]
):

Initialize the agent.

_observe

def _observe(self, task_prompt: str, detailed_plan: Optional[str] = None):

Let the agent observe the current environment and get the next action.

_act

def _act(self, action_code: str):

Let the agent act based on the given action code.

Parameters:

action_code (str): The action code to execute.

Returns:

Tuple[bool, str]: A tuple containing a boolean indicating whether the action was successful, and the information to be returned.

_get_final_answer

def _get_final_answer(self, task_prompt: str):

Get the final answer based on the task prompt and current browser state. It is used when the agent thinks that the task can be completed without any further action, and answer can be directly found in the current viewport.

_task_planning

def _task_planning(self, task_prompt: str, start_url: str):

Plan the task based on the given task prompt.

_task_replanning

def _task_replanning(self, task_prompt: str, detailed_plan: str):

Replan the task based on the given task prompt.

Parameters:

task_prompt (str): The original task prompt.
detailed_plan (str): The detailed plan to replan.

Returns:

Tuple[bool, str]: A tuple containing a boolean indicating whether the task needs to be replanned, and the replanned schema.

browse_url

def browse_url(
    self,
    task_prompt: str,
    start_url: str,
    round_limit: int = 12
):

A powerful toolkit that simulates browser interaction to solve tasks requiring multi-step actions.

Parameters:

task_prompt (str): The task prompt to solve.
start_url (str): The start URL to visit.
round_limit (int): The round limit to solve the task. (default: :obj:`12`)

Returns:

str: The simulation result to the task.

get_tools

def get_tools(self):

Camel.toolkits.bohrium toolkit Camel.toolkits.browser toolkit commons

On this page

_get_str
_get_number
_get_bool
BaseBrowser
init
init
clean_cache
_wait_for_load
click_blank_area
visit_page
ask_question_about_video
get_screenshot
capture_full_page_screenshots
get_visual_viewport
get_interactive_elements
get_som_screenshot
scroll_up
scroll_down
get_url
click_id
extract_url_content
download_file_id
fill_input_id
scroll_to_bottom
scroll_to_top
hover_id
find_text_on_page
back
close
show_interactive_elements
get_webpage_content
_ensure_browser_installed
BrowserToolkit
init
_reset
_initialize_agent
_observe
_act
_get_final_answer
_task_planning
_task_replanning
browse_url
get_tools

Overview

Agents

Configs

Data Generation

Datasets

Embeddings

Models

Interpreters

Memory

Messages

Prompts

Responses

Retrievers

Societies

Storage

Tasks

Terminators

Toolkits

Types

Verifiers

Bots

Runtime

Utilities

Environments

Extractors

Personas

Benchmarks

Data Collector

Datahubs

Loaders

Schemas

​_get_str

​_get_number

​_get_bool

​BaseBrowser

​init

​init

​clean_cache

​_wait_for_load

​click_blank_area

​visit_page

​ask_question_about_video

​get_screenshot

​capture_full_page_screenshots

​get_visual_viewport

​get_interactive_elements

​get_som_screenshot

​scroll_up

​scroll_down

​get_url

​click_id

​extract_url_content

​download_file_id

​fill_input_id

​scroll_to_bottom

​scroll_to_top

​hover_id

​find_text_on_page

​back

​close

​show_interactive_elements

​get_webpage_content

​_ensure_browser_installed

​BrowserToolkit

​init

​_reset

​_initialize_agent

​_observe

​_act

​_get_final_answer

​_task_planning

​_task_replanning

​browse_url

​get_tools

_get_str

_get_number

_get_bool

BaseBrowser

init

init

clean_cache

_wait_for_load

click_blank_area

visit_page

ask_question_about_video

get_screenshot

capture_full_page_screenshots

get_visual_viewport

get_interactive_elements

get_som_screenshot

scroll_up

scroll_down

get_url

click_id

extract_url_content

download_file_id

fill_input_id

scroll_to_bottom

scroll_to_top

hover_id

find_text_on_page

back

close

show_interactive_elements

get_webpage_content

_ensure_browser_installed

BrowserToolkit

init

_reset

_initialize_agent

_observe

_act

_get_final_answer

_task_planning

_task_replanning

browse_url

get_tools