HybridBrowserToolkit

class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
A hybrid browser toolkit that combines non-visual, DOM-based browser automation with visual, screenshot-based capabilities. This toolkit exposes a set of actions as CAMEL FunctionTools for agents to interact with web pages. It can operate in headless mode and supports both programmatic control of browser actions (like clicking and typing) and visual analysis of the page layout through screenshots with marked interactive elements.

__init__

def __init__(self):
Initialize the HybridBrowserToolkit. Parameters:
  • headless (bool): Whether to run the browser in headless mode. Defaults to True.
  • user_data_dir (Optional[str]): Path to a directory for storing browser data like cookies and local storage. Useful for maintaining sessions across runs. Defaults to None (a temporary directory is used).
  • stealth (bool): Whether to run the browser in stealth mode to avoid bot detection. When enabled, hides WebDriver characteristics, spoofs navigator properties, and implements various anti-detection measures. Highly recommended for production use and when accessing sites with bot detection. Defaults to False.
  • web_agent_model (Optional[BaseModelBackend]): The language model backend to use for the agent behind the high-level browser_solve_task tool. This is required only if you plan to use browser_solve_task. Defaults to None.
  • cache_dir (str): The directory to store cached files, such as screenshots. Defaults to "tmp/".
  • enabled_tools (Optional[List[str]]): List of tool names to enable. If None, uses DEFAULT_TOOLS. Available tools: browser_open, browser_close, browser_visit_page, browser_back, browser_forward, browser_get_page_snapshot, browser_get_som_screenshot, browser_get_page_links, browser_click, browser_type, browser_select, browser_scroll, browser_enter, browser_wait_user, browser_solve_task. Defaults to None.
  • browser_log_to_file (bool): Whether to save detailed browser action logs to file. When enabled, logs action inputs/outputs, execution times, and page loading times. Logs are saved to an auto-generated timestamped file. Defaults to False.
  • session_id (Optional[str]): A unique identifier for this browser session. When multiple HybridBrowserToolkit instances are used concurrently, different session IDs prevent them from sharing the same browser session and causing conflicts. If None, a default session will be used. Defaults to None.
  • default_start_url (str): The default URL to navigate to when browser_open() is called without a start_url parameter or with None. Defaults to "https://google.com/".
  • default_timeout (Optional[int]): Default timeout in milliseconds for browser actions. If None, the value of the environment variable HYBRID_BROWSER_DEFAULT_TIMEOUT is used when set; otherwise the timeout falls back to 3000ms. Defaults to None.
  • short_timeout (Optional[int]): Short timeout in milliseconds for quick browser actions. If None, the value of the environment variable HYBRID_BROWSER_SHORT_TIMEOUT is used when set; otherwise the timeout falls back to 1000ms. Defaults to None.
  • navigation_timeout (Optional[int]): Custom navigation timeout in milliseconds. If None, the value of the environment variable HYBRID_BROWSER_NAVIGATION_TIMEOUT is used when set; otherwise the timeout falls back to 10000ms. Defaults to None.
  • network_idle_timeout (Optional[int]): Custom network idle timeout in milliseconds. If None, the value of the environment variable HYBRID_BROWSER_NETWORK_IDLE_TIMEOUT is used when set; otherwise the timeout falls back to 5000ms. Defaults to None.
  • screenshot_timeout (Optional[int]): Custom screenshot timeout in milliseconds. If None, the value of the environment variable HYBRID_BROWSER_SCREENSHOT_TIMEOUT is used when set; otherwise the timeout falls back to 15000ms. Defaults to None.
  • page_stability_timeout (Optional[int]): Custom page stability timeout in milliseconds. If None, the value of the environment variable HYBRID_BROWSER_PAGE_STABILITY_TIMEOUT is used when set; otherwise the timeout falls back to 1500ms. Defaults to None.
  • dom_content_loaded_timeout (Optional[int]): Custom DOM content loaded timeout in milliseconds. If None, the value of the environment variable HYBRID_BROWSER_DOM_CONTENT_LOADED_TIMEOUT is used when set; otherwise the timeout falls back to 5000ms. Defaults to None.

web_agent_model

def web_agent_model(self):
Get the web agent model.

web_agent_model

def web_agent_model(self, value: Optional[BaseModelBackend]):
Set the web agent model.

cache_dir

def cache_dir(self):
Get the cache directory.

__del__

def __del__(self):
Cleanup browser resources on garbage collection.

_load_unified_analyzer

def _load_unified_analyzer(self):
Load the unified analyzer JavaScript script.

_validate_ref

def _validate_ref(self, ref: str, method_name: str):
Validate ref parameter.

_truncate_if_needed

def _truncate_if_needed(self, content: Any):
Truncate content if max_log_length is set.

action_logger

def action_logger(func: Callable[..., Any]):
Decorator to add logging to action methods.

_convert_analysis_to_rects

def _convert_analysis_to_rects(self, analysis_data: Dict[str, Any]):
Convert analysis data to rect format for visual marking.

_add_set_of_mark

def _add_set_of_mark(self, image, rects):
Add visual marks to the image.

_format_snapshot_from_analysis

def _format_snapshot_from_analysis(self, analysis_data: Dict[str, Any]):
Format analysis data into snapshot string.

_ensure_agent

def _ensure_agent(self):
Create PlaywrightLLMAgent on first use.

get_log_summary

def get_log_summary(self):
Get a summary of logged actions.

clear_logs

def clear_logs(self):
Clear the log buffer.

clone_for_new_session

def clone_for_new_session(self, new_session_id: Optional[str] = None):
Create a new instance of HybridBrowserToolkit with a unique session. Parameters:
  • new_session_id (Optional[str]): Optional new session ID. If None, a UUID will be generated. Defaults to None.
Returns: A new HybridBrowserToolkit instance with the same configuration but a different session.

get_tools

def get_tools(self):
Get available function tools based on enabled_tools configuration.