# Source code for camel.toolkits.pyautogui_toolkit

# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

import os
import time
from typing import List, Literal, Optional, Tuple, Union

from camel.logger import get_logger
from camel.toolkits import BaseToolkit, FunctionTool
from camel.utils import MCPServer, dependencies_required

# Set up logging
logger = get_logger(__name__)

DURATION = 0.1


@MCPServer()
class PyAutoGUIToolkit(BaseToolkit):
    r"""A toolkit for automating GUI interactions using PyAutoGUI.

    The mouse and keyboard tool methods catch every exception and return a
    status string (``"Error: ..."`` on failure) instead of raising. Target
    coordinates passed to the mouse methods are clamped to a rectangle that
    keeps a 10% margin from every screen edge, so the pointer is not sent
    into a corner where PyAutoGUI's failsafe would abort the run.
    """

    @dependencies_required('pyautogui')
    def __init__(
        self,
        timeout: Optional[float] = None,
        screenshots_dir: str = "tmp",
    ):
        r"""Initializes the PyAutoGUIToolkit with optional timeout.

        Args:
            timeout (Optional[float]): Timeout for API requests in seconds.
                (default: :obj:`None`)
            screenshots_dir (str): Directory to save screenshots. A leading
                ``~`` is expanded to the user's home directory.
                (default: :obj:`"tmp"`)
        """
        import pyautogui

        super().__init__(timeout=timeout)
        # Configure PyAutoGUI for safety
        self.pyautogui = pyautogui
        self.pyautogui.FAILSAFE = True  # Move mouse to upper-left to abort

        # Get screen size for safety boundaries
        self.screen_width, self.screen_height = self.pyautogui.size()
        # Define safe boundaries (10% margin from edges)
        self.safe_margin = 0.1
        self.safe_min_x = int(self.screen_width * self.safe_margin)
        self.safe_max_x = int(self.screen_width * (1 - self.safe_margin))
        self.safe_min_y = int(self.screen_height * self.safe_margin)
        self.safe_max_y = int(self.screen_height * (1 - self.safe_margin))
        self.screen_center = (self.screen_width // 2, self.screen_height // 2)
        self.screenshots_dir = os.path.expanduser(screenshots_dir)

    def _get_safe_coordinates(self, x: int, y: int) -> Tuple[int, int]:
        r"""Ensure coordinates are within safe boundaries to prevent
        triggering failsafe.

        Args:
            x (int): Original x-coordinate.
            y (int): Original y-coordinate.

        Returns:
            Tuple[int, int]: The coordinates clamped to the safe rectangle
                ``[safe_min_x, safe_max_x] x [safe_min_y, safe_max_y]``.
        """
        # Clamp coordinates to safe boundaries
        safe_x = max(self.safe_min_x, min(x, self.safe_max_x))
        safe_y = max(self.safe_min_y, min(y, self.safe_max_y))

        if safe_x != x or safe_y != y:
            logger.info(
                f"Safety: Adjusted coordinates from ({x}, {y}) to "
                f"({safe_x}, {safe_y})"
            )

        return safe_x, safe_y

    def mouse_move(self, x: int, y: int) -> str:
        r"""Move mouse pointer to specified coordinates.

        Args:
            x (int): X-coordinate to move to.
            y (int): Y-coordinate to move to.

        Returns:
            str: Success or error message. The success message reports the
                clamped coordinates the pointer was actually moved to.
        """
        try:
            # Apply safety boundaries
            safe_x, safe_y = self._get_safe_coordinates(x, y)
            self.pyautogui.moveTo(safe_x, safe_y, duration=DURATION)
            return f"Mouse moved to position ({safe_x}, {safe_y})"
        except Exception as e:
            logger.error(f"Error moving mouse: {e}")
            return f"Error: {e}"

    def mouse_click(
        self,
        button: Literal["left", "middle", "right"] = "left",
        clicks: int = 1,
        x: Optional[int] = None,
        y: Optional[int] = None,
    ) -> str:
        r"""Performs a mouse click at the specified coordinates or current
        position.

        Args:
            button (Literal["left", "middle", "right"]): The mouse button to
                click.
                - "left": Typically used for selecting items, activating
                  buttons, or placing the cursor.
                - "middle": Often used for opening links in a new tab or
                  specific application functions.
                - "right": Usually opens a context menu providing options
                  related to the clicked item or area.
                (default: :obj:`"left"`)
            clicks (int): The number of times to click the button.
                - 1: A single click, the most common action.
                - 2: A double-click, often used to open files/folders or
                  select words.
                (default: :obj:`1`)
            x (Optional[int]): The x-coordinate on the screen to move the
                mouse to before clicking. If None, the current x position
                of the mouse is used. (default: :obj:`None`)
            y (Optional[int]): The y-coordinate on the screen to move the
                mouse to before clicking. If None, the current y position
                of the mouse is used. (default: :obj:`None`)

        Returns:
            str: A message indicating the action performed, e.g.,
                "Clicked left button 1 time(s) at position (100, 150)"
                or "Clicked right button 2 time(s) at current position",
                or an error message.
        """
        try:
            if x is None and y is None:
                # No target given: click wherever the pointer currently is
                self.pyautogui.click(button=button, clicks=clicks)
                position_info = "at current position"
            else:
                # Only one axis given: take the other from the current
                # pointer position (same rule as `scroll`) instead of
                # silently dropping the coordinate that was supplied.
                if x is None or y is None:
                    current_x, current_y = self.pyautogui.position()
                    x = current_x if x is None else x
                    y = current_y if y is None else y

                # Apply safety boundaries
                safe_x, safe_y = self._get_safe_coordinates(x, y)
                self.pyautogui.click(
                    x=safe_x, y=safe_y, button=button, clicks=clicks
                )
                position_info = f"at position ({safe_x}, {safe_y})"

            return f"Clicked {button} button {clicks} time(s) {position_info}"
        except Exception as e:
            logger.error(f"Error clicking mouse: {e}")
            return f"Error: {e}"

    def get_mouse_position(self) -> str:
        r"""Get current mouse position.

        Returns:
            str: Current mouse X and Y coordinates, or an error message.
        """
        try:
            x, y = self.pyautogui.position()
            return f"Mouse position: ({x}, {y})"
        except Exception as e:
            logger.error(f"Error getting mouse position: {e}")
            return f"Error: {e}"

    def take_screenshot(self) -> str:
        r"""Take a screenshot.

        The image is written to ``screenshots_dir`` as
        ``screenshot_<unix-seconds>.png``. If that file already exists
        (several screenshots within the same second), a numeric suffix is
        appended so that earlier screenshots are not overwritten.

        Returns:
            str: Path to the saved screenshot or error message.
        """
        try:
            # Create directory for screenshots if it doesn't exist
            os.makedirs(self.screenshots_dir, exist_ok=True)

            # Take screenshot
            screenshot = self.pyautogui.screenshot()

            # Save screenshot to file
            timestamp = int(time.time())
            filename = f"screenshot_{timestamp}.png"
            filepath = os.path.join(self.screenshots_dir, filename)

            # The timestamp only has one-second resolution; pick a free
            # name rather than clobbering a screenshot taken moments ago.
            suffix = 1
            while os.path.exists(filepath):
                filename = f"screenshot_{timestamp}_{suffix}.png"
                filepath = os.path.join(self.screenshots_dir, filename)
                suffix += 1

            screenshot.save(filepath)

            return f"Screenshot saved to {filepath}"
        except Exception as e:
            logger.error(f"Error taking screenshot: {e}")
            return f"Error: {e}"

    def mouse_drag(
        self,
        start_x: int,
        start_y: int,
        end_x: int,
        end_y: int,
        button: Literal["left", "middle", "right"] = "left",
    ) -> str:
        r"""Drag mouse from start position to end position.

        After the drag (and also after a failed drag) the pointer is moved
        to the screen center.

        Args:
            start_x (int): Starting x-coordinate.
            start_y (int): Starting y-coordinate.
            end_x (int): Ending x-coordinate.
            end_y (int): Ending y-coordinate.
            button (Literal["left", "middle", "right"]): Mouse button to use
                ('left', 'middle', 'right'). (default: :obj:`'left'`)

        Returns:
            str: Success or error message.
        """
        try:
            # Apply safety boundaries to both start and end positions
            safe_start_x, safe_start_y = self._get_safe_coordinates(
                start_x, start_y
            )
            safe_end_x, safe_end_y = self._get_safe_coordinates(end_x, end_y)

            # Break operation into smaller steps for safety
            # First move to start position
            self.pyautogui.moveTo(
                safe_start_x, safe_start_y, duration=DURATION
            )
            # Then perform drag
            self.pyautogui.dragTo(
                safe_end_x, safe_end_y, duration=DURATION, button=button
            )
            # Finally, move to a safe position (screen center) afterwards
            self.pyautogui.moveTo(
                self.screen_center[0],
                self.screen_center[1],
                duration=DURATION,
            )

            return (
                f"Dragged from ({safe_start_x}, {safe_start_y}) "
                f"to ({safe_end_x}, {safe_end_y})"
            )
        except Exception as e:
            logger.error(f"Error dragging mouse: {e}")
            # Try to move to safe position even after error
            try:
                self.pyautogui.moveTo(
                    self.screen_center[0],
                    self.screen_center[1],
                    duration=DURATION,
                )
            except Exception as recovery_error:
                logger.error(
                    f"Failed to move to safe position: {recovery_error}"
                )
            return f"Error: {e}"

    def scroll(
        self,
        scroll_amount: int,
        x: Optional[int] = None,
        y: Optional[int] = None,
    ) -> str:
        r"""Scroll the mouse wheel.

        After scrolling, the pointer is moved back to the screen center.

        Args:
            scroll_amount (int): Amount to scroll. Positive values scroll up,
                negative values scroll down.
            x (Optional[int]): X-coordinate to scroll at. If None, uses
                current position. (default: :obj:`None`)
            y (Optional[int]): Y-coordinate to scroll at. If None, uses
                current position. (default: :obj:`None`)

        Returns:
            str: Success or error message.
        """
        try:
            # Get current mouse position if coordinates are not specified
            if x is None or y is None:
                current_x, current_y = self.pyautogui.position()
                x = x if x is not None else current_x
                y = y if y is not None else current_y

            # Always apply safety boundaries
            safe_x, safe_y = self._get_safe_coordinates(x, y)
            self.pyautogui.scroll(scroll_amount, x=safe_x, y=safe_y)

            # Move mouse back to screen center for added safety
            logger.info(
                f"Safety: Moving mouse back to screen center "
                f"({self.screen_center[0]}, {self.screen_center[1]})"
            )
            self.pyautogui.moveTo(self.screen_center[0], self.screen_center[1])

            return (
                f"Scrolled {scroll_amount} clicks at position "
                f"{safe_x}, {safe_y}"
            )
        except Exception as e:
            logger.error(f"Error scrolling: {e}")
            return f"Error: {e}"

    def keyboard_type(self, text: str, interval: float = 0.0) -> str:
        r"""Type text on the keyboard.

        The pointer is moved to the screen center before typing.

        Args:
            text (str): Text to type.
            interval (float): Seconds to wait between keypresses.
                (default: :obj:`0.0`)

        Returns:
            str: Success or error message. The success message echoes at
                most the first 20 characters of the text.
        """
        try:
            if not text:
                return "Error: Empty text provided"

            if len(text) > 1000:  # Set a reasonable maximum length limit
                # Long text is only warned about; it is still typed in full
                warn_msg = (
                    f"Warning: Very long text ({len(text)} characters) may "
                    f"cause performance issues"
                )
                logger.warning(warn_msg)

            # First, move mouse to a safe position to prevent potential issues
            self.pyautogui.moveTo(
                self.screen_center[0], self.screen_center[1], duration=DURATION
            )

            self.pyautogui.write(text, interval=interval)
            return f"Typed text: {text[:20]}{'...' if len(text) > 20 else ''}"
        except Exception as e:
            logger.error(f"Error typing text: {e}")
            return f"Error: {e}"

    def press_key(self, key: Union[str, List[str]]) -> str:
        r"""Press a key on the keyboard.

        The pointer is moved to the screen center before the key press.

        Args:
            key (Union[str, List[str]]): The key to be pressed. Can also be a
                list of such strings. Valid key names include:
                - Basic characters: a-z, 0-9, and symbols like !, @, #, etc.
                - Special keys: enter, esc, space, tab, backspace, delete
                - Function keys: f1-f24
                - Navigation: up, down, left, right, home, end, pageup,
                  pagedown
                - Modifiers: shift, ctrl, alt, command, option, win
                - Media keys: volumeup, volumedown, volumemute, playpause

        Returns:
            str: Success or error message.
        """
        # Imported once per call rather than once per validated key
        import re

        if isinstance(key, str):
            key = [key]
        try:
            # The checks below only log warnings; suspicious key names are
            # still handed to PyAutoGUI afterwards.
            for k in key:
                # Length validation (most valid key names are short)
                if len(k) > 20:
                    logger.warning(
                        f"Warning: Key name '{k}' is too long "
                        "(max 20 characters)"
                    )

                # Special character validation
                # (key names usually don't contain special characters)
                if re.search(r'[^\w+\-_]', k) and len(k) > 1:
                    logger.warning(
                        f"Warning: Key '{k}' contains unusual characters"
                    )

            # First, move mouse to a safe position to prevent potential issues
            self.pyautogui.moveTo(
                self.screen_center[0], self.screen_center[1], duration=DURATION
            )

            self.pyautogui.press(key)
            return f"Pressed key: {key}"
        except Exception as e:
            logger.error(f"Error pressing key: {e}")
            return f"Error: Invalid key '{key}' or error pressing it. {e}"

    def hotkey(self, keys: List[str]) -> str:
        r"""Press keys in succession and release in reverse order.

        The pointer is moved to the screen center before the key presses.

        Args:
            keys (List[str]): The series of keys to press, in order, given
                as a single list of strings, e.g. ``hotkey(['ctrl', 'c'])``.
                A bare string is treated as a one-key combination.

        Returns:
            str: Success or error message.
        """
        try:
            # A bare string would otherwise be unpacked character by
            # character by `*keys` below ("ctrl" -> 'c', 't', 'r', 'l').
            if isinstance(keys, str):
                keys = [keys]

            if not keys:
                return "Error: No keys provided"

            # First, move mouse to a safe position to prevent potential issues
            self.pyautogui.moveTo(
                self.screen_center[0], self.screen_center[1], duration=DURATION
            )

            self.pyautogui.hotkey(*keys)
            return f"Pressed hotkey: {'+'.join(keys)}"
        except Exception as e:
            logger.error(f"Error pressing hotkey: {e}")
            return f"Error: {e}"

    def get_tools(self) -> List[FunctionTool]:
        r"""Returns a list of FunctionTool objects for PyAutoGUI operations.

        Returns:
            List[FunctionTool]: List of PyAutoGUI functions.
        """
        return [
            FunctionTool(self.mouse_move),
            FunctionTool(self.mouse_click),
            FunctionTool(self.keyboard_type),
            FunctionTool(self.take_screenshot),
            FunctionTool(self.get_mouse_position),
            FunctionTool(self.press_key),
            FunctionTool(self.hotkey),
            FunctionTool(self.mouse_drag),
            FunctionTool(self.scroll),
        ]