Source code for camel.toolkits.image_analysis_toolkit

# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

from io import BytesIO
from typing import List, Optional
from urllib.parse import urlparse

import requests
from PIL import Image

from camel.logger import get_logger
from camel.messages import BaseMessage
from camel.models import BaseModelBackend, ModelFactory
from camel.toolkits import FunctionTool
from camel.toolkits.base import BaseToolkit
from camel.types import ModelPlatformType, ModelType

logger = get_logger(__name__)


class ImageAnalysisToolkit(BaseToolkit):
    r"""A toolkit for comprehensive image analysis and understanding.

    The toolkit uses vision-capable language models to perform these tasks.
    Two public tools are exposed through :meth:`get_tools`:
    :meth:`image_to_text` and :meth:`ask_question_about_image`. Both
    delegate to :meth:`_analyze_image`, which returns failures as strings
    instead of raising.
    """

    def __init__(self, model: Optional[BaseModelBackend] = None):
        r"""Initialize the ImageAnalysisToolkit.

        Args:
            model (Optional[BaseModelBackend]): The model backend to use for
                image analysis tasks. This model should support processing
                images for tasks like image description and visual question
                answering. If None, a default model will be created using
                ModelFactory. (default: :obj:`None`)
        """
        # Compare against None explicitly so that the documented contract
        # ("If None, ...") does not depend on the truthiness of the backend.
        if model is not None:
            self.model = model
        else:
            self.model = ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
            )

    def image_to_text(
        self, image_path: str, sys_prompt: Optional[str] = None
    ) -> str:
        r"""Generates textual description of an image with optional custom
        prompt.

        Args:
            image_path (str): Local path or URL to an image file.
            sys_prompt (Optional[str]): Custom system prompt for the
                analysis. An empty string is treated like :obj:`None` and
                the built-in prompt is used. (default: :obj:`None`)

        Returns:
            str: Natural language description of the image, or an error
                message string if loading or analysis failed.
        """
        default_content = (
            "You are an image analysis expert. Provide a detailed "
            "description including text if present."
        )

        system_msg = BaseMessage.make_assistant_message(
            role_name="Senior Computer Vision Analyst",
            content=sys_prompt if sys_prompt else default_content,
        )

        return self._analyze_image(
            image_path=image_path,
            prompt="Please describe the contents of this image.",
            system_message=system_msg,
        )

    def ask_question_about_image(
        self, image_path: str, question: str, sys_prompt: Optional[str] = None
    ) -> str:
        r"""Answers image questions with optional custom instructions.

        Args:
            image_path (str): Local path or URL to an image file.
            question (str): Query about the image content.
            sys_prompt (Optional[str]): Custom system prompt for the
                analysis. An empty string is treated like :obj:`None` and
                the built-in prompt is used. (default: :obj:`None`)

        Returns:
            str: Detailed answer based on visual understanding, or an error
                message string if loading or analysis failed.
        """
        default_content = (
            "Answer questions about images by: "
            "1. Careful visual inspection "
            "2. Contextual reasoning "
            "3. Text transcription where relevant "
            "4. Logical deduction from visual evidence"
        )

        system_msg = BaseMessage.make_assistant_message(
            role_name="Visual QA Specialist",
            content=sys_prompt if sys_prompt else default_content,
        )

        return self._analyze_image(
            image_path=image_path,
            prompt=question,
            system_message=system_msg,
        )

    def _load_image(self, image_path: str) -> Image.Image:
        r"""Loads an image from either local path or URL.

        Anything whose scheme is ``http`` or ``https`` is downloaded with a
        15 second timeout; every other value is handed to PIL as a local
        path.

        Args:
            image_path (str): Local path or URL to image.

        Returns:
            Image.Image: Loaded PIL Image object.

        Raises:
            ValueError: For invalid paths or unreadable images, whether the
                bytes came from disk or from a URL.
            requests.exceptions.RequestException: For URL fetch failures
                (connection errors, timeouts, non-2xx responses).
        """
        parsed = urlparse(image_path)

        if parsed.scheme in ("http", "https"):
            logger.debug(f"Fetching image from URL: {image_path}")
            # Keep the network step in its own try block so that transport
            # errors are re-raised unchanged, as documented.
            try:
                response = requests.get(image_path, timeout=15)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                logger.error(f"URL fetch failed: {e}")
                raise

            # A successful download can still carry a non-image payload;
            # report that the same way as an unreadable local file.
            try:
                return Image.open(BytesIO(response.content))
            except Exception as e:
                logger.error(f"Image loading failed: {e}")
                raise ValueError(f"Invalid image file: {e}") from e

        logger.debug(f"Loading local image: {image_path}")
        try:
            return Image.open(image_path)
        except Exception as e:
            logger.error(f"Image loading failed: {e}")
            raise ValueError(f"Invalid image file: {e}") from e

    def _analyze_image(
        self,
        image_path: str,
        prompt: str,
        system_message: BaseMessage,
    ) -> str:
        r"""Core analysis method handling image loading and processing.

        Loads the image, sends it together with ``prompt`` to a freshly
        created ``ChatAgent`` backed by ``self.model`` and returns the
        content of the first response message. No exception escapes this
        method: failures are logged and returned as a message string.

        Args:
            image_path (str): Image location.
            prompt (str): Analysis query/instructions.
            system_message (BaseMessage): Custom system prompt for the
                analysis.

        Returns:
            str: Analysis result or error message. Image loading problems
                are prefixed with ``"Image error: "``; any other failure
                with ``"Analysis failed: "``.
        """
        try:
            image = self._load_image(image_path)
            logger.info(f"Analyzing image: {image_path}")

            # NOTE(review): imported at call time, presumably to avoid a
            # circular import between the agents and toolkits packages;
            # confirm before moving it to module level.
            from camel.agents.chat_agent import ChatAgent

            agent = ChatAgent(
                system_message=system_message,
                model=self.model,
            )

            user_msg = BaseMessage.make_user_message(
                role_name="User",
                content=prompt,
                image_list=[image],
            )

            response = agent.step(user_msg)
            agent.reset()
            return response.msgs[0].content

        except (ValueError, requests.exceptions.RequestException) as e:
            logger.error(f"Image handling error: {e}")
            return f"Image error: {e!s}"
        except Exception as e:
            # Tool boundary: convert unexpected failures into a message
            # instead of propagating them to the caller.
            logger.error(f"Unexpected error: {e}")
            return f"Analysis failed: {e!s}"

    def get_tools(self) -> List[FunctionTool]:
        r"""Returns a list of FunctionTool objects representing the
        functions in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects representing
                the functions in the toolkit.
        """
        return [
            FunctionTool(self.image_to_text),
            FunctionTool(self.ask_question_about_image),
        ]