Source code for camel.extractors.python_strategies

# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

import ast
from typing import Optional

from camel.extractors.base import BaseExtractorStrategy
from camel.logger import get_logger

logger = get_logger(__name__)


class BoxedStrategy(BaseExtractorStrategy):
    r"""Pulls the payload out of the first \\boxed{...} group in a text."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Return the text enclosed by the first \\boxed{...} group.

        Braces nested inside the group are matched by depth counting, and
        the character following a backslash is never treated as a brace.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: The whitespace-stripped content of the group, or
                None when there is no \\boxed{ marker, nothing follows the
                marker, or the braces never balance.
        """
        marker = "\\boxed{"
        marker_pos = text.find(marker)
        if marker_pos == -1:
            logger.debug("No \\boxed{} content found in the response")
            return None

        body_start = marker_pos + len(marker)
        if body_start >= len(text):
            logger.debug("Malformed \\boxed{} (no content after opening)")
            return None

        # The marker's own "{" is already open, so depth starts at one.
        depth = 1
        close_pos = None
        skip_next = False
        for offset, ch in enumerate(text[body_start:]):
            if skip_next:
                # Character right after a backslash: ignore it entirely.
                skip_next = False
            elif ch == '\\':
                skip_next = True
            elif ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    # This brace closes the \boxed{ group itself.
                    close_pos = body_start + offset
                    break

        if close_pos is None:
            logger.debug("Unbalanced braces in \\boxed{} content")
            return None

        content = text[body_start:close_pos].strip()
        logger.debug(f"Extracted boxed content: {content}")
        return content
class PythonListStrategy(BaseExtractorStrategy):
    r"""Extracts and normalizes Python lists."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Extract and normalize a Python list.

        The text must be a single list literal. Its elements are sorted by
        their string form so that lists differing only in element order
        produce the same output.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: ``repr`` of the sorted list, or None when the
                text is not wrapped in brackets, cannot be parsed as a
                literal, or does not evaluate to a list.
        """
        text = text.strip()
        if not (text.startswith('[') and text.endswith(']')):
            logger.debug("Content is not a list format (missing brackets)")
            return None

        # Fix any escaped quotes before parsing
        fixed_content = text.replace('\\"', '"')
        try:
            parsed = ast.literal_eval(fixed_content)
        except (
            SyntaxError,
            ValueError,
            # literal_eval also raises TypeError for unhashable dict keys
            # or set members nested in the literal, and MemoryError /
            # RecursionError for very large or deeply nested input.
            TypeError,
            MemoryError,
            RecursionError,
        ) as e:
            logger.debug(f"Failed to parse as Python list: {e}")
            return None

        if not isinstance(parsed, list):
            logger.debug(f"Content is not a list, got {type(parsed)}")
            return None

        # Sort by string form so mixed-type elements never raise on compare.
        return repr(sorted(parsed, key=str))
class PythonDictStrategy(BaseExtractorStrategy):
    r"""Extracts and normalizes Python dictionaries."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Extract and normalize a Python dictionary.

        The text must be a single dict literal. Its items are re-inserted
        in order of the string form of their keys so that dictionaries
        differing only in insertion order produce the same output.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: ``repr`` of the key-sorted dictionary, or None
                when the text is not wrapped in braces, cannot be parsed
                as a literal, or does not evaluate to a dict.
        """
        text = text.strip()
        if not (text.startswith('{') and text.endswith('}')):
            logger.debug("Content is not a dictionary format (missing braces)")
            return None

        # Fix any escaped quotes before parsing
        fixed_content = text.replace('\\"', '"')
        try:
            parsed = ast.literal_eval(fixed_content)
        except (
            SyntaxError,
            ValueError,
            # literal_eval also raises TypeError for unhashable keys such
            # as ``{[1]: 2}``, and MemoryError / RecursionError for very
            # large or deeply nested input.
            TypeError,
            MemoryError,
            RecursionError,
        ) as e:
            logger.debug(f"Failed to parse as Python dictionary: {e}")
            return None

        if not isinstance(parsed, dict):
            logger.debug(f"Content is not a dictionary, got {type(parsed)}")
            return None

        # Sort on str(key) so keys of mixed types never raise on compare.
        sorted_dict = dict(
            sorted(parsed.items(), key=lambda item: str(item[0]))
        )
        return repr(sorted_dict)
class PythonSetStrategy(BaseExtractorStrategy):
    r"""Extracts and normalizes Python sets."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Extract and normalize a Python set.

        Two spellings are accepted: a set display such as ``{1, 2, 3}`` and
        a constructor call such as ``set([1, 2, 3])`` or ``set()``. The
        result is always written as a set display whose elements are
        ordered by their string form, so equal sets produce the same
        string regardless of hash order.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: The normalized set (``"set()"`` when empty), or
                None when the text has neither accepted shape, cannot be
                parsed as a literal, or does not describe a set.
        """
        text = text.strip()
        is_display = text.startswith('{') and text.endswith('}')
        is_call = text.startswith('set(') and text.endswith(')')
        if not (is_display or is_call):
            logger.debug("Content is not a set format")
            return None

        # Fix any escaped quotes before parsing
        fixed_content = text.replace('\\"', '"')
        try:
            if is_call:
                # literal_eval rejects call expressions, so evaluate only
                # the argument and build the set from it here.
                argument_src = fixed_content[len('set('):-1].strip()
                if argument_src:
                    argument = ast.literal_eval(argument_src)
                    if not isinstance(argument, (list, tuple, set)):
                        logger.debug(
                            "set() argument is not a list, tuple or set, "
                            f"got {type(argument)}"
                        )
                        return None
                    parsed = set(argument)
                else:
                    parsed = set()
            else:
                parsed = ast.literal_eval(fixed_content)
        except (
            SyntaxError,
            ValueError,
            # Unhashable members (e.g. ``{[1, 2]}``) surface as TypeError;
            # very large or deeply nested input can exhaust memory or the
            # recursion limit.
            TypeError,
            MemoryError,
            RecursionError,
        ) as e:
            logger.debug(f"Failed to parse as Python set: {e}")
            return None

        if not isinstance(parsed, set):
            logger.debug(f"Content is not a set, got {type(parsed)}")
            return None

        if not parsed:
            return "set()"

        # repr(set) follows hash order, which differs between runs for
        # strings, so the display is assembled from a sorted list instead.
        # repr() breaks ties between members whose str() is identical
        # (e.g. 1 and '1').
        ordered = sorted(parsed, key=lambda item: (str(item), repr(item)))
        return "{" + ", ".join(map(repr, ordered)) + "}"
class PythonTupleStrategy(BaseExtractorStrategy):
    r"""Extracts and normalizes Python tuples."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Extract and normalize a Python tuple.

        The text must be a single tuple literal such as ``(1, 2, 3)`` or
        ``(1,)``. Its elements are sorted by their string form so that
        tuples differing only in element order produce the same output.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: ``repr`` of the sorted tuple, or None when the
                text is not wrapped in parentheses, cannot be parsed as a
                literal, or does not evaluate to a tuple (``(1)`` is an
                int, not a tuple).
        """
        text = text.strip()
        if not (text.startswith('(') and text.endswith(')')):
            logger.debug("Content is not a tuple format (missing parentheses)")
            return None

        # Fix any escaped quotes before parsing
        fixed_content = text.replace('\\"', '"')
        try:
            parsed = ast.literal_eval(fixed_content)
        except (
            SyntaxError,
            ValueError,
            # literal_eval also raises TypeError for unhashable dict keys
            # or set members nested in the literal, and MemoryError /
            # RecursionError for very large or deeply nested input.
            TypeError,
            MemoryError,
            RecursionError,
        ) as e:
            logger.debug(f"Failed to parse as Python tuple: {e}")
            return None

        if not isinstance(parsed, tuple):
            logger.debug(f"Content is not a tuple, got {type(parsed)}")
            return None

        # Sort by string form so mixed-type elements never raise on compare.
        return repr(tuple(sorted(parsed, key=str)))