Source code for camel.toolkits.dalle_toolkit
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
import base64
import os
import uuid
from io import BytesIO
from typing import List, Optional
from openai import OpenAI
from PIL import Image
from camel.toolkits import FunctionTool
from camel.toolkits.base import BaseToolkit
[docs]
class DalleToolkit(BaseToolkit):
r"""A class representing a toolkit for image generation using OpenAI's
DALL-E model.
"""
[docs]
def base64_to_image(self, base64_string: str) -> Optional[Image.Image]:
r"""Converts a base64 encoded string into a PIL Image object.
Args:
base64_string (str): The base64 encoded string of the image.
Returns:
Optional[Image.Image]: The PIL Image object or None if conversion
fails.
"""
try:
# Decode the base64 string to get the image data
image_data = base64.b64decode(base64_string)
# Create a memory buffer for the image data
image_buffer = BytesIO(image_data)
# Open the image using the PIL library
image = Image.open(image_buffer)
return image
except Exception as e:
print(f"An error occurred while converting base64 to image: {e}")
return None
[docs]
def image_path_to_base64(self, image_path: str) -> str:
r"""Converts the file path of an image to a Base64 encoded string.
Args:
image_path (str): The path to the image file.
Returns:
str: A Base64 encoded string representing the content of the image
file.
"""
try:
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
except Exception as e:
print(
f"An error occurred while converting image path to base64: {e}"
)
return ""
[docs]
def image_to_base64(self, image: Image.Image) -> str:
r"""Converts an image into a base64-encoded string.
This function takes an image object as input, encodes the image into a
PNG format base64 string, and returns it.
If the encoding process encounters an error, it prints the error
message and returns None.
Args:
image: The image object to be encoded, supports any image format
that can be saved in PNG format.
Returns:
str: A base64-encoded string of the image.
"""
try:
with BytesIO() as buffered_image:
image.save(buffered_image, format="PNG")
buffered_image.seek(0)
image_bytes = buffered_image.read()
base64_str = base64.b64encode(image_bytes).decode('utf-8')
return base64_str
except Exception as e:
print(f"An error occurred: {e}")
return ""
[docs]
def get_dalle_img(self, prompt: str, image_dir: str = "img") -> str:
r"""Generate an image using OpenAI's DALL-E model.
The generated image is saved to the specified directory.
Args:
prompt (str): The text prompt based on which the image is
generated.
image_dir (str): The directory to save the generated image.
Defaults to 'img'.
Returns:
str: The path to the saved image.
"""
dalle_client = OpenAI()
response = dalle_client.images.generate(
model="dall-e-3",
prompt=prompt,
size="1024x1792",
quality="standard",
n=1, # NOTE: now dall-e-3 only supports n=1
response_format="b64_json",
)
image_b64 = response.data[0].b64_json
image = self.base64_to_image(image_b64) # type: ignore[arg-type]
if image is None:
raise ValueError("Failed to convert base64 string to image.")
os.makedirs(image_dir, exist_ok=True)
image_path = os.path.join(image_dir, f"{uuid.uuid4()}.png")
image.save(image_path)
return image_path
[docs]
def get_tools(self) -> List[FunctionTool]:
r"""Returns a list of FunctionTool objects representing the
functions in the toolkit.
Returns:
List[FunctionTool]: A list of FunctionTool objects
representing the functions in the toolkit.
"""
return [FunctionTool(self.get_dalle_img)]