feat: +openai text-to-image

2026-06-08 15:05:17 +02:00 · 2023-08-17 21:51:50 +08:00 · 2023-08-17 21:51:50 +08:00 · 60245fbe90
commit 60245fbe90
parent eb232efdfc
3 changed files with 143 additions and 5 deletions
--- a/metagpt/tools/azure_tts.py
+++ b/metagpt/tools/azure_tts.py
@ -4,7 +4,7 @@
@Time    : 2023/8/17
@Author  : mashenquan
@File    : azure_tts.py
-@Desc    : azure TTS openapi, which provides text-to-speech functionality
+@Desc    : azure TTS OAS3 api, which provides text-to-speech functionality
 """
 from pathlib import Path
 from uuid import uuid4
@ -69,7 +69,7 @@ def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key
    :param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery`
    :param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
    :param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
-    :param text: Text to convert
+    :param text: The text used for voice conversion.
    :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
    :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
    :return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string.
@ -110,5 +110,5 @@ def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key
 if __name__ == "__main__":
    initalize_enviroment()

-    v = openapi_azsure_tts("测试，test")
+    v = oas3_azsure_tts("测试，test")
    print(v)
--- a/metagpt/tools/openai_text_2_image.py
+++ b/metagpt/tools/openai_text_2_image.py
@ -0,0 +1,100 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2023/8/17
+@Author  : mashenquan
+@File    : openai_text_2_image.py
+@Desc    : OpenAI Text-to-Image OAS3 api, which provides text-to-image functionality.
+"""
+import base64
+import os
+import sys
+from pathlib import Path
+from typing import List
+
+import requests
+from pydantic import BaseModel
+
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent))  # fix-bug: No module named 'metagpt'
+from metagpt.utils.common import initalize_enviroment
+from metagpt.logs import logger
+
+
+class OpenAIText2Image:
+    """Thin client for the OpenAI image-generation endpoint.
+
+    It asks OpenAI to render one image for a prompt, downloads the image from the
+    URL found in the response, and hands it back as a Base64 string. Every failure
+    is logged and reported to the caller as an empty string.
+    """
+
+    # Seconds allowed for each HTTP call. `requests` has no default timeout, so
+    # without this a stalled connection would block the caller indefinitely.
+    REQUEST_TIMEOUT = 120
+
+    def __init__(self, openai_api_key):
+        """
+        :param openai_api_key: OpenAI API key; when empty, the `OPENAI_API_KEY` environment variable is used instead.
+            For more details, check out: `https://platform.openai.com/account/api-keys`
+        """
+        self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY')
+
+    def text_2_image(self, text, size_type="1024x1024"):
+        """Text to image
+
+        :param text: The text used for image conversion.
+        :param size_type: One of ['256x256', '512x512', '1024x1024']
+        :return: The image data in Base64 encoding, or an empty string if the request,
+            the response parsing, or the image download fails.
+        """
+
+        class ImageUrl(BaseModel):
+            url: str
+
+        class ImageResult(BaseModel):
+            data: List[ImageUrl]
+            created: int
+
+        if not self.openai_api_key:
+            # Fail fast instead of sending a literal "Bearer None" header over the network.
+            logger.error("An error occurred:OpenAI API key is missing")
+            return ""
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.openai_api_key}"
+        }
+        data = {"prompt": text, "n": 1, "size": size_type}
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/images/generations",
+                headers=headers,
+                json=data,
+                timeout=self.REQUEST_TIMEOUT,
+            )
+            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
+            result = ImageResult(**response.json())
+        except (requests.exceptions.RequestException, TypeError, ValueError) as e:
+            # ValueError covers undecodable JSON and pydantic validation failures;
+            # TypeError covers a JSON payload that is not an object and so cannot be
+            # unpacked with `**`. Both mean "no usable image", same as a transport error.
+            logger.error(f"An error occurred:{e}")
+            return ""
+        if result.data:
+            return OpenAIText2Image.get_image_data(result.data[0].url)
+        return ""
+
+    @staticmethod
+    def get_image_data(url):
+        """Fetch image data from a URL and encode it as Base64
+
+        :param url: Image url
+        :return: Base64-encoded image data, or an empty string if the download fails.
+        """
+        try:
+            response = requests.get(url, timeout=OpenAIText2Image.REQUEST_TIMEOUT)
+            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
+            image_data = response.content
+            base64_image = base64.b64encode(image_data).decode("utf-8")
+            return base64_image
+
+        except requests.exceptions.RequestException as e:
+            logger.error(f"An error occurred:{e}")
+            return ""
+
+
+# Export
+def oas3_openai_text_2_image(text, size_type: str = "1024x1024", openai_api_key=""):
+    """Convert text to a Base64-encoded image using OpenAI.
+
+    :param text: The text used for image conversion; an empty value yields an empty string without any request.
+    :param size_type: One of ['256x256', '512x512', '1024x1024']
+    :param openai_api_key: OpenAI API key; when empty, the `OPENAI_API_KEY` environment variable is used.
+        For more details, checkout: `https://platform.openai.com/account/api-keys`
+    :return: The image data is returned in Base64 encoding.
+    """
+    if text:
+        # An explicit key wins; otherwise fall back to the environment.
+        api_key = openai_api_key or os.environ.get("OPENAI_API_KEY")
+        client = OpenAIText2Image(api_key)
+        return client.text_2_image(text, size_type=size_type)
+    return ""
+
+
+if __name__ == "__main__":
+    # Manual check: run the project's environment initialisation, then generate
+    # one image and print the Base64 data that comes back.
+    initalize_enviroment()
+
+    print(oas3_openai_text_2_image("Panda emoji"))
--- a/spec/metagpt_oas3_api.yaml
+++ b/spec/metagpt_oas3_api.yaml
@ -59,6 +59,44 @@ paths:
                  result:
                    type: string
        '400':
-          description: Bad Request
+          description: "Bad Request"
        '500':
-          description: Bad Request
+          description: "Internal Server Error"
+
+  # Text-to-image endpoint. The operationId below names the handler function
+  # `oas3_openai_text_2_image` in module `openai_text_2_image`; the request body
+  # properties mirror that function's parameters (text, size_type, openai_api_key).
+  /txt2img/openai:
+    post:
+      summary: "Convert Text to Base64-encoded Image Data Stream"
+      operationId: openai_text_2_image.oas3_openai_text_2_image
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              # NOTE(review): no `required` list is declared, so clients may omit
+              # `text` — confirm how the handler is invoked in that case.
+              properties:
+                text:
+                  type: string
+                  description: "The text used for image conversion."
+                size_type:
+                  type: string
+                  enum: ["256x256", "512x512", "1024x1024"]
+                  default: "1024x1024"
+                  description: "Size of the generated image."
+                openai_api_key:
+                  type: string
+                  default: ""
+                  description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
+      responses:
+        '200':
+          description: "Base64-encoded image data."
+          content:
+            application/json:
+              schema:
+                # NOTE(review): the handler in metagpt/tools/openai_text_2_image.py
+                # returns the Base64 string itself, while this schema describes an
+                # object with an `image_data` field — confirm which shape clients get.
+                type: object
+                properties:
+                  image_data:
+                    type: string
+        '400':
+          description: "Bad Request"
+        '500':
+          description: "Internal Server Error"