From 60245fbe902287cc40ea0643d7764da0f50da29a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?=
Date: Thu, 17 Aug 2023 21:51:50 +0800
Subject: [PATCH] feat: +openai text-to-image

---
 metagpt/tools/azure_tts.py           |   6 +-
 metagpt/tools/openai_text_2_image.py | 109 +++++++++++++++++++++++++++
 spec/metagpt_oas3_api.yaml           |  42 ++++++++++-
 3 files changed, 152 insertions(+), 5 deletions(-)
 create mode 100644 metagpt/tools/openai_text_2_image.py

diff --git a/metagpt/tools/azure_tts.py b/metagpt/tools/azure_tts.py
index 035a85108..5d0001b27 100644
--- a/metagpt/tools/azure_tts.py
+++ b/metagpt/tools/azure_tts.py
@@ -4,7 +4,7 @@
 @Time : 2023/8/17
 @Author : mashenquan
 @File : azure_tts.py
-@Desc : azure TTS openapi, which provides text-to-speech functionality
+@Desc : azure TTS OAS3 api, which provides text-to-speech functionality
 """
 from pathlib import Path
 from uuid import uuid4
@@ -69,7 +69,7 @@ def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key
     :param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery`
     :param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
     :param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
-    :param text: Text to convert
+    :param text: The text used for voice conversion.
     :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
     :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
     :return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string.
@@ -110,5 +110,5 @@ def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key
 if __name__ == "__main__":
     initalize_enviroment()
 
-    v = openapi_azsure_tts("测试,test")
+    v = oas3_azsure_tts("测试,test")
     print(v)
diff --git a/metagpt/tools/openai_text_2_image.py b/metagpt/tools/openai_text_2_image.py
new file mode 100644
index 000000000..3d2a2bbfc
--- /dev/null
+++ b/metagpt/tools/openai_text_2_image.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/17
+@Author : mashenquan
+@File : openai_text_2_image.py
+@Desc : OpenAI Text-to-Image OAS3 api, which provides text-to-image functionality.
+"""
+import base64
+import os
+import sys
+from pathlib import Path
+from typing import List
+
+import requests
+from pydantic import BaseModel
+
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent))  # fix-bug: No module named 'metagpt'
+from metagpt.utils.common import initalize_enviroment
+from metagpt.logs import logger
+
+
+class OpenAIText2Image:
+    """Thin client for the OpenAI image-generation REST endpoint."""
+
+    # `requests` waits forever unless a timeout is given, so bound every HTTP call (seconds).
+    REQUEST_TIMEOUT = 60
+
+    def __init__(self, openai_api_key):
+        """
+        :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`. Falls back to the `OPENAI_API_KEY` environment variable when empty.
+        """
+        self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY')
+
+    def text_2_image(self, text, size_type="1024x1024"):
+        """Text to image
+
+        :param text: The text used for image conversion.
+        :param size_type: One of ['256x256', '512x512', '1024x1024']
+        :return: The Base64-encoded image data if successful, otherwise an empty string.
+        """
+
+        class ImageUrl(BaseModel):
+            url: str
+
+        class ImageResult(BaseModel):
+            data: List[ImageUrl]
+            created: int
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.openai_api_key}"
+        }
+        data = {"prompt": text, "n": 1, "size": size_type}
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/images/generations",
+                headers=headers,
+                json=data,
+                timeout=self.REQUEST_TIMEOUT,
+            )
+            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
+            result = ImageResult(**response.json())
+        except (requests.exceptions.RequestException, ValueError, TypeError) as e:
+            # ValueError/TypeError: the body was not JSON or did not match ImageResult.
+            logger.error(f"An error occurred:{e}")
+            return ""
+        if result.data:
+            # The API answers with a URL; download it to honour the Base64 contract.
+            return OpenAIText2Image.get_image_data(result.data[0].url)
+        return ""
+
+    @staticmethod
+    def get_image_data(url):
+        """Fetch image data from a URL and encode it as Base64
+
+        :param url: Image url
+        :return: Base64-encoded image data, or an empty string if the download fails.
+        """
+        try:
+            response = requests.get(url, timeout=OpenAIText2Image.REQUEST_TIMEOUT)
+            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
+        except requests.exceptions.RequestException as e:
+            logger.error(f"An error occurred:{e}")
+            return ""
+        return base64.b64encode(response.content).decode("utf-8")
+
+
+# Export
+def oas3_openai_text_2_image(text, size_type: str = "1024x1024", openai_api_key=""):
+    """Text to image
+
+    :param text: The text used for image conversion.
+    :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`
+    :param size_type: One of ['256x256', '512x512', '1024x1024']
+    :return: The Base64-encoded image data if successful, otherwise an empty string.
+    """
+    if not text:
+        return ""
+    if not openai_api_key:
+        openai_api_key = os.environ.get("OPENAI_API_KEY")
+    return OpenAIText2Image(openai_api_key).text_2_image(text, size_type=size_type)
+
+
+if __name__ == "__main__":
+    initalize_enviroment()
+
+    v = oas3_openai_text_2_image("Panda emoji")
+    print(v)
diff --git a/spec/metagpt_oas3_api.yaml b/spec/metagpt_oas3_api.yaml
index 5a3e6923b..70c15d590 100644
--- a/spec/metagpt_oas3_api.yaml
+++ b/spec/metagpt_oas3_api.yaml
@@ -59,6 +59,44 @@ paths:
                   result:
                     type: string
         '400':
-          description: Bad Request
+          description: "Bad Request"
         '500':
-          description: Bad Request
\ No newline at end of file
+          description: "Internal Server Error"
+
+  /txt2img/openai:
+    post:
+      summary: "Convert Text to Base64-encoded Image Data Stream"
+      operationId: openai_text_2_image.oas3_openai_text_2_image
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                text:
+                  type: string
+                  description: "The text used for image conversion."
+                size_type:
+                  type: string
+                  enum: ["256x256", "512x512", "1024x1024"]
+                  default: "1024x1024"
+                  description: "Size of the generated image."
+                openai_api_key:
+                  type: string
+                  default: ""
+                  description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
+      responses:
+        '200':
+          description: "Base64-encoded image data."
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  image_data:
+                    type: string
+        '400':
+          description: "Bad Request"
+        '500':
+          description: "Internal Server Error"
\ No newline at end of file