feat: +openai text-to-image

This commit is contained in:
莘权 马 2023-08-17 21:51:50 +08:00
parent eb232efdfc
commit 60245fbe90
3 changed files with 143 additions and 5 deletions

View file

@ -4,7 +4,7 @@
@Time : 2023/8/17
@Author : mashenquan
@File : azure_tts.py
@Desc : azure TTS openapi, which provides text-to-speech functionality
@Desc : azure TTS OAS3 api, which provides text-to-speech functionality
"""
from pathlib import Path
from uuid import uuid4
@ -69,7 +69,7 @@ def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key
:param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery`
:param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
:param text: Text to convert
:param text: The text used for voice conversion.
:param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
:param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
:return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string.
@ -110,5 +110,5 @@ def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key
if __name__ == "__main__":
initalize_enviroment()
v = openapi_azsure_tts("测试test")
v = oas3_azsure_tts("测试test")
print(v)

View file

@ -0,0 +1,100 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2023/8/17
@Author : mashenquan
@File : openai_text_2_image.py
@Desc : OpenAI Text-to-Image OAS3 api, which provides text-to-image functionality.
"""
import base64
import os
import sys
from pathlib import Path
from typing import List
import requests
from pydantic import BaseModel
sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
from metagpt.utils.common import initalize_enviroment
from metagpt.logs import logger
class OpenAIText2Image:
    """Thin client for the OpenAI image-generation endpoint that returns images as Base64 text."""

    # Seconds to wait on each HTTP call. `requests` has no default timeout, so
    # without this a stalled connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 120

    def __init__(self, openai_api_key):
        """
        :param openai_api_key: OpenAI API key. For more details, see: `https://platform.openai.com/account/api-keys`.
            When empty or None, the `OPENAI_API_KEY` environment variable is used instead.
        """
        self.openai_api_key = openai_api_key or os.environ.get("OPENAI_API_KEY")

    def text_2_image(self, text, size_type="1024x1024"):
        """Generate one image from a text prompt and return it Base64-encoded.

        :param text: The text used for image conversion.
        :param size_type: One of ['256x256', '512x512', '1024x1024']
        :return: The Base64-encoded image data, or an empty string if no API key is
            configured, the request fails, the response cannot be parsed, or no image
            is returned.
        """
        if not self.openai_api_key:
            # Avoid a pointless network round trip carrying "Bearer None".
            logger.error("An error occurred:OpenAI API key is not set")
            return ""

        # Minimal schema of the response fields this method relies on.
        class ImageUrl(BaseModel):
            url: str

        class ImageResult(BaseModel):
            data: List[ImageUrl]
            created: int

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.openai_api_key}",
        }
        payload = {"prompt": text, "n": 1, "size": size_type}
        try:
            response = requests.post(
                "https://api.openai.com/v1/images/generations",
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
            result = ImageResult(**response.json())
        except (requests.exceptions.RequestException, ValueError, TypeError) as e:
            # ValueError: malformed JSON body or pydantic validation failure
            # (pydantic's ValidationError derives from ValueError).
            # TypeError: the JSON body is not an object and cannot be unpacked with `**`.
            logger.error(f"An error occurred:{e}")
            return ""

        if not result.data:
            return ""
        return OpenAIText2Image.get_image_data(result.data[0].url)

    @staticmethod
    def get_image_data(url):
        """Fetch image data from a URL and encode it as Base64

        :param url: Image url
        :return: Base64-encoded image data, or an empty string if the download fails.
        """
        try:
            response = requests.get(url, timeout=OpenAIText2Image.REQUEST_TIMEOUT)
            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
            image_data = response.content
        except requests.exceptions.RequestException as e:
            logger.error(f"An error occurred:{e}")
            return ""
        return base64.b64encode(image_data).decode("utf-8")
# Export
def oas3_openai_text_2_image(text, size_type: str = "1024x1024", openai_api_key=""):
    """Convert a text prompt into an image (OAS3 entry point).

    :param text: The text used for image conversion. A falsy value short-circuits to an empty string.
    :param size_type: One of ['256x256', '512x512', '1024x1024']
    :param openai_api_key: OpenAI API key; when empty, the `OPENAI_API_KEY` environment variable is used.
        For more details, see: `https://platform.openai.com/account/api-keys`
    :return: Whatever `OpenAIText2Image.text_2_image` returns for the prompt, or an empty string when `text` is falsy.
    """
    if text:
        api_key = openai_api_key or os.environ.get("OPENAI_API_KEY")
        generator = OpenAIText2Image(api_key)
        return generator.text_2_image(text, size_type=size_type)
    return ""
if __name__ == "__main__":
    # Manual smoke test: generate one image and print what the export function returns.
    # NOTE(review): initalize_enviroment() presumably loads settings such as
    # OPENAI_API_KEY into the environment -- confirm against metagpt.utils.common.
    initalize_enviroment()
    print(oas3_openai_text_2_image("Panda emoji"))

View file

@ -59,6 +59,44 @@ paths:
result:
type: string
'400':
description: Bad Request
description: "Bad Request"
'500':
description: Bad Request
description: "Internal Server Error"
/txt2img/openai:
post:
summary: "Convert Text to Base64-encoded Image Data Stream"
operationId: openai_text_2_image.oas3_openai_text_2_image
requestBody:
required: true
content:
application/json:
schema:
type: object
properties:
text:
type: string
description: "The text used for image conversion."
size_type:
type: string
enum: ["256x256", "512x512", "1024x1024"]
default: "1024x1024"
description: "Size of the generated image."
openai_api_key:
type: string
default: ""
description: "OpenAI API key. For more details, see: `https://platform.openai.com/account/api-keys`"
responses:
'200':
description: "Base64-encoded image data."
content:
application/json:
schema:
type: object
properties:
image_data:
type: string
'400':
description: "Bad Request"
'500':
description: "Internal Server Error"