diff --git a/.gitignore b/.gitignore index e326e8372..1a9741e91 100644 --- a/.gitignore +++ b/.gitignore @@ -167,3 +167,5 @@ output.wav # output folder output +tmp.png + diff --git a/.well-known/ai-plugin.json b/.well-known/ai-plugin.json new file mode 100644 index 000000000..44e8435f2 --- /dev/null +++ b/.well-known/ai-plugin.json @@ -0,0 +1,18 @@ +{ + "schema_version": "v1", + "name_for_model": "text processing tools", + "name_for_human": "MetaGPT Text Plugin", + "description_for_model": "Plugins for text processing, including text-to-speech, text-to-image, text-to-embedding, text summarization, text-to-code, vector similarity calculation, web content crawling, and more.", + "description_for_human": "Plugins for text processing, including text-to-speech, text-to-image, text-to-embedding, text summarization, text-to-code, vector similarity calculation, web content crawling, and more.", + "auth": { + "type": "none" + }, + "api": { + "type": "openapi", + "url": "https://github.com/iorisa/MetaGPT/blob/feature/oas3/.well-known/metagpt_oas3_api.yaml", + "has_user_authentication": false + }, + "logo_url": "https://github.com/iorisa/MetaGPT/blob/feature/oas3/docs/resources/MetaGPT-logo.png", + "contact_email": "mashenquan@fuzhi.cn", + "legal_info_url": "https://github.com/iorisa/MetaGPT/blob/feature/oas3/docs/README_CN.md" +} \ No newline at end of file diff --git a/.well-known/metagpt_oas3_api.yaml b/.well-known/metagpt_oas3_api.yaml new file mode 100644 index 000000000..a226181a5 --- /dev/null +++ b/.well-known/metagpt_oas3_api.yaml @@ -0,0 +1,236 @@ +openapi: "3.0.0" + +info: + title: "MetaGPT Export OpenAPIs" + version: "1.0" +servers: + - url: "/oas3" + variables: + port: + default: '8080' + description: HTTP service port + +paths: + /tts/azsure: + post: + summary: "Convert Text to Base64-encoded .wav File Stream" + description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + operationId: azure_tts.oas3_azsure_tts + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - text + properties: + text: + type: string + description: Text to convert + lang: + type: string + description: The language code or locale, e.g., en-US (English - United States) + default: "zh-CN" + voice: + type: string + description: "Voice style, see: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts), [Voice Gallery](https://speech.microsoft.com/portal/voicegallery)" + default: "zh-CN-XiaomoNeural" + style: + type: string + description: "Speaking style to express different emotions. For more details, checkout: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + default: "affectionate" + role: + type: string + description: "Role to specify age and gender. For more details, checkout: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + default: "Girl" + subscription_key: + type: string + description: "Key used to access Azure AI service API, see: [Azure Portal](https://portal.azure.com/) > `Resource Management` > `Keys and Endpoint`" + default: "" + region: + type: string + description: "Location (or region) of your resource, see: [Azure Portal](https://portal.azure.com/) > `Resource Management` > `Keys and Endpoint`" + default: "" + responses: + '200': + description: "Base64-encoded .wav file data if successful, otherwise an empty string." + content: + application/json: + schema: + type: object + properties: + wav_data: + type: string + format: base64 + '400': + description: "Bad Request" + '500': + description: "Internal Server Error" + + /txt2img/openai: + post: + summary: "Convert Text to Base64-encoded Image Data Stream" + operationId: openai_text_to_image.oas3_openai_text_to_image + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + text: + type: string + description: "The text used for image conversion." + size_type: + type: string + enum: ["256x256", "512x512", "1024x1024"] + default: "1024x1024" + description: "Size of the generated image." + openai_api_key: + type: string + default: "" + description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`" + responses: + '200': + description: "Base64-encoded image data." + content: + application/json: + schema: + type: object + properties: + image_data: + type: string + format: base64 + '400': + description: "Bad Request" + '500': + description: "Internal Server Error" + /txt2embedding/openai: + post: + summary: Text to embedding + operationId: openai_text_to_embedding.oas3_openai_text_to_embedding + description: Retrieve an embedding for the provided text using the OpenAI API. + requestBody: + content: + application/json: + schema: + type: object + properties: + input: + type: string + description: The text used for embedding. + model: + type: string + description: "ID of the model to use. For more details, checkout: [models](https://api.openai.com/v1/models)" + enum: + - text-embedding-ada-002 + responses: + "200": + description: Successful response + content: + application/json: + schema: + $ref: "#/components/schemas/ResultEmbedding" + "4XX": + description: Client error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "5XX": + description: Server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + /txt2image/metagpt: + post: + summary: "Text to Image" + description: "Generate an image from the provided text using the MetaGPT Text-to-Image API." + operationId: metagpt_text_to_image.oas3_metagpt_text_to_image + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - text + properties: + text: + type: string + description: "The text used for image conversion." + size_type: + type: string + enum: ["512x512", "512x768"] + default: "512x512" + description: "Size of the generated image." + model_url: + type: string + description: "Model reset API URL for text-to-image." + default: "" + responses: + '200': + description: "Base64-encoded image data." + content: + application/json: + schema: + type: object + properties: + image_data: + type: string + format: base64 + '400': + description: "Bad Request" + '500': + description: "Internal Server Error" + +components: + schemas: + Embedding: + type: object + description: Represents an embedding vector returned by the embedding endpoint. + properties: + object: + type: string + example: embedding + embedding: + type: array + items: + type: number + example: [0.0023064255, -0.009327292, ...] + index: + type: integer + example: 0 + Usage: + type: object + properties: + prompt_tokens: + type: integer + example: 8 + total_tokens: + type: integer + example: 8 + ResultEmbedding: + type: object + properties: + object: + type: string + example: result_embedding + data: + type: array + items: + $ref: "#/components/schemas/Embedding" + model: + type: string + example: text-embedding-ada-002 + usage: + $ref: "#/components/schemas/Usage" + Error: + type: object + properties: + error: + type: string + example: An error occurred \ No newline at end of file diff --git a/.well-known/openapi.yaml b/.well-known/openapi.yaml new file mode 100644 index 000000000..bc291b7db --- /dev/null +++ b/.well-known/openapi.yaml @@ -0,0 +1,35 @@ +openapi: "3.0.0" + +info: + title: Hello World + version: "1.0" +servers: + - url: /openapi + +paths: + /greeting/{name}: + post: + summary: Generate greeting + description: Generates a greeting message. + operationId: hello.post_greeting + responses: + 200: + description: greeting response + content: + text/plain: + schema: + type: string + example: "hello dave!" + parameters: + - name: name + in: path + description: Name of the person to greet. + required: true + schema: + type: string + example: "dave" + requestBody: + content: + application/json: + schema: + type: object \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index 303f4824b..6e9a61931 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -70,3 +70,6 @@ SD_T2I_API: "/sdapi/v1/txt2img" ### for Research MODEL_FOR_RESEARCHER_SUMMARY: gpt-3.5-turbo MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k + +### Meta Models +#METAGPT_TEXT_TO_IMAGE_MODEL: MODEL_URL \ No newline at end of file diff --git a/metagpt/actions/azure_tts.py b/metagpt/actions/azure_tts.py deleted file mode 100644 index f528ba001..000000000 --- a/metagpt/actions/azure_tts.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -@Time : 2023/6/9 22:22 -@Author : Leo Xiao -@File : azure_tts.py -""" -from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer - -from metagpt.actions.action import Action -from metagpt.config import Config - - -class AzureTTS(Action): - def __init__(self, name, context=None, llm=None): - super().__init__(name, context, llm) - self.config = Config() - - # 参数参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles - def synthesize_speech(self, lang, voice, role, text, output_file): - subscription_key = self.config.get('AZURE_TTS_SUBSCRIPTION_KEY') - region = self.config.get('AZURE_TTS_REGION') - speech_config = SpeechConfig( - subscription=subscription_key, region=region) - - speech_config.speech_synthesis_voice_name = voice - audio_config = AudioConfig(filename=output_file) - synthesizer = SpeechSynthesizer( - speech_config=speech_config, - audio_config=audio_config) - - # if voice=="zh-CN-YunxiNeural": - ssml_string = f""" - - - - {text} - - - - """ - - synthesizer.speak_ssml_async(ssml_string).get() - - -if __name__ == "__main__": - azure_tts = AzureTTS("azure_tts") - azure_tts.synthesize_speech( - "zh-CN", - "zh-CN-YunxiNeural", - "Boy", - "你好,我是卡卡", - "output.wav") diff --git a/metagpt/learn/text_to_embedding.py b/metagpt/learn/text_to_embedding.py new file mode 100644 index 000000000..281815ca6 --- /dev/null +++ b/metagpt/learn/text_to_embedding.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/18 +@Author : mashenquan +@File : text_to_embedding.py +@Desc : Text-to-Embedding skill, which provides text-to-embedding functionality. +""" +import os + +from metagpt.tools.openai_text_to_embedding import oas3_openai_text_to_embedding +from metagpt.utils.common import initialize_environment + + +def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""): + """Text to embedding + + :param text: The text used for embedding. + :param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, checkout: `https://api.openai.com/v1/models`. + :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys` + :return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`. + """ + initialize_environment() + if os.environ.get("OPENAI_API_KEY") or openai_api_key: + return oas3_openai_text_to_embedding(text, model=model, openai_api_key=openai_api_key) + raise EnvironmentError diff --git a/metagpt/learn/text_to_image.py b/metagpt/learn/text_to_image.py new file mode 100644 index 000000000..0932dfe07 --- /dev/null +++ b/metagpt/learn/text_to_image.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/18 +@Author : mashenquan +@File : text_to_image.py +@Desc : Text-to-Image skill, which provides text-to-image functionality. +""" +import os + +from metagpt.tools.metagpt_text_to_image import oas3_metagpt_text_to_image +from metagpt.tools.openai_text_to_image import oas3_openai_text_to_image +from metagpt.utils.common import initialize_environment + + +def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url=""): + """Text to image + + :param text: The text used for image conversion. + :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys` + :param size_type: If using OPENAI, the available size options are ['256x256', '512x512', '1024x1024'], while for MetaGPT, the options are ['512x512', '512x768']. + :param model_url: MetaGPT model url + :return: The image data is returned in Base64 encoding. + """ + initialize_environment() + if os.environ.get("METAGPT_TEXT_TO_IMAGE_MODEL") or model_url: + return oas3_metagpt_text_to_image(text, size_type, model_url) + if os.environ.get("OPENAI_API_KEY") or openai_api_key: + return oas3_openai_text_to_image(text, size_type, openai_api_key) + raise EnvironmentError diff --git a/metagpt/learn/text_to_speech.py b/metagpt/learn/text_to_speech.py new file mode 100644 index 000000000..1b81097b8 --- /dev/null +++ b/metagpt/learn/text_to_speech.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/17 +@Author : mashenquan +@File : text_to_speech.py +@Desc : Text-to-Speech skill, which provides text-to-speech functionality +""" +import os + +from metagpt.tools.azure_tts import oas3_azsure_tts +from metagpt.utils.common import initialize_environment + + +def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl", + subscription_key="", region=""): + """Text to speech + For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + + :param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + :param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery` + :param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + :param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + :param text: The text used for voice conversion. + :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint` + :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API. + :return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string. + + """ + initialize_environment() + if (os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY") and os.environ.get("AZURE_TTS_REGION")) or \ + (subscription_key and region): + return oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region) + + raise EnvironmentError diff --git a/metagpt/tools/azure_tts.py b/metagpt/tools/azure_tts.py new file mode 100644 index 000000000..21e8f1b6c --- /dev/null +++ b/metagpt/tools/azure_tts.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/17 +@Author : mashenquan +@File : azure_tts.py +@Desc : azure TTS OAS3 api, which provides text-to-speech functionality +""" +from pathlib import Path +from uuid import uuid4 +import base64 +import sys + +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt' +from metagpt.utils.common import initialize_environment +from metagpt.logs import logger + +from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer +import os + + +class AzureTTS: + """Azure Text-to-Speech""" + + def __init__(self, subscription_key, region): + """ + :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint` + :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API. + """ + self.subscription_key = subscription_key if subscription_key else os.environ.get('AZURE_TTS_SUBSCRIPTION_KEY') + self.region = region if region else os.environ.get('AZURE_TTS_REGION') + + # 参数参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles + def synthesize_speech(self, lang, voice, text, output_file): + speech_config = SpeechConfig( + subscription=self.subscription_key, region=self.region) + speech_config.speech_synthesis_voice_name = voice + audio_config = AudioConfig(filename=output_file) + synthesizer = SpeechSynthesizer( + speech_config=speech_config, + audio_config=audio_config) + + # More detail: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice + ssml_string = "" \ + f"{text}" + + return synthesizer.speak_ssml_async(ssml_string).get() + + @staticmethod + def role_style_text(role, style, text): + return f'{text}' + + @staticmethod + def role_text(role, text): + return f'{text}' + + @staticmethod + def style_text(style, text): + return f'{text}' + + +# Export +def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key="", region=""): + """Text to speech + For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + + :param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + :param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery` + :param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + :param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + :param text: The text used for voice conversion. + :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint` + :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API. + :return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string. + + """ + if not text: + return "" + + if not lang: + lang = "zh-CN" + if not voice: + voice = "zh-CN-XiaomoNeural" + if not role: + role = "Girl" + if not style: + style = "affectionate" + if not subscription_key: + subscription_key = os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY") + if not region: + region = os.environ.get("AZURE_TTS_REGION") + + xml_value = AzureTTS.role_style_text(role=role, style=style, text=text) + tts = AzureTTS(subscription_key=subscription_key, region=region) + filename = Path(__file__).resolve().parent / (str(uuid4()).replace("-", "") + ".wav") + try: + tts.synthesize_speech(lang=lang, voice=voice, text=xml_value, output_file=str(filename)) + with open(str(filename), mode="rb") as reader: + data = reader.read() + base64_string = base64.b64encode(data).decode('utf-8') + filename.unlink() + except Exception as e: + logger.error(f"text:{text}, error:{e}") + return "" + + return base64_string + + +if __name__ == "__main__": + initialize_environment() + + v = oas3_azsure_tts("测试,test") + print(v) diff --git a/metagpt/tools/hello.py b/metagpt/tools/hello.py new file mode 100644 index 000000000..e1bad6456 --- /dev/null +++ b/metagpt/tools/hello.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/5/2 16:03 +@Author : mashenquan +@File : hello.py +@Desc : Implement the OpenAPI Specification 3.0 demo and use the following command to test the HTTP service: + + curl -X 'POST' \ + 'http://localhost:8080/openapi/greeting/dave' \ + -H 'accept: text/plain' \ + -H 'Content-Type: application/json' \ + -d '{}' +""" + +import connexion + + +# openapi implement +def post_greeting(name: str) -> str: + return f"Hello {name}\n" + + +if __name__ == "__main__": + app = connexion.AioHttpApp(__name__, specification_dir='../../.well-known/') + app.add_api("openapi.yaml", arguments={"title": "Hello World Example"}) + app.run(port=8080) diff --git a/metagpt/tools/metagpt_oas3_api_svc.py b/metagpt/tools/metagpt_oas3_api_svc.py new file mode 100644 index 000000000..277d41dfb --- /dev/null +++ b/metagpt/tools/metagpt_oas3_api_svc.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/17 +@Author : mashenquan +@File : metagpt_oas3_api_svc.py +@Desc : MetaGPT OpenAPI Specification 3.0 REST API service +""" +import asyncio +from pathlib import Path +import sys + +import connexion + +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt' +from metagpt.utils.common import initialize_environment + + +def oas_http_svc(): + """Start the OAS 3.0 OpenAPI HTTP service""" + initialize_environment() + + app = connexion.FlaskApp(__name__, specification_dir='../../.well-known/') + app.add_api("metagpt_oas3_api.yaml") + app.add_api("openapi.yaml") + app.run(port=8080) + + +async def async_main(): + """Start the OAS 3.0 OpenAPI HTTP service in the background.""" + loop = asyncio.get_event_loop() + loop.run_in_executor(None, oas_http_svc) + + # TODO: replace following codes: + while True: + await asyncio.sleep(1) + print("sleep") + + +def main(): + oas_http_svc() + + +if __name__ == "__main__": + # asyncio.run(async_main()) + main() diff --git a/metagpt/tools/metagpt_text_to_image.py b/metagpt/tools/metagpt_text_to_image.py new file mode 100644 index 000000000..393215df0 --- /dev/null +++ b/metagpt/tools/metagpt_text_to_image.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/18 +@Author : mashenquan +@File : metagpt_text_to_image.py +@Desc : MetaGPT Text-to-Image OAS3 api, which provides text-to-image functionality. +""" +import base64 +import os +import sys +from pathlib import Path +from typing import List, Dict + +import requests +from pydantic import BaseModel + +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt' +from metagpt.utils.common import initialize_environment +from metagpt.logs import logger + + +class MetaGPTText2Image: + def __init__(self, model_url): + """ + :param model_url: Model reset api url + """ + self.model_url = model_url if model_url else os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL') + + def text_2_image(self, text, size_type="512x512"): + """Text to image + + :param text: The text used for image conversion. + :param size_type: One of ['512x512', '512x768'] + :return: The image data is returned in Base64 encoding. + """ + + headers = { + "Content-Type": "application/json" + } + dims = size_type.split("x") + data = { + "prompt": text, + "negative_prompt": "(easynegative:0.8),black, dark,Low resolution", + "override_settings": {"sd_model_checkpoint": "galaxytimemachinesGTM_photoV20"}, + "seed": -1, + "batch_size": 1, + "n_iter": 1, + "steps": 20, + "cfg_scale": 11, + "width": int(dims[0]), + "height": int(dims[1]), # 768, + "restore_faces": False, + "tiling": False, + "do_not_save_samples": False, + "do_not_save_grid": False, + "enable_hr": False, + "hr_scale": 2, + "hr_upscaler": "Latent", + "hr_second_pass_steps": 0, + "hr_resize_x": 0, + "hr_resize_y": 0, + "hr_upscale_to_x": 0, + "hr_upscale_to_y": 0, + "truncate_x": 0, + "truncate_y": 0, + "applied_old_hires_behavior_to": None, + "eta": None, + "sampler_index": "DPM++ SDE Karras", + "alwayson_scripts": {}, + } + + class ImageResult(BaseModel): + images: List + parameters: Dict + + try: + response = requests.post(self.model_url, headers=headers, json=data) + response.raise_for_status() # Raise an exception for 4xx or 5xx responses + result = ImageResult(**response.json()) + if len(result.images) == 0: + return "" + return result.images[0] + except requests.exceptions.RequestException as e: + logger.error(f"An error occurred:{e}") + return "" + + +# Export +def oas3_metagpt_text_to_image(text, size_type: str = "512x512", model_url=""): + """Text to image + + :param text: The text used for image conversion. + :param model_url: Model reset api + :param size_type: One of ['512x512', '512x768'] + :return: The image data is returned in Base64 encoding. + """ + if not text: + return "" + if not model_url: + model_url = os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL') + return MetaGPTText2Image(model_url).text_2_image(text, size_type=size_type) + + +if __name__ == "__main__": + initialize_environment() + + v = oas3_metagpt_text_2_image("Panda emoji") + data = base64.b64decode(v) + with open("tmp.png", mode="wb") as writer: + writer.write(data) + print(v) diff --git a/metagpt/tools/openai_text_to_embedding.py b/metagpt/tools/openai_text_to_embedding.py new file mode 100644 index 000000000..9eddd5bc1 --- /dev/null +++ b/metagpt/tools/openai_text_to_embedding.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/18 +@Author : mashenquan +@File : openai_text_to_embedding.py +@Desc : OpenAI Text-to-Embedding OAS3 api, which provides text-to-embedding functionality. + For more details, checkout: `https://platform.openai.com/docs/api-reference/embeddings/object` +""" +import os +from pathlib import Path +from typing import List + +import requests +from pydantic import BaseModel +import sys + +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt' +from metagpt.utils.common import initialize_environment +from metagpt.logs import logger + + +class Embedding(BaseModel): + """Represents an embedding vector returned by embedding endpoint.""" + object: str # The object type, which is always "embedding". + embedding: List[ + float] # The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the embedding guide. + index: int # The index of the embedding in the list of embeddings. + + +class Usage(BaseModel): + prompt_tokens: int + total_tokens: int + + +class ResultEmbedding(BaseModel): + object: str + data: List[Embedding] + model: str + usage: Usage + + +class OpenAIText2Embedding: + def __init__(self, openai_api_key): + """ + :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys` + """ + self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY') + + def text_2_embedding(self, text, model="text-embedding-ada-002"): + """Text to embedding + + :param text: The text used for embedding. + :param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, checkout: `https://api.openai.com/v1/models`. + :return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`. + """ + + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.openai_api_key}" + } + data = {"input": text, "model": model} + try: + response = requests.post("https://api.openai.com/v1/embeddings", headers=headers, json=data) + response.raise_for_status() # Raise an exception for 4xx or 5xx responses + return response.json() + except requests.exceptions.RequestException as e: + logger.error(f"An error occurred:{e}") + return {} + + +# Export +def oas3_openai_text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""): + """Text to embedding + + :param text: The text used for embedding. + :param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, checkout: `https://api.openai.com/v1/models`. + :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys` + :return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`. + """ + if not text: + return "" + if not openai_api_key: + openai_api_key = os.environ.get("OPENAI_API_KEY") + return OpenAIText2Embedding(openai_api_key).text_2_embedding(text, model=model) + + +if __name__ == "__main__": + initialize_environment() + + v = oas3_openai_text_to_embedding("Panda emoji") + print(v) diff --git a/metagpt/tools/openai_text_to_image.py b/metagpt/tools/openai_text_to_image.py new file mode 100644 index 000000000..6ec96d166 --- /dev/null +++ b/metagpt/tools/openai_text_to_image.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/17 +@Author : mashenquan +@File : openai_text_to_image.py +@Desc : OpenAI Text-to-Image OAS3 api, which provides text-to-image functionality. +""" +import base64 +import os +import sys +from pathlib import Path +from typing import List + +import requests +from pydantic import BaseModel + +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt' +from metagpt.utils.common import initialize_environment +from metagpt.logs import logger + + +class OpenAIText2Image: + def __init__(self, openai_api_key): + """ + :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys` + """ + self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY') + + def text_2_image(self, text, size_type="1024x1024"): + """Text to image + + :param text: The text used for image conversion. + :param size_type: One of ['256x256', '512x512', '1024x1024'] + :return: The image data is returned in Base64 encoding. + """ + + class ImageUrl(BaseModel): + url: str + + class ImageResult(BaseModel): + data: List[ImageUrl] + created: int + + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.openai_api_key}" + } + data = {"prompt": text, "n": 1, "size": size_type} + try: + response = requests.post("https://api.openai.com/v1/images/generations", headers=headers, json=data) + response.raise_for_status() # Raise an exception for 4xx or 5xx responses + result = ImageResult(**response.json()) + except requests.exceptions.RequestException as e: + logger.error(f"An error occurred:{e}") + return "" + if len(result.data) > 0: + return OpenAIText2Image.get_image_data(result.data[0].url) + return "" + + @staticmethod + def get_image_data(url): + """Fetch image data from a URL and encode it as Base64 + + :param url: Image url + :return: Base64-encoded image data. + """ + try: + response = requests.get(url) + response.raise_for_status() # Raise an exception for 4xx or 5xx responses + image_data = response.content + base64_image = base64.b64encode(image_data).decode("utf-8") + return base64_image + + except requests.exceptions.RequestException as e: + logger.error(f"An error occurred:{e}") + return "" + + +# Export +def oas3_openai_text_to_image(text, size_type: str = "1024x1024", openai_api_key=""): + """Text to image + + :param text: The text used for image conversion. + :param openai_api_key: OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys` + :param size_type: One of ['256x256', '512x512', '1024x1024'] + :return: The image data is returned in Base64 encoding. + """ + if not text: + return "" + if not openai_api_key: + openai_api_key = os.environ.get("OPENAI_API_KEY") + return OpenAIText2Image(openai_api_key).text_2_image(text, size_type=size_type) + + +if __name__ == "__main__": + initialize_environment() + + v = oas3_openai_text_to_image("Panda emoji") + print(v) diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index 7f090cf63..ea6af7e7c 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -4,14 +4,18 @@ @Time : 2023/4/29 16:07 @Author : alexanderwu @File : common.py +@Modified By: mashenquan, 2023-8-17, add `initalize_enviroment()` to load `config/config.yaml` to `os.environ` """ import ast import contextlib import inspect import os import re +from pathlib import Path from typing import List, Tuple +import yaml + from metagpt.logs import logger @@ -254,3 +258,12 @@ def parse_recipient(text): pattern = r"## Send To:\s*([A-Za-z]+)\s*?" # hard code for now recipient = re.search(pattern, text) return recipient.group(1) if recipient else "" + + +def initialize_environment(): + """Load `config/config.yaml` to `os.environ`""" + yaml_file_path = Path(__file__).resolve().parent.parent.parent / "config/config.yaml" + with open(str(yaml_file_path), "r") as yaml_file: + data = yaml.safe_load(yaml_file) + for k, v in data.items(): + os.environ[k] = str(v) diff --git a/requirements.txt b/requirements.txt index 72021b8e7..cf20432c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,4 +38,6 @@ typing_extensions==4.5.0 aiofiles libcst==1.0.1 qdrant-client==1.4.0 +connexion[swagger-ui] +aiohttp_jinja2 diff --git a/tests/metagpt/learn/__init__.py b/tests/metagpt/learn/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/metagpt/learn/test_text_to_embedding.py b/tests/metagpt/learn/test_text_to_embedding.py new file mode 100644 index 000000000..c85e5dde8 --- /dev/null +++ b/tests/metagpt/learn/test_text_to_embedding.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/18 +@Author : mashenquan +@File : test_text_to_embedding.py +@Desc : Unit tests. +""" + +import asyncio +import base64 + +from pydantic import BaseModel + +from metagpt.learn.text_to_embedding import text_to_embedding + + +async def mock_text_to_embedding(): + class Input(BaseModel): + input: str + + inputs = [ + {"input": "Panda emoji"} + ] + + for i in inputs: + seed = Input(**i) + data = text_to_embedding(seed.input) + v = ResultEmbedding(**data) + assert len(v.data) > 0 + + +def test_suite(): + loop = asyncio.get_event_loop() + task = loop.create_task(mock_text_to_embedding()) + loop.run_until_complete(task) + + +if __name__ == '__main__': + test_suite() diff --git a/tests/metagpt/learn/test_text_to_image.py b/tests/metagpt/learn/test_text_to_image.py new file mode 100644 index 000000000..545c8a3ef --- /dev/null +++ b/tests/metagpt/learn/test_text_to_image.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/18 +@Author : mashenquan +@File : test_text_to_image.py +@Desc : Unit tests. +""" +import asyncio +import base64 + +from pydantic import BaseModel + +from metagpt.learn.text_to_image import text_to_image + + +async def mock_text_to_image(): + class Input(BaseModel): + input: str + size_type: str + + inputs = [ + {"input": "Panda emoji", "size_type": "512x512"} + ] + + for i in inputs: + seed = Input(**i) + base64_data = text_to_image(seed.input) + assert base64_data != "" + print(f"{seed.input} -> {base64_data}") + assert base64.b64decode(base64_data, validate=True) + + +def test_suite(): + loop = asyncio.get_event_loop() + task = loop.create_task(mock_text_to_image()) + loop.run_until_complete(task) + + +if __name__ == '__main__': + test_suite() diff --git a/tests/metagpt/learn/test_text_to_speech.py b/tests/metagpt/learn/test_text_to_speech.py new file mode 100644 index 000000000..dbb599e38 --- /dev/null +++ b/tests/metagpt/learn/test_text_to_speech.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/18 +@Author : mashenquan +@File : test_text_to_speech.py +@Desc : Unit tests. +""" +import asyncio +import base64 + +from pydantic import BaseModel + +from metagpt.learn.text_to_speech import text_to_speech + + +async def mock_text_to_speech(): + class Input(BaseModel): + input: str + + inputs = [ + {"input": "Panda emoji"} + ] + + for i in inputs: + seed = Input(**i) + base64_data = text_to_speech(seed.input) + assert base64_data != "" + print(f"{seed.input} -> {base64_data}") + assert base64.b64decode(base64_data, validate=True) + + +def test_suite(): + loop = asyncio.get_event_loop() + task = loop.create_task(mock_text_to_speech()) + loop.run_until_complete(task) + + +if __name__ == '__main__': + test_suite() \ No newline at end of file diff --git a/tests/metagpt/actions/test_azure_tts.py b/tests/metagpt/tools/test_azure_tts.py similarity index 67% rename from tests/metagpt/actions/test_azure_tts.py rename to tests/metagpt/tools/test_azure_tts.py index b5a333af2..667e32d01 100644 --- a/tests/metagpt/actions/test_azure_tts.py +++ b/tests/metagpt/tools/test_azure_tts.py @@ -4,8 +4,13 @@ @Time : 2023/7/1 22:50 @Author : alexanderwu @File : test_azure_tts.py +@Modified By: mashenquan, 2023-8-17, move to `tools` folder. """ -from metagpt.actions.azure_tts import AzureTTS +import sys +from pathlib import Path + +sys.path.append(str(Path(__file__).resolve().parent.parent.parent.parent)) # fix-bug: No module named 'metagpt' +from metagpt.tools.azure_tts import AzureTTS def test_azure_tts():