diff --git a/.gitignore b/.gitignore
index e326e8372..1a9741e91 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,3 +167,5 @@ output.wav
# output folder
output
+tmp.png
+
diff --git a/.well-known/ai-plugin.json b/.well-known/ai-plugin.json
new file mode 100644
index 000000000..44e8435f2
--- /dev/null
+++ b/.well-known/ai-plugin.json
@@ -0,0 +1,18 @@
+{
+ "schema_version": "v1",
+ "name_for_model": "text processing tools",
+ "name_for_human": "MetaGPT Text Plugin",
+ "description_for_model": "Plugins for text processing, including text-to-speech, text-to-image, text-to-embedding, text summarization, text-to-code, vector similarity calculation, web content crawling, and more.",
+ "description_for_human": "Plugins for text processing, including text-to-speech, text-to-image, text-to-embedding, text summarization, text-to-code, vector similarity calculation, web content crawling, and more.",
+ "auth": {
+ "type": "none"
+ },
+ "api": {
+ "type": "openapi",
+ "url": "https://github.com/iorisa/MetaGPT/blob/feature/oas3/.well-known/metagpt_oas3_api.yaml",
+ "has_user_authentication": false
+ },
+ "logo_url": "https://github.com/iorisa/MetaGPT/blob/feature/oas3/docs/resources/MetaGPT-logo.png",
+ "contact_email": "mashenquan@fuzhi.cn",
+ "legal_info_url": "https://github.com/iorisa/MetaGPT/blob/feature/oas3/docs/README_CN.md"
+}
\ No newline at end of file
diff --git a/.well-known/metagpt_oas3_api.yaml b/.well-known/metagpt_oas3_api.yaml
new file mode 100644
index 000000000..a226181a5
--- /dev/null
+++ b/.well-known/metagpt_oas3_api.yaml
@@ -0,0 +1,236 @@
+openapi: "3.0.0"
+
+info:
+ title: "MetaGPT Export OpenAPIs"
+ version: "1.0"
+servers:
+ - url: "/oas3"
+ variables:
+ port:
+ default: '8080'
+ description: HTTP service port
+
+paths:
+ /tts/azsure:
+ post:
+ summary: "Convert Text to Base64-encoded .wav File Stream"
+ description: "For more details, check out: [Azure Text-to-Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
+ operationId: azure_tts.oas3_azsure_tts
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:
+ - text
+ properties:
+ text:
+ type: string
+ description: Text to convert
+ lang:
+ type: string
+ description: The language code or locale, e.g., en-US (English - United States)
+ default: "zh-CN"
+ voice:
+ type: string
+ description: "Voice style, see: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts), [Voice Gallery](https://speech.microsoft.com/portal/voicegallery)"
+ default: "zh-CN-XiaomoNeural"
+ style:
+ type: string
+ description: "Speaking style to express different emotions. For more details, checkout: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
+ default: "affectionate"
+ role:
+ type: string
+ description: "Role to specify age and gender. For more details, checkout: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
+ default: "Girl"
+ subscription_key:
+ type: string
+ description: "Key used to access Azure AI service API, see: [Azure Portal](https://portal.azure.com/) > `Resource Management` > `Keys and Endpoint`"
+ default: ""
+ region:
+ type: string
+ description: "Location (or region) of your resource, see: [Azure Portal](https://portal.azure.com/) > `Resource Management` > `Keys and Endpoint`"
+ default: ""
+ responses:
+ '200':
+ description: "Base64-encoded .wav file data if successful, otherwise an empty string."
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ wav_data:
+ type: string
+ format: base64
+ '400':
+ description: "Bad Request"
+ '500':
+ description: "Internal Server Error"
+
+ /txt2img/openai:
+ post:
+ summary: "Convert Text to Base64-encoded Image Data Stream"
+ operationId: openai_text_to_image.oas3_openai_text_to_image
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ text:
+ type: string
+ description: "The text used for image conversion."
+ size_type:
+ type: string
+ enum: ["256x256", "512x512", "1024x1024"]
+ default: "1024x1024"
+ description: "Size of the generated image."
+ openai_api_key:
+ type: string
+ default: ""
+ description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
+ responses:
+ '200':
+ description: "Base64-encoded image data."
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ image_data:
+ type: string
+ format: base64
+ '400':
+ description: "Bad Request"
+ '500':
+ description: "Internal Server Error"
+ /txt2embedding/openai:
+ post:
+ summary: Text to embedding
+ operationId: openai_text_to_embedding.oas3_openai_text_to_embedding
+ description: Retrieve an embedding for the provided text using the OpenAI API.
+ requestBody:
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ input:
+ type: string
+ description: The text used for embedding.
+ model:
+ type: string
+ description: "ID of the model to use. For more details, checkout: [models](https://api.openai.com/v1/models)"
+ enum:
+ - text-embedding-ada-002
+ responses:
+ "200":
+ description: Successful response
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ResultEmbedding"
+ "4XX":
+ description: Client error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+ "5XX":
+ description: Server error
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/Error"
+
+ /txt2image/metagpt:
+ post:
+ summary: "Text to Image"
+ description: "Generate an image from the provided text using the MetaGPT Text-to-Image API."
+ operationId: metagpt_text_to_image.oas3_metagpt_text_to_image
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:
+ - text
+ properties:
+ text:
+ type: string
+ description: "The text used for image conversion."
+ size_type:
+ type: string
+ enum: ["512x512", "512x768"]
+ default: "512x512"
+ description: "Size of the generated image."
+ model_url:
+ type: string
+ description: "Model REST API URL for text-to-image."
+ default: ""
+ responses:
+ '200':
+ description: "Base64-encoded image data."
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ image_data:
+ type: string
+ format: base64
+ '400':
+ description: "Bad Request"
+ '500':
+ description: "Internal Server Error"
+
+components:
+ schemas:
+ Embedding:
+ type: object
+ description: Represents an embedding vector returned by the embedding endpoint.
+ properties:
+ object:
+ type: string
+ example: embedding
+ embedding:
+ type: array
+ items:
+ type: number
+ example: [0.0023064255, -0.009327292, ...]
+ index:
+ type: integer
+ example: 0
+ Usage:
+ type: object
+ properties:
+ prompt_tokens:
+ type: integer
+ example: 8
+ total_tokens:
+ type: integer
+ example: 8
+ ResultEmbedding:
+ type: object
+ properties:
+ object:
+ type: string
+ example: result_embedding
+ data:
+ type: array
+ items:
+ $ref: "#/components/schemas/Embedding"
+ model:
+ type: string
+ example: text-embedding-ada-002
+ usage:
+ $ref: "#/components/schemas/Usage"
+ Error:
+ type: object
+ properties:
+ error:
+ type: string
+ example: An error occurred
\ No newline at end of file
diff --git a/.well-known/openapi.yaml b/.well-known/openapi.yaml
new file mode 100644
index 000000000..bc291b7db
--- /dev/null
+++ b/.well-known/openapi.yaml
@@ -0,0 +1,35 @@
+openapi: "3.0.0"
+
+info:
+ title: Hello World
+ version: "1.0"
+servers:
+ - url: /openapi
+
+paths:
+ /greeting/{name}:
+ post:
+ summary: Generate greeting
+ description: Generates a greeting message.
+ operationId: hello.post_greeting
+ responses:
+ 200:
+ description: greeting response
+ content:
+ text/plain:
+ schema:
+ type: string
+ example: "hello dave!"
+ parameters:
+ - name: name
+ in: path
+ description: Name of the person to greet.
+ required: true
+ schema:
+ type: string
+ example: "dave"
+ requestBody:
+ content:
+ application/json:
+ schema:
+ type: object
\ No newline at end of file
diff --git a/config/config.yaml b/config/config.yaml
index 303f4824b..6e9a61931 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -70,3 +70,6 @@ SD_T2I_API: "/sdapi/v1/txt2img"
### for Research
MODEL_FOR_RESEARCHER_SUMMARY: gpt-3.5-turbo
MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k
+
+### Meta Models
+#METAGPT_TEXT_TO_IMAGE_MODEL: MODEL_URL
\ No newline at end of file
diff --git a/metagpt/actions/azure_tts.py b/metagpt/actions/azure_tts.py
deleted file mode 100644
index f528ba001..000000000
--- a/metagpt/actions/azure_tts.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-@Time : 2023/6/9 22:22
-@Author : Leo Xiao
-@File : azure_tts.py
-"""
-from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer
-
-from metagpt.actions.action import Action
-from metagpt.config import Config
-
-
-class AzureTTS(Action):
- def __init__(self, name, context=None, llm=None):
- super().__init__(name, context, llm)
- self.config = Config()
-
- # 参数参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles
- def synthesize_speech(self, lang, voice, role, text, output_file):
- subscription_key = self.config.get('AZURE_TTS_SUBSCRIPTION_KEY')
- region = self.config.get('AZURE_TTS_REGION')
- speech_config = SpeechConfig(
- subscription=subscription_key, region=region)
-
- speech_config.speech_synthesis_voice_name = voice
- audio_config = AudioConfig(filename=output_file)
- synthesizer = SpeechSynthesizer(
- speech_config=speech_config,
- audio_config=audio_config)
-
- # if voice=="zh-CN-YunxiNeural":
- ssml_string = f"""
-
-
-
- {text}
-
-
-
- """
-
- synthesizer.speak_ssml_async(ssml_string).get()
-
-
-if __name__ == "__main__":
- azure_tts = AzureTTS("azure_tts")
- azure_tts.synthesize_speech(
- "zh-CN",
- "zh-CN-YunxiNeural",
- "Boy",
- "你好,我是卡卡",
- "output.wav")
diff --git a/metagpt/learn/text_to_embedding.py b/metagpt/learn/text_to_embedding.py
new file mode 100644
index 000000000..281815ca6
--- /dev/null
+++ b/metagpt/learn/text_to_embedding.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : text_to_embedding.py
+@Desc : Text-to-Embedding skill, which provides text-to-embedding functionality.
+"""
+import os
+
+from metagpt.tools.openai_text_to_embedding import oas3_openai_text_to_embedding
+from metagpt.utils.common import initialize_environment
+
+
def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
    """Convert text to an embedding through the OpenAI embedding tool.

    The configuration file is loaded into `os.environ` first, so the key may come
    either from the argument or from the `OPENAI_API_KEY` environment variable.

    :param text: The text used for embedding.
    :param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, check out: `https://api.openai.com/v1/models`.
    :param openai_api_key: OpenAI API key. For more details, check out: `https://platform.openai.com/account/api-keys`
    :return: The value returned by `oas3_openai_text_to_embedding` for the given text.
    :raises EnvironmentError: If neither `openai_api_key` nor `OPENAI_API_KEY` provides a key.
    """
    initialize_environment()
    if not (openai_api_key or os.environ.get("OPENAI_API_KEY")):
        # Fail with an actionable message instead of a bare exception.
        raise EnvironmentError(
            "OpenAI API key is missing: pass `openai_api_key` or set `OPENAI_API_KEY`."
        )
    return oas3_openai_text_to_embedding(text, model=model, openai_api_key=openai_api_key)
diff --git a/metagpt/learn/text_to_image.py b/metagpt/learn/text_to_image.py
new file mode 100644
index 000000000..0932dfe07
--- /dev/null
+++ b/metagpt/learn/text_to_image.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : text_to_image.py
+@Desc : Text-to-Image skill, which provides text-to-image functionality.
+"""
+import os
+
+from metagpt.tools.metagpt_text_to_image import oas3_metagpt_text_to_image
+from metagpt.tools.openai_text_to_image import oas3_openai_text_to_image
+from metagpt.utils.common import initialize_environment
+
+
def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url=""):
    """Convert text to an image using whichever backend is configured.

    The MetaGPT backend takes precedence when a model URL is available; the OpenAI
    backend is used otherwise when an API key is available.

    :param text: The text used for image conversion.
    :param size_type: If using OPENAI, the available size options are ['256x256', '512x512', '1024x1024'], while for MetaGPT, the options are ['512x512', '512x768'].
    :param openai_api_key: OpenAI API key. For more details, check out: `https://platform.openai.com/account/api-keys`
    :param model_url: MetaGPT model url
    :return: The value returned by the selected backend function.
    :raises EnvironmentError: If neither a MetaGPT model URL nor an OpenAI API key is configured.
    """
    initialize_environment()
    if model_url or os.environ.get("METAGPT_TEXT_TO_IMAGE_MODEL"):
        return oas3_metagpt_text_to_image(text, size_type, model_url)
    if openai_api_key or os.environ.get("OPENAI_API_KEY"):
        return oas3_openai_text_to_image(text, size_type, openai_api_key)
    # Fail with an actionable message instead of a bare exception.
    raise EnvironmentError(
        "No text-to-image backend configured: pass `model_url`/`openai_api_key` "
        "or set `METAGPT_TEXT_TO_IMAGE_MODEL`/`OPENAI_API_KEY`."
    )
diff --git a/metagpt/learn/text_to_speech.py b/metagpt/learn/text_to_speech.py
new file mode 100644
index 000000000..1b81097b8
--- /dev/null
+++ b/metagpt/learn/text_to_speech.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/17
+@Author : mashenquan
+@File : text_to_speech.py
+@Desc : Text-to-Speech skill, which provides text-to-speech functionality
+"""
+import os
+
+from metagpt.tools.azure_tts import oas3_azsure_tts
+from metagpt.utils.common import initialize_environment
+
+
def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl",
                   subscription_key="", region=""):
    """Convert text to speech through the Azure TTS tool.

    For more details, check out: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`

    :param text: The text used for voice conversion.
    :param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States).
    :param voice: Voice name, see also `https://speech.microsoft.com/portal/voicegallery`
    :param style: Speaking style to express different emotions like cheerfulness, empathy, and calm.
    :param role: With roles, the same voice can act as a different age and gender.
    :param subscription_key: Key used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
    :param region: The location (or region) of your Azure resource.
    :return: The value returned by `oas3_azsure_tts` for the given arguments.
    :raises EnvironmentError: If no complete (subscription key, region) pair is available from
        either the arguments or the `AZURE_TTS_SUBSCRIPTION_KEY`/`AZURE_TTS_REGION` environment variables.
    """
    initialize_environment()
    env_configured = os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY") and os.environ.get("AZURE_TTS_REGION")
    args_configured = subscription_key and region
    if env_configured or args_configured:
        return oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)

    # Fail with an actionable message instead of a bare exception.
    raise EnvironmentError(
        "Azure TTS is not configured: pass `subscription_key` and `region` "
        "or set `AZURE_TTS_SUBSCRIPTION_KEY` and `AZURE_TTS_REGION`."
    )
diff --git a/metagpt/tools/azure_tts.py b/metagpt/tools/azure_tts.py
new file mode 100644
index 000000000..21e8f1b6c
--- /dev/null
+++ b/metagpt/tools/azure_tts.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/17
+@Author : mashenquan
+@File : azure_tts.py
+@Desc : azure TTS OAS3 api, which provides text-to-speech functionality
+"""
+from pathlib import Path
+from uuid import uuid4
+import base64
+import sys
+
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
+from metagpt.utils.common import initialize_environment
+from metagpt.logs import logger
+
+from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer
+import os
+
+
class AzureTTS:
    """Azure Text-to-Speech"""

    def __init__(self, subscription_key, region):
        """
        Store the Azure credentials, falling back to environment variables when an argument is empty.

        :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
        :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
        """
        # An empty/None argument defers to the corresponding environment variable.
        self.subscription_key = subscription_key if subscription_key else os.environ.get('AZURE_TTS_SUBSCRIPTION_KEY')
        self.region = region if region else os.environ.get('AZURE_TTS_REGION')

    # Parameter reference: https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles
    def synthesize_speech(self, lang, voice, text, output_file):
        """Synthesize `text` with the given voice and direct the audio to `output_file`.

        :param lang: Language/locale code. NOTE(review): not referenced by the code visible here;
            presumably it belonged to SSML markup that is missing from the literal below -- confirm.
        :param voice: Voice name assigned to the speech configuration.
        :param text: Text (or pre-built markup) to speak.
        :param output_file: Path passed to `AudioConfig` as the output filename.
        :return: The result of waiting on `speak_ssml_async(...)`.
        """
        speech_config = SpeechConfig(
            subscription=self.subscription_key, region=self.region)
        speech_config.speech_synthesis_voice_name = voice
        audio_config = AudioConfig(filename=output_file)
        synthesizer = SpeechSynthesizer(
            speech_config=speech_config,
            audio_config=audio_config)

        # More detail: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice
        # NOTE(review): the string below contains no SSML tags in this view; the markup looks
        # like it was stripped during extraction -- verify against the repository.
        ssml_string = "" \
                      f"{text}"

        return synthesizer.speak_ssml_async(ssml_string).get()

    @staticmethod
    def role_style_text(role, style, text):
        """Format `text` for a role and style.

        NOTE(review): as visible here `role` and `style` are unused and the text is returned
        unchanged; the wrapping markup was presumably lost -- confirm.
        """
        return f'{text}'

    @staticmethod
    def role_text(role, text):
        """Format `text` for a role. NOTE(review): `role` is unused in the visible literal -- confirm."""
        return f'{text}'

    @staticmethod
    def style_text(style, text):
        """Format `text` for a style. NOTE(review): `style` is unused in the visible literal -- confirm."""
        return f'{text}'
+
+
+# Export
def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key="", region=""):
    """Convert text to speech and return the audio as Base64-encoded .wav data.

    For more details, check out: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`

    The audio is synthesized into a uniquely named temporary file next to this module,
    read back, Base64-encoded, and the file is always removed afterwards.

    :param text: The text used for voice conversion.
    :param lang: Language code or locale such as en-US; defaults to "zh-CN" when empty.
    :param voice: Voice name; defaults to "zh-CN-XiaomoNeural" when empty. See `https://speech.microsoft.com/portal/voicegallery`
    :param style: Speaking style; defaults to "affectionate" when empty.
    :param role: Speaking role; defaults to "Girl" when empty.
    :param subscription_key: Azure key; falls back to the `AZURE_TTS_SUBSCRIPTION_KEY` environment variable when empty.
    :param region: Azure resource region; falls back to the `AZURE_TTS_REGION` environment variable when empty.
    :return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string.
    """
    if not text:
        return ""

    # Empty arguments fall back to the documented defaults / environment configuration.
    lang = lang or "zh-CN"
    voice = voice or "zh-CN-XiaomoNeural"
    role = role or "Girl"
    style = style or "affectionate"
    subscription_key = subscription_key or os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY")
    region = region or os.environ.get("AZURE_TTS_REGION")

    xml_value = AzureTTS.role_style_text(role=role, style=style, text=text)
    tts = AzureTTS(subscription_key=subscription_key, region=region)
    filename = Path(__file__).resolve().parent / (uuid4().hex + ".wav")
    try:
        tts.synthesize_speech(lang=lang, voice=voice, text=xml_value, output_file=str(filename))
        with open(str(filename), mode="rb") as reader:
            data = reader.read()
        return base64.b64encode(data).decode('utf-8')
    except Exception as e:
        # Best-effort API: any synthesis or I/O failure is reported as an empty result.
        logger.error(f"text:{text}, error:{e}")
        return ""
    finally:
        # Remove the temporary file on every path; previously it leaked whenever
        # synthesis or reading failed.
        try:
            if filename.exists():
                filename.unlink()
        except OSError as e:
            logger.error(f"failed to remove temporary file {filename}: {e}")
+
+
if __name__ == "__main__":
    # Manual smoke test: load the configuration, synthesize a short sample, print the Base64 result.
    initialize_environment()

    encoded_wav = oas3_azsure_tts("测试,test")
    print(encoded_wav)
diff --git a/metagpt/tools/hello.py b/metagpt/tools/hello.py
new file mode 100644
index 000000000..e1bad6456
--- /dev/null
+++ b/metagpt/tools/hello.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/5/2 16:03
+@Author : mashenquan
+@File : hello.py
+@Desc : Implement the OpenAPI Specification 3.0 demo and use the following command to test the HTTP service:
+
+ curl -X 'POST' \
+ 'http://localhost:8080/openapi/greeting/dave' \
+ -H 'accept: text/plain' \
+ -H 'Content-Type: application/json' \
+ -d '{}'
+"""
+
+import connexion
+
+
+# openapi implement
def post_greeting(name: str) -> str:
    """Handler for the greeting operation of the demo OpenAPI spec.

    :param name: Name of the person to greet.
    :return: The greeting text terminated by a newline.
    """
    greeting = "Hello {}\n".format(name)
    return greeting
+
+
if __name__ == "__main__":
    # Serve the demo specification from the repository's `.well-known` directory on port 8080.
    hello_app = connexion.AioHttpApp(__name__, specification_dir='../../.well-known/')
    hello_app.add_api("openapi.yaml", arguments={"title": "Hello World Example"})
    hello_app.run(port=8080)
diff --git a/metagpt/tools/metagpt_oas3_api_svc.py b/metagpt/tools/metagpt_oas3_api_svc.py
new file mode 100644
index 000000000..277d41dfb
--- /dev/null
+++ b/metagpt/tools/metagpt_oas3_api_svc.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/17
+@Author : mashenquan
+@File : metagpt_oas3_api_svc.py
+@Desc : MetaGPT OpenAPI Specification 3.0 REST API service
+"""
+import asyncio
+from pathlib import Path
+import sys
+
+import connexion
+
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
+from metagpt.utils.common import initialize_environment
+
+
def oas_http_svc():
    """Load the configuration and serve both OpenAPI specifications on port 8080."""
    initialize_environment()

    service = connexion.FlaskApp(__name__, specification_dir='../../.well-known/')
    # Register the specs in the same order as before: the MetaGPT API first, then the demo API.
    for spec_file in ("metagpt_oas3_api.yaml", "openapi.yaml"):
        service.add_api(spec_file)
    service.run(port=8080)
+
+
async def async_main():
    """Run the OAS 3.0 OpenAPI HTTP service in a worker thread of the running event loop.

    The blocking `oas_http_svc()` call is handed to the loop's default executor and awaited,
    so other tasks on the loop keep running while the service is up, and any exception raised
    by the service propagates to the caller instead of being discarded.
    """
    # get_running_loop() is the supported way to obtain the loop from inside a coroutine.
    loop = asyncio.get_running_loop()
    # Awaiting the future replaces the former placeholder loop that printed "sleep" every
    # second forever and never observed the service's outcome.
    await loop.run_in_executor(None, oas_http_svc)
+
+
def main():
    """Entry point: start the OpenAPI HTTP service in the foreground via `oas_http_svc()`."""
    oas_http_svc()
+
+
if __name__ == "__main__":
    # The service is started synchronously; the asyncio-based variant is left disabled:
    # asyncio.run(async_main())
    main()
diff --git a/metagpt/tools/metagpt_text_to_image.py b/metagpt/tools/metagpt_text_to_image.py
new file mode 100644
index 000000000..393215df0
--- /dev/null
+++ b/metagpt/tools/metagpt_text_to_image.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : metagpt_text_to_image.py
+@Desc : MetaGPT Text-to-Image OAS3 api, which provides text-to-image functionality.
+"""
+import base64
+import os
+import sys
+from pathlib import Path
+from typing import List, Dict
+
+import requests
+from pydantic import BaseModel
+
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
+from metagpt.utils.common import initialize_environment
+from metagpt.logs import logger
+
+
class MetaGPTText2Image:
    """Client for a text-to-image model exposed through a REST endpoint."""

    def __init__(self, model_url):
        """
        :param model_url: Model REST API URL; when empty, the `METAGPT_TEXT_TO_IMAGE_MODEL`
            environment variable is used instead.
        """
        self.model_url = model_url if model_url else os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL')

    def text_2_image(self, text, size_type="512x512"):
        """Text to image

        :param text: The text used for image conversion.
        :param size_type: One of ['512x512', '512x768'], formatted as "<width>x<height>".
        :return: The first entry of the `images` list in the service response, or an empty
            string when the request fails, the response is malformed, or no image is returned.
        """

        class ImageResult(BaseModel):
            images: List
            parameters: Dict

        # A malformed size used to raise out of this method; report it like any other failure.
        try:
            dims = size_type.split("x")
            width, height = int(dims[0]), int(dims[1])
        except (AttributeError, IndexError, ValueError) as e:
            logger.error(f"An error occurred:{e}")
            return ""

        headers = {
            "Content-Type": "application/json"
        }
        data = {
            "prompt": text,
            "negative_prompt": "(easynegative:0.8),black, dark,Low resolution",
            "override_settings": {"sd_model_checkpoint": "galaxytimemachinesGTM_photoV20"},
            "seed": -1,
            "batch_size": 1,
            "n_iter": 1,
            "steps": 20,
            "cfg_scale": 11,
            "width": width,
            "height": height,
            "restore_faces": False,
            "tiling": False,
            "do_not_save_samples": False,
            "do_not_save_grid": False,
            "enable_hr": False,
            "hr_scale": 2,
            "hr_upscaler": "Latent",
            "hr_second_pass_steps": 0,
            "hr_resize_x": 0,
            "hr_resize_y": 0,
            "hr_upscale_to_x": 0,
            "hr_upscale_to_y": 0,
            "truncate_x": 0,
            "truncate_y": 0,
            "applied_old_hires_behavior_to": None,
            "eta": None,
            "sampler_index": "DPM++ SDE Karras",
            "alwayson_scripts": {},
        }

        try:
            response = requests.post(self.model_url, headers=headers, json=data)
            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
            result = ImageResult(**response.json())
        except (requests.exceptions.RequestException, ValueError, TypeError) as e:
            # ValueError covers JSON decoding and pydantic validation errors; TypeError covers
            # a JSON payload that is not an object. Previously only transport errors were caught.
            logger.error(f"An error occurred:{e}")
            return ""
        if not result.images:
            return ""
        return result.images[0]
+
+
+# Export
def oas3_metagpt_text_to_image(text, size_type: str = "512x512", model_url=""):
    """Exported text-to-image entry point backed by `MetaGPTText2Image`.

    :param text: The text used for image conversion.
    :param size_type: One of ['512x512', '512x768']
    :param model_url: Model REST API URL; when empty, the `METAGPT_TEXT_TO_IMAGE_MODEL`
        environment variable is used.
    :return: An empty string when `text` is empty, otherwise the result of
        `MetaGPTText2Image.text_2_image`.
    """
    if text:
        endpoint = model_url if model_url else os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL')
        client = MetaGPTText2Image(endpoint)
        return client.text_2_image(text, size_type=size_type)
    return ""
+
+
if __name__ == "__main__":
    # Manual smoke test: generate one image, save it as tmp.png and print the Base64 payload.
    initialize_environment()

    # The exported function is `oas3_metagpt_text_to_image`; the previous call used a
    # non-existent name (`oas3_metagpt_text_2_image`) and failed with NameError.
    v = oas3_metagpt_text_to_image("Panda emoji")
    data = base64.b64decode(v)
    with open("tmp.png", mode="wb") as writer:
        writer.write(data)
    print(v)
diff --git a/metagpt/tools/openai_text_to_embedding.py b/metagpt/tools/openai_text_to_embedding.py
new file mode 100644
index 000000000..9eddd5bc1
--- /dev/null
+++ b/metagpt/tools/openai_text_to_embedding.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : openai_text_to_embedding.py
+@Desc : OpenAI Text-to-Embedding OAS3 api, which provides text-to-embedding functionality.
+ For more details, checkout: `https://platform.openai.com/docs/api-reference/embeddings/object`
+"""
+import os
+from pathlib import Path
+from typing import List
+
+import requests
+from pydantic import BaseModel
+import sys
+
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
+from metagpt.utils.common import initialize_environment
+from metagpt.logs import logger
+
+
class Embedding(BaseModel):
    """Represents an embedding vector returned by embedding endpoint."""
    object: str  # The object type, which is always "embedding".
    embedding: List[
        float]  # The embedding vector, a list of floats whose length depends on the model (see the embedding guide).
    index: int  # The index of the embedding in the list of embeddings.
+
+
class Usage(BaseModel):
    """Token usage section of an embedding result (the `usage` field of `ResultEmbedding`)."""
    prompt_tokens: int  # presumably the number of tokens in the input -- confirm against the OpenAI API reference
    total_tokens: int  # presumably the total number of tokens billed for the request -- confirm
+
+
class ResultEmbedding(BaseModel):
    """Top-level embedding result: a list of `Embedding` items plus the model name and `Usage`."""
    object: str  # Object type string as returned by the service.
    data: List[Embedding]  # The embeddings contained in the result.
    model: str  # ID of the model reported in the result.
    usage: Usage  # Token usage for the request.
+
+
class OpenAIText2Embedding:
    """Client for the OpenAI embeddings HTTP endpoint."""

    def __init__(self, openai_api_key):
        """
        :param openai_api_key: OpenAI API key; when empty, the `OPENAI_API_KEY` environment
            variable is used. For more details, check out: `https://platform.openai.com/account/api-keys`
        """
        self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY')

    def text_2_embedding(self, text, model="text-embedding-ada-002"):
        """Text to embedding

        :param text: The text used for embedding.
        :param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, check out: `https://api.openai.com/v1/models`.
        :return: The decoded JSON response (shaped like :class:`ResultEmbedding`) if successful, otherwise `{}`.
        """

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.openai_api_key}"
        }
        data = {"input": text, "model": model}
        try:
            response = requests.post("https://api.openai.com/v1/embeddings", headers=headers, json=data)
            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON, which older `requests`
            # versions do not report as a RequestException.
            logger.error(f"An error occurred:{e}")
        return {}
+
+
+# Export
def oas3_openai_text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
    """Exported text-to-embedding entry point backed by `OpenAIText2Embedding`.

    :param text: The text used for embedding.
    :param model: One of ['text-embedding-ada-002'], ID of the model to use. For more details, check out: `https://api.openai.com/v1/models`.
    :param openai_api_key: OpenAI API key; when empty, the `OPENAI_API_KEY` environment variable is used.
        For more details, check out: `https://platform.openai.com/account/api-keys`
    :return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`.
    """
    if not text:
        # Keep the failure value consistent with the documented contract and with
        # `text_2_embedding`: an empty dict, not an empty string.
        return {}
    if not openai_api_key:
        openai_api_key = os.environ.get("OPENAI_API_KEY")
    return OpenAIText2Embedding(openai_api_key).text_2_embedding(text, model=model)
+
+
if __name__ == "__main__":
    # Manual smoke test: load the configuration, embed a sample phrase and print the result.
    initialize_environment()

    embedding_result = oas3_openai_text_to_embedding("Panda emoji")
    print(embedding_result)
diff --git a/metagpt/tools/openai_text_to_image.py b/metagpt/tools/openai_text_to_image.py
new file mode 100644
index 000000000..6ec96d166
--- /dev/null
+++ b/metagpt/tools/openai_text_to_image.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/17
+@Author : mashenquan
+@File : openai_text_to_image.py
+@Desc : OpenAI Text-to-Image OAS3 api, which provides text-to-image functionality.
+"""
+import base64
+import os
+import sys
+from pathlib import Path
+from typing import List
+
+import requests
+from pydantic import BaseModel
+
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt'
+from metagpt.utils.common import initialize_environment
+from metagpt.logs import logger
+
+
class OpenAIText2Image:
    """Client for the OpenAI image generation HTTP endpoint."""

    def __init__(self, openai_api_key):
        """
        :param openai_api_key: OpenAI API key; when empty, the `OPENAI_API_KEY` environment
            variable is used. For more details, check out: `https://platform.openai.com/account/api-keys`
        """
        self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY')

    def text_2_image(self, text, size_type="1024x1024"):
        """Text to image

        :param text: The text used for image conversion.
        :param size_type: One of ['256x256', '512x512', '1024x1024']
        :return: The Base64-encoded data of the first generated image, or an empty string when
            the request fails, the response is malformed, or no image is returned.
        """

        class ImageUrl(BaseModel):
            url: str

        class ImageResult(BaseModel):
            data: List[ImageUrl]
            created: int

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.openai_api_key}"
        }
        data = {"prompt": text, "n": 1, "size": size_type}
        try:
            response = requests.post("https://api.openai.com/v1/images/generations", headers=headers, json=data)
            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
            result = ImageResult(**response.json())
        except (requests.exceptions.RequestException, ValueError, TypeError) as e:
            # ValueError covers JSON decoding and pydantic validation errors; TypeError covers
            # a JSON payload that is not an object. Previously only transport errors were caught.
            logger.error(f"An error occurred:{e}")
            return ""
        if result.data:
            # The API answers with URLs; download the first image and encode it.
            return OpenAIText2Image.get_image_data(result.data[0].url)
        return ""

    @staticmethod
    def get_image_data(url):
        """Fetch image data from a URL and encode it as Base64

        :param url: Image url
        :return: Base64-encoded image data, or an empty string if the download fails.
        """
        try:
            response = requests.get(url)
            response.raise_for_status()  # Raise an exception for 4xx or 5xx responses
            return base64.b64encode(response.content).decode("utf-8")
        except requests.exceptions.RequestException as e:
            logger.error(f"An error occurred:{e}")
            return ""
+
+
+# Export
def oas3_openai_text_to_image(text, size_type: str = "1024x1024", openai_api_key=""):
    """Exported text-to-image entry point backed by `OpenAIText2Image`.

    :param text: The text used for image conversion.
    :param size_type: One of ['256x256', '512x512', '1024x1024']
    :param openai_api_key: OpenAI API key; when empty, the `OPENAI_API_KEY` environment variable is used.
        For more details, check out: `https://platform.openai.com/account/api-keys`
    :return: An empty string when `text` is empty, otherwise the result of
        `OpenAIText2Image.text_2_image`.
    """
    if text:
        api_key = openai_api_key if openai_api_key else os.environ.get("OPENAI_API_KEY")
        client = OpenAIText2Image(api_key)
        return client.text_2_image(text, size_type=size_type)
    return ""
+
+
if __name__ == "__main__":
    # Manual smoke test: load the configuration, generate one image and print its Base64 data.
    initialize_environment()

    encoded_image = oas3_openai_text_to_image("Panda emoji")
    print(encoded_image)
diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py
index 7f090cf63..ea6af7e7c 100644
--- a/metagpt/utils/common.py
+++ b/metagpt/utils/common.py
@@ -4,14 +4,18 @@
@Time : 2023/4/29 16:07
@Author : alexanderwu
@File : common.py
+@Modified By: mashenquan, 2023-8-17, add `initialize_environment()` to load `config/config.yaml` to `os.environ`
"""
import ast
import contextlib
import inspect
import os
import re
+from pathlib import Path
from typing import List, Tuple
+import yaml
+
from metagpt.logs import logger
@@ -254,3 +258,12 @@ def parse_recipient(text):
pattern = r"## Send To:\s*([A-Za-z]+)\s*?" # hard code for now
recipient = re.search(pattern, text)
return recipient.group(1) if recipient else ""
+
+
+def initialize_environment():
+    """Load `config/config.yaml` to `os.environ` as strings; an empty config file is a no-op."""
+    yaml_file_path = Path(__file__).resolve().parent.parent.parent / "config/config.yaml"
+    with open(yaml_file_path, "r", encoding="utf-8") as yaml_file:  # explicit, not the locale default
+        data = yaml.safe_load(yaml_file) or {}  # safe_load returns None for an empty document
+    for k, v in data.items():
+        os.environ[k] = str(v)
diff --git a/requirements.txt b/requirements.txt
index 72021b8e7..cf20432c6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,4 +38,6 @@ typing_extensions==4.5.0
aiofiles
libcst==1.0.1
qdrant-client==1.4.0
+connexion[swagger-ui]
+aiohttp_jinja2
diff --git a/tests/metagpt/learn/__init__.py b/tests/metagpt/learn/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/metagpt/learn/test_text_to_embedding.py b/tests/metagpt/learn/test_text_to_embedding.py
new file mode 100644
index 000000000..c85e5dde8
--- /dev/null
+++ b/tests/metagpt/learn/test_text_to_embedding.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : test_text_to_embedding.py
+@Desc : Unit tests.
+"""
+
+import asyncio
+import base64
+
+from pydantic import BaseModel
+
+from metagpt.learn.text_to_embedding import text_to_embedding
+
+
+async def mock_text_to_embedding():
+    """Run `text_to_embedding` on each sample and check it parses into a non-empty `ResultEmbedding`."""
+    # `ResultEmbedding` was used without any import (NameError). NOTE(review): module path assumed, confirm.
+    from metagpt.tools.openai_text_to_embedding import ResultEmbedding
+
+    class Input(BaseModel):
+        input: str
+
+    inputs = [{"input": "Panda emoji"}]
+    for i in inputs:
+        seed = Input(**i)
+        data = text_to_embedding(seed.input)
+        assert len(ResultEmbedding(**data).data) > 0
+
+
+def test_suite():
+    """Test entry point: run the async embedding mock to completion on the current event loop."""
+    event_loop = asyncio.get_event_loop()
+    event_loop.run_until_complete(event_loop.create_task(mock_text_to_embedding()))
+
+
+if __name__ == '__main__':  # allow running this test module directly as a script
+    test_suite()
diff --git a/tests/metagpt/learn/test_text_to_image.py b/tests/metagpt/learn/test_text_to_image.py
new file mode 100644
index 000000000..545c8a3ef
--- /dev/null
+++ b/tests/metagpt/learn/test_text_to_image.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : test_text_to_image.py
+@Desc : Unit tests.
+"""
+import asyncio
+import base64
+
+from pydantic import BaseModel
+
+from metagpt.learn.text_to_image import text_to_image
+
+
+async def mock_text_to_image():
+    """Run `text_to_image` on each sample prompt and assert the result is non-empty, valid Base64."""
+
+    class Input(BaseModel):
+        input: str
+        size_type: str
+
+    samples = [{"input": "Panda emoji", "size_type": "512x512"}]
+    for sample in samples:
+        seed = Input(**sample)
+        # NOTE(review): seed.size_type is validated above but never forwarded to text_to_image; confirm intent.
+        base64_data = text_to_image(seed.input)
+        assert base64_data != ""
+        print(f"{seed.input} -> {base64_data}")
+        assert base64.b64decode(base64_data, validate=True)
+
+
+def test_suite():
+    """Test entry point: run the async image mock to completion on the current event loop."""
+    event_loop = asyncio.get_event_loop()
+    event_loop.run_until_complete(event_loop.create_task(mock_text_to_image()))
+
+
+if __name__ == '__main__':  # allow running this test module directly as a script
+    test_suite()
diff --git a/tests/metagpt/learn/test_text_to_speech.py b/tests/metagpt/learn/test_text_to_speech.py
new file mode 100644
index 000000000..dbb599e38
--- /dev/null
+++ b/tests/metagpt/learn/test_text_to_speech.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time : 2023/8/18
+@Author : mashenquan
+@File : test_text_to_speech.py
+@Desc : Unit tests.
+"""
+import asyncio
+import base64
+
+from pydantic import BaseModel
+
+from metagpt.learn.text_to_speech import text_to_speech
+
+
+async def mock_text_to_speech():
+    """Run `text_to_speech` on each sample text and assert the result is non-empty, valid Base64."""
+
+    class Input(BaseModel):
+        input: str
+
+    samples = [{"input": "Panda emoji"}]
+    for sample in samples:
+        seed = Input(**sample)
+        base64_data = text_to_speech(seed.input)
+        assert base64_data != ""
+        print(f"{seed.input} -> {base64_data}")
+        # validate=True makes b64decode reject characters outside the Base64 alphabet.
+        assert base64.b64decode(base64_data, validate=True)
+
+
+def test_suite():
+    """Test entry point: run the async speech mock to completion on the current event loop."""
+    event_loop = asyncio.get_event_loop()
+    event_loop.run_until_complete(event_loop.create_task(mock_text_to_speech()))
+
+
+if __name__ == '__main__':  # allow running this test module directly as a script
+    test_suite()
\ No newline at end of file
diff --git a/tests/metagpt/actions/test_azure_tts.py b/tests/metagpt/tools/test_azure_tts.py
similarity index 67%
rename from tests/metagpt/actions/test_azure_tts.py
rename to tests/metagpt/tools/test_azure_tts.py
index b5a333af2..667e32d01 100644
--- a/tests/metagpt/actions/test_azure_tts.py
+++ b/tests/metagpt/tools/test_azure_tts.py
@@ -4,8 +4,13 @@
@Time : 2023/7/1 22:50
@Author : alexanderwu
@File : test_azure_tts.py
+@Modified By: mashenquan, 2023-8-17, move to `tools` folder.
"""
-from metagpt.actions.azure_tts import AzureTTS
+import sys
+from pathlib import Path
+
+sys.path.append(str(Path(__file__).resolve().parent.parent.parent.parent)) # fix-bug: No module named 'metagpt'
+from metagpt.tools.azure_tts import AzureTTS
def test_azure_tts():