diff --git a/metagpt/learn/text_to_embedding.py b/metagpt/learn/text_to_embedding.py index 6d0cefcdb..5c08ef0b9 100644 --- a/metagpt/learn/text_to_embedding.py +++ b/metagpt/learn/text_to_embedding.py @@ -16,7 +16,7 @@ from metagpt.utils.common import initialize_environment @skill_metadata(name="Text to Embedding", description="Convert the text into embeddings.", requisite="`OPENAI_API_KEY`") -def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key="", **kwargs): +async def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key="", **kwargs): """Text to embedding :param text: The text used for embedding. @@ -26,5 +26,5 @@ def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key="", * """ initialize_environment() if os.environ.get("OPENAI_API_KEY") or openai_api_key: - return oas3_openai_text_to_embedding(text, model=model, openai_api_key=openai_api_key) + return await oas3_openai_text_to_embedding(text, model=model, openai_api_key=openai_api_key) raise EnvironmentError diff --git a/metagpt/learn/text_to_image.py b/metagpt/learn/text_to_image.py index d245b06db..db9844c71 100644 --- a/metagpt/learn/text_to_image.py +++ b/metagpt/learn/text_to_image.py @@ -17,7 +17,7 @@ from metagpt.utils.common import initialize_environment @skill_metadata(name="Text to image", description="Create a drawing based on the text.", requisite="`OPENAI_API_KEY` or `METAGPT_TEXT_TO_IMAGE_MODEL`") -def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url="", **kwargs): +async def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url="", **kwargs): """Text to image :param text: The text used for image conversion. @@ -29,10 +29,11 @@ def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url initialize_environment() image_declaration = "data:image/png;base64," if os.environ.get("METAGPT_TEXT_TO_IMAGE_MODEL_URL") or model_url: - data = oas3_metagpt_text_to_image(text, size_type, model_url) + data = await oas3_metagpt_text_to_image(text, size_type, model_url) return image_declaration + data if data else "" + if os.environ.get("OPENAI_API_KEY") or openai_api_key: - data = oas3_openai_text_to_image(text, size_type, openai_api_key) + data = await oas3_openai_text_to_image(text, size_type, openai_api_key) return image_declaration + data if data else "" raise EnvironmentError diff --git a/metagpt/learn/text_to_speech.py b/metagpt/learn/text_to_speech.py index 90dd878a1..e5eb3d488 100644 --- a/metagpt/learn/text_to_speech.py +++ b/metagpt/learn/text_to_speech.py @@ -16,7 +16,7 @@ from metagpt.utils.common import initialize_environment @skill_metadata(name="Text to speech", description="Text-to-speech", requisite="`AZURE_TTS_SUBSCRIPTION_KEY` and `AZURE_TTS_REGION`") -def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl", +async def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl", subscription_key="", region="", **kwargs): """Text to speech For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` @@ -35,7 +35,7 @@ def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affect audio_declaration = "data:audio/wav;base64," if (os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY") and os.environ.get("AZURE_TTS_REGION")) or \ (subscription_key and region): - data = oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region) + data = await oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region) return audio_declaration + data if data else data raise EnvironmentError diff --git a/metagpt/tools/azure_tts.py b/metagpt/tools/azure_tts.py index 21e8f1b6c..1fd36e78c 100644 --- a/metagpt/tools/azure_tts.py +++ b/metagpt/tools/azure_tts.py @@ -6,6 +6,7 @@ @File : azure_tts.py @Desc : azure TTS OAS3 api, which provides text-to-speech functionality """ +import asyncio from pathlib import Path from uuid import uuid4 import base64 @@ -14,7 +15,7 @@ import sys sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt' from metagpt.utils.common import initialize_environment from metagpt.logs import logger - +from aiofile import async_open from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer import os @@ -31,7 +32,7 @@ class AzureTTS: self.region = region if region else os.environ.get('AZURE_TTS_REGION') # 参数参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles - def synthesize_speech(self, lang, voice, text, output_file): + async def synthesize_speech(self, lang, voice, text, output_file): speech_config = SpeechConfig( subscription=self.subscription_key, region=self.region) speech_config.speech_synthesis_voice_name = voice @@ -61,7 +62,7 @@ class AzureTTS: # Export -def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key="", region=""): +async def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key="", region=""): """Text to speech For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` @@ -95,9 +96,9 @@ def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key tts = AzureTTS(subscription_key=subscription_key, region=region) filename = Path(__file__).resolve().parent / (str(uuid4()).replace("-", "") + ".wav") try: - tts.synthesize_speech(lang=lang, voice=voice, text=xml_value, output_file=str(filename)) - with open(str(filename), mode="rb") as reader: - data = reader.read() + await tts.synthesize_speech(lang=lang, voice=voice, text=xml_value, output_file=str(filename)) + async with async_open(filename, mode="rb") as reader: + data = await reader.read() base64_string = base64.b64encode(data).decode('utf-8') filename.unlink() except Exception as e: @@ -110,5 +111,7 @@ def oas3_azsure_tts(text, lang="", voice="", style="", role="", subscription_key if __name__ == "__main__": initialize_environment() - v = oas3_azsure_tts("测试,test") + loop = asyncio.new_event_loop() + v = loop.create_task(oas3_azsure_tts("测试,test")) + loop.run_until_complete(v) print(v) diff --git a/metagpt/tools/hello.py b/metagpt/tools/hello.py index e1bad6456..2eb4c31f0 100644 --- a/metagpt/tools/hello.py +++ b/metagpt/tools/hello.py @@ -17,7 +17,7 @@ import connexion # openapi implement -def post_greeting(name: str) -> str: +async def post_greeting(name: str) -> str: return f"Hello {name}\n" diff --git a/metagpt/tools/metagpt_oas3_api_svc.py b/metagpt/tools/metagpt_oas3_api_svc.py index 277d41dfb..624bb7d93 100644 --- a/metagpt/tools/metagpt_oas3_api_svc.py +++ b/metagpt/tools/metagpt_oas3_api_svc.py @@ -20,7 +20,7 @@ def oas_http_svc(): """Start the OAS 3.0 OpenAPI HTTP service""" initialize_environment() - app = connexion.FlaskApp(__name__, specification_dir='../../.well-known/') + app = connexion.AioHttpApp(__name__, specification_dir='../../.well-known/') app.add_api("metagpt_oas3_api.yaml") app.add_api("openapi.yaml") app.run(port=8080) diff --git a/metagpt/tools/metagpt_text_to_image.py b/metagpt/tools/metagpt_text_to_image.py index 8588462d3..bc551134a 100644 --- a/metagpt/tools/metagpt_text_to_image.py +++ b/metagpt/tools/metagpt_text_to_image.py @@ -12,6 +12,7 @@ import sys from pathlib import Path from typing import List, Dict +import aiohttp import requests from pydantic import BaseModel @@ -27,7 +28,7 @@ class MetaGPTText2Image: """ self.model_url = model_url if model_url else os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL') - def text_2_image(self, text, size_type="512x512"): + async def text_2_image(self, text, size_type="512x512"): """Text to image :param text: The text used for image conversion. @@ -75,9 +76,9 @@ class MetaGPTText2Image: parameters: Dict try: - response = requests.post(self.model_url, headers=headers, json=data) - response.raise_for_status() # Raise an exception for 4xx or 5xx responses - result = ImageResult(**response.json()) + async with aiohttp.ClientSession() as session: + async with session.post(self.model_url, headers=headers, json=data) as response: + result = ImageResult(**await response.json()) if len(result.images) == 0: return "" return result.images[0] @@ -87,7 +88,7 @@ class MetaGPTText2Image: # Export -def oas3_metagpt_text_to_image(text, size_type: str = "512x512", model_url=""): +async def oas3_metagpt_text_to_image(text, size_type: str = "512x512", model_url=""): """Text to image :param text: The text used for image conversion. @@ -99,7 +100,7 @@ def oas3_metagpt_text_to_image(text, size_type: str = "512x512", model_url=""): return "" if not model_url: model_url = os.environ.get('METAGPT_TEXT_TO_IMAGE_MODEL_URL') - return MetaGPTText2Image(model_url).text_2_image(text, size_type=size_type) + return await MetaGPTText2Image(model_url).text_2_image(text, size_type=size_type) if __name__ == "__main__": diff --git a/metagpt/tools/openai_text_to_embedding.py b/metagpt/tools/openai_text_to_embedding.py index 9eddd5bc1..119eb35b6 100644 --- a/metagpt/tools/openai_text_to_embedding.py +++ b/metagpt/tools/openai_text_to_embedding.py @@ -7,10 +7,12 @@ @Desc : OpenAI Text-to-Embedding OAS3 api, which provides text-to-embedding functionality. For more details, checkout: `https://platform.openai.com/docs/api-reference/embeddings/object` """ +import asyncio import os from pathlib import Path from typing import List +import aiohttp import requests from pydantic import BaseModel import sys @@ -47,7 +49,7 @@ class OpenAIText2Embedding: """ self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY') - def text_2_embedding(self, text, model="text-embedding-ada-002"): + async def text_2_embedding(self, text, model="text-embedding-ada-002"): """Text to embedding :param text: The text used for embedding. @@ -61,16 +63,16 @@ class OpenAIText2Embedding: } data = {"input": text, "model": model} try: - response = requests.post("https://api.openai.com/v1/embeddings", headers=headers, json=data) - response.raise_for_status() # Raise an exception for 4xx or 5xx responses - return response.json() + async with aiohttp.ClientSession() as session: + async with session.post("https://api.openai.com/v1/embeddings", headers=headers, json=data) as response: + return await response.json() except requests.exceptions.RequestException as e: logger.error(f"An error occurred:{e}") return {} # Export -def oas3_openai_text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""): +async def oas3_openai_text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""): """Text to embedding :param text: The text used for embedding. @@ -82,11 +84,12 @@ def oas3_openai_text_to_embedding(text, model="text-embedding-ada-002", openai_a return "" if not openai_api_key: openai_api_key = os.environ.get("OPENAI_API_KEY") - return OpenAIText2Embedding(openai_api_key).text_2_embedding(text, model=model) + return await OpenAIText2Embedding(openai_api_key).text_2_embedding(text, model=model) if __name__ == "__main__": initialize_environment() - - v = oas3_openai_text_to_embedding("Panda emoji") + loop = asyncio.new_event_loop() + v = loop.create_task(oas3_openai_text_to_embedding("Panda emoji")) + loop.run_until_complete(v) print(v) diff --git a/metagpt/tools/openai_text_to_image.py b/metagpt/tools/openai_text_to_image.py index 6ec96d166..cd48c62af 100644 --- a/metagpt/tools/openai_text_to_image.py +++ b/metagpt/tools/openai_text_to_image.py @@ -12,6 +12,7 @@ import sys from pathlib import Path from typing import List +import aiohttp import requests from pydantic import BaseModel @@ -27,7 +28,7 @@ class OpenAIText2Image: """ self.openai_api_key = openai_api_key if openai_api_key else os.environ.get('OPENAI_API_KEY') - def text_2_image(self, text, size_type="1024x1024"): + async def text_2_image(self, text, size_type="1024x1024"): """Text to image :param text: The text used for image conversion. @@ -48,27 +49,28 @@ class OpenAIText2Image: } data = {"prompt": text, "n": 1, "size": size_type} try: - response = requests.post("https://api.openai.com/v1/images/generations", headers=headers, json=data) - response.raise_for_status() # Raise an exception for 4xx or 5xx responses - result = ImageResult(**response.json()) + async with aiohttp.ClientSession() as session: + async with session.post("https://api.openai.com/v1/images/generations", headers=headers, json=data) as response: + result = ImageResult(** await response.json()) except requests.exceptions.RequestException as e: logger.error(f"An error occurred:{e}") return "" if len(result.data) > 0: - return OpenAIText2Image.get_image_data(result.data[0].url) + return await OpenAIText2Image.get_image_data(result.data[0].url) return "" @staticmethod - def get_image_data(url): + async def get_image_data(url): """Fetch image data from a URL and encode it as Base64 :param url: Image url :return: Base64-encoded image data. """ try: - response = requests.get(url) - response.raise_for_status() # Raise an exception for 4xx or 5xx responses - image_data = response.content + async with aiohttp.ClientSession() as session: + async with session.get(url) as response: + response.raise_for_status() # 如果是 4xx 或 5xx 响应,会引发异常 + image_data = await response.read() base64_image = base64.b64encode(image_data).decode("utf-8") return base64_image @@ -78,7 +80,7 @@ class OpenAIText2Image: # Export -def oas3_openai_text_to_image(text, size_type: str = "1024x1024", openai_api_key=""): +async def oas3_openai_text_to_image(text, size_type: str = "1024x1024", openai_api_key=""): """Text to image :param text: The text used for image conversion. @@ -90,7 +92,7 @@ def oas3_openai_text_to_image(text, size_type: str = "1024x1024", openai_api_key return "" if not openai_api_key: openai_api_key = os.environ.get("OPENAI_API_KEY") - return OpenAIText2Image(openai_api_key).text_2_image(text, size_type=size_type) + return await OpenAIText2Image(openai_api_key).text_2_image(text, size_type=size_type) if __name__ == "__main__": diff --git a/requirements.txt b/requirements.txt index 70f2a3809..ed3f755c9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -41,3 +41,4 @@ qdrant-client==1.4.0 connexion[swagger-ui] aiohttp_jinja2 azure-cognitiveservices-speech==1.30.0 +aiofile \ No newline at end of file diff --git a/tests/metagpt/learn/test_text_to_embedding.py b/tests/metagpt/learn/test_text_to_embedding.py index c85e5dde8..d81a8ac1c 100644 --- a/tests/metagpt/learn/test_text_to_embedding.py +++ b/tests/metagpt/learn/test_text_to_embedding.py @@ -8,11 +8,11 @@ """ import asyncio -import base64 from pydantic import BaseModel from metagpt.learn.text_to_embedding import text_to_embedding +from metagpt.tools.openai_text_to_embedding import ResultEmbedding async def mock_text_to_embedding(): @@ -25,7 +25,7 @@ async def mock_text_to_embedding(): for i in inputs: seed = Input(**i) - data = text_to_embedding(seed.input) + data = await text_to_embedding(seed.input) v = ResultEmbedding(**data) assert len(v.data) > 0 diff --git a/tests/metagpt/learn/test_text_to_image.py b/tests/metagpt/learn/test_text_to_image.py index 545c8a3ef..c359797de 100644 --- a/tests/metagpt/learn/test_text_to_image.py +++ b/tests/metagpt/learn/test_text_to_image.py @@ -25,10 +25,17 @@ async def mock_text_to_image(): for i in inputs: seed = Input(**i) - base64_data = text_to_image(seed.input) + base64_data = await text_to_image(seed.input) assert base64_data != "" print(f"{seed.input} -> {base64_data}") - assert base64.b64decode(base64_data, validate=True) + flags = ";base64," + assert flags in base64_data + ix = base64_data.find(flags) + len(flags) + declaration = base64_data[0: ix] + assert declaration + data = base64_data[ix:] + assert data + assert base64.b64decode(data, validate=True) def test_suite(): diff --git a/tests/metagpt/learn/test_text_to_speech.py b/tests/metagpt/learn/test_text_to_speech.py index dbb599e38..68de5a3b2 100644 --- a/tests/metagpt/learn/test_text_to_speech.py +++ b/tests/metagpt/learn/test_text_to_speech.py @@ -24,10 +24,17 @@ async def mock_text_to_speech(): for i in inputs: seed = Input(**i) - base64_data = text_to_speech(seed.input) + base64_data = await text_to_speech(seed.input) assert base64_data != "" print(f"{seed.input} -> {base64_data}") - assert base64.b64decode(base64_data, validate=True) + flags = ";base64," + assert flags in base64_data + ix = base64_data.find(flags) + len(flags) + declaration = base64_data[0: ix] + assert declaration + data = base64_data[ix:] + assert data + assert base64.b64decode(data, validate=True) def test_suite(): diff --git a/tests/metagpt/tools/test_azure_tts.py b/tests/metagpt/tools/test_azure_tts.py index 49dd7eed1..41d429109 100644 --- a/tests/metagpt/tools/test_azure_tts.py +++ b/tests/metagpt/tools/test_azure_tts.py @@ -7,6 +7,7 @@ @Modified By: mashenquan, 2023-8-9, add more text formatting options @Modified By: mashenquan, 2023-8-17, move to `tools` folder. """ +import asyncio import sys from pathlib import Path @@ -19,7 +20,7 @@ from metagpt.utils.common import initialize_environment def test_azure_tts(): initialize_environment() - azure_tts = AzureTTS() + azure_tts = AzureTTS(subscription_key="", region="") text = """ 女儿看见父亲走了进来,问道: @@ -33,11 +34,13 @@ def test_azure_tts(): path = WORKSPACE_ROOT / "tts" path.mkdir(exist_ok=True, parents=True) filename = path / "girl.wav" - result = azure_tts.synthesize_speech( + loop = asyncio.new_event_loop() + v = loop.create_task(azure_tts.synthesize_speech( lang="zh-CN", voice="zh-CN-XiaomoNeural", text=text, - output_file=str(filename)) + output_file=str(filename))) + result = loop.run_until_complete(v) print(result)