diff --git a/metagpt/actions/azure_tts.py b/metagpt/actions/azure_tts.py deleted file mode 100644 index f528ba001..000000000 --- a/metagpt/actions/azure_tts.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -@Time : 2023/6/9 22:22 -@Author : Leo Xiao -@File : azure_tts.py -""" -from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer - -from metagpt.actions.action import Action -from metagpt.config import Config - - -class AzureTTS(Action): - def __init__(self, name, context=None, llm=None): - super().__init__(name, context, llm) - self.config = Config() - - # 参数参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles - def synthesize_speech(self, lang, voice, role, text, output_file): - subscription_key = self.config.get('AZURE_TTS_SUBSCRIPTION_KEY') - region = self.config.get('AZURE_TTS_REGION') - speech_config = SpeechConfig( - subscription=subscription_key, region=region) - - speech_config.speech_synthesis_voice_name = voice - audio_config = AudioConfig(filename=output_file) - synthesizer = SpeechSynthesizer( - speech_config=speech_config, - audio_config=audio_config) - - # if voice=="zh-CN-YunxiNeural": - ssml_string = f""" - - - - {text} - - - - """ - - synthesizer.speak_ssml_async(ssml_string).get() - - -if __name__ == "__main__": - azure_tts = AzureTTS("azure_tts") - azure_tts.synthesize_speech( - "zh-CN", - "zh-CN-YunxiNeural", - "Boy", - "你好,我是卡卡", - "output.wav") diff --git a/metagpt/tools/azure_tts.py b/metagpt/tools/azure_tts.py new file mode 100644 index 000000000..19d7c2ab1 --- /dev/null +++ b/metagpt/tools/azure_tts.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/17 +@Author : mashenquan +@File : azure_tts.py +@Desc : azure TTS openapi, which provides text-to-speech functionality +""" +from pathlib import Path +from uuid import uuid4 +import base64 +import sys + +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt' +from metagpt.utils.common import initalize_enviroment +from metagpt.logs import logger + +from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer +import os + + +class AzureTTS: + """Azure Text-to-Speech""" + + def __init__(self, subscription_key, region): + """ + :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint` + :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API. + """ + self.subscription_key = subscription_key if subscription_key else os.environ.get('AZURE_TTS_SUBSCRIPTION_KEY') + self.region = region if region else os.environ.get('AZURE_TTS_REGION') + + # 参数参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles + def synthesize_speech(self, lang, voice, text, output_file): + speech_config = SpeechConfig( + subscription=self.subscription_key, region=self.region) + speech_config.speech_synthesis_voice_name = voice + audio_config = AudioConfig(filename=output_file) + synthesizer = SpeechSynthesizer( + speech_config=speech_config, + audio_config=audio_config) + + # More detail: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice + ssml_string = "" \ + f"{text}" + + return synthesizer.speak_ssml_async(ssml_string).get() + + @staticmethod + def role_style_text(role, style, text): + return f'{text}' + + @staticmethod + def role_text(role, text): + return f'{text}' + + @staticmethod + def style_text(style, text): + return f'{text}' + + +# Export +def openapi_azsure_tts(text, lang="", voice="", style="", role="", subscription_key="", region=""): + """openapi/tts/azsure + For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + + :param lang: The value can contain a language code such as en (English), or a locale such as en-US (English - United States). For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + :param voice: For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`, `https://speech.microsoft.com/portal/voicegallery` + :param style: Speaking style to express different emotions like cheerfulness, empathy, and calm. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + :param role: With roles, the same voice can act as a different age and gender. For more details, checkout: `https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` + :param text: Text to convert + :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint` + :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API. + :return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string. + + """ + if not text: + return "" + + if not lang: + lang = "zh-CN" + if not voice: + voice = "zh-CN-XiaomoNeural" + if not role: + role = "Girl" + if not style: + style = "affectionate" + if not subscription_key: + subscription_key = os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY") + if not region: + region = os.environ.get("AZURE_TTS_REGION") + + xml_value = AzureTTS.role_style_text(role=role, style=style, text=text) + tts = AzureTTS(subscription_key=subscription_key, region=region) + filename = Path(__file__).resolve().parent / (str(uuid4()).replace("-", "") + ".wav") + try: + tts.synthesize_speech(lang=lang, voice=voice, text=xml_value, output_file=str(filename)) + with open(str(filename), mode="rb") as reader: + data = reader.read() + base64_string = base64.b64encode(data).decode('utf-8') + filename.unlink() + except Exception as e: + logger.error(f"text:{text}, error:{e}") + return "" + + return base64_string + + +if __name__ == "__main__": + initalize_enviroment() + + v = openapi_azsure_tts("测试,test") + print(v) diff --git a/metagpt/tools/hello.py b/metagpt/tools/hello.py new file mode 100644 index 000000000..686fba34b --- /dev/null +++ b/metagpt/tools/hello.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/5/2 16:03 +@Author : mashenquan +@File : hello.py +@Desc : Implement the OpenAPI Specification 3.0 demo and use the following command to test the HTTP service: + + curl -X 'POST' \ + 'http://localhost:8080/openapi/greeting/dave' \ + -H 'accept: text/plain' \ + -H 'Content-Type: application/json' \ + -d '{}' +""" + +import connexion + + +# openapi implement +def post_greeting(name: str) -> str: + return f"Hello {name}\n" + + +if __name__ == "__main__": + app = connexion.AioHttpApp(__name__, specification_dir='../../spec/') + app.add_api("openapi.yaml", arguments={"title": "Hello World Example"}) + app.run(port=8080) diff --git a/metagpt/tools/metagpt_openapi_svc.py b/metagpt/tools/metagpt_openapi_svc.py new file mode 100644 index 000000000..94d935625 --- /dev/null +++ b/metagpt/tools/metagpt_openapi_svc.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/17 +@Author : mashenquan +@File : metagpt_openapi_svc.py +@Desc : MetaGPT OpenAPI REST API service +""" +from pathlib import Path +import sys +import connexion +sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: No module named 'metagpt' +from metagpt.utils.common import initalize_enviroment + +if __name__ == "__main__": + initalize_enviroment() + + app = connexion.AioHttpApp(__name__, specification_dir='../../spec/') + app.add_api("metagpt_openapi.yaml") + app.run(port=8080) diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index 7f090cf63..b15c1d186 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -4,14 +4,18 @@ @Time : 2023/4/29 16:07 @Author : alexanderwu @File : common.py +@Modified By: mashenquan, 2023-8-17, add `initalize_enviroment()` to load `config/config.yaml` to `os.environ` """ import ast import contextlib import inspect import os import re +from pathlib import Path from typing import List, Tuple +import yaml + from metagpt.logs import logger @@ -254,3 +258,12 @@ def parse_recipient(text): pattern = r"## Send To:\s*([A-Za-z]+)\s*?" # hard code for now recipient = re.search(pattern, text) return recipient.group(1) if recipient else "" + + +def initalize_enviroment(): + """Load `config/config.yaml` to `os.environ`""" + yaml_file_path = Path(__file__).resolve().parent.parent.parent / "config/config.yaml" + with open(str(yaml_file_path), "r") as yaml_file: + data = yaml.safe_load(yaml_file) + for k, v in data.items(): + os.environ[k] = str(v) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c18145b98..eef7464ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,4 +36,6 @@ anthropic==0.3.6 typing-inspect==0.8.0 typing_extensions==4.5.0 libcst==1.0.1 -qdrant-client==1.4.0 \ No newline at end of file +qdrant-client==1.4.0 +connexion[swagger-ui] +aiohttp_jinja2 \ No newline at end of file diff --git a/spec/metagpt_openapi.yaml b/spec/metagpt_openapi.yaml new file mode 100644 index 000000000..0bb6ae7bf --- /dev/null +++ b/spec/metagpt_openapi.yaml @@ -0,0 +1,64 @@ +openapi: "3.0.0" + +info: + title: "MetaGPT Export OpenAPIs" + version: "1.0" +servers: + - url: "/openapi" + +paths: + /tts/azsure: + post: + summary: "Convert Text to Base64-encoded .wav File Stream" + description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + operationId: azure_tts.openapi_azsure_tts + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - text + properties: + text: + type: string + description: Text to convert + lang: + type: string + description: The language code or locale, e.g., en-US (English - United States) + default: "zh-CN" + voice: + type: string + description: "Voice style, see: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts), [Voice Gallery](https://speech.microsoft.com/portal/voicegallery)" + default: "zh-CN-XiaomoNeural" + style: + type: string + description: "Speaking style to express different emotions. For more details, checkout: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + default: "affectionate" + role: + type: string + description: "Role to specify age and gender. For more details, checkout: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + default: "Girl" + subscription_key: + type: string + description: "Key used to access Azure AI service API, see: [Azure Portal](https://portal.azure.com/) > `Resource Management` > `Keys and Endpoint`" + default: "" + region: + type: string + description: "Location (or region) of your resource, see: [Azure Portal](https://portal.azure.com/) > `Resource Management` > `Keys and Endpoint`" + default: "" + responses: + '200': + description: "Base64-encoded .wav file data if successful, otherwise an empty string." + content: + application/json: + schema: + type: object + properties: + result: + type: string + '400': + description: Bad Request + '500': + description: Bad Request \ No newline at end of file diff --git a/spec/openapi.yaml b/spec/openapi.yaml new file mode 100644 index 000000000..bc291b7db --- /dev/null +++ b/spec/openapi.yaml @@ -0,0 +1,35 @@ +openapi: "3.0.0" + +info: + title: Hello World + version: "1.0" +servers: + - url: /openapi + +paths: + /greeting/{name}: + post: + summary: Generate greeting + description: Generates a greeting message. + operationId: hello.post_greeting + responses: + 200: + description: greeting response + content: + text/plain: + schema: + type: string + example: "hello dave!" + parameters: + - name: name + in: path + description: Name of the person to greet. + required: true + schema: + type: string + example: "dave" + requestBody: + content: + application/json: + schema: + type: object \ No newline at end of file diff --git a/tests/metagpt/actions/test_azure_tts.py b/tests/metagpt/tools/test_azure_tts.py similarity index 67% rename from tests/metagpt/actions/test_azure_tts.py rename to tests/metagpt/tools/test_azure_tts.py index b5a333af2..667e32d01 100644 --- a/tests/metagpt/actions/test_azure_tts.py +++ b/tests/metagpt/tools/test_azure_tts.py @@ -4,8 +4,13 @@ @Time : 2023/7/1 22:50 @Author : alexanderwu @File : test_azure_tts.py +@Modified By: mashenquan, 2023-8-17, move to `tools` folder. """ -from metagpt.actions.azure_tts import AzureTTS +import sys +from pathlib import Path + +sys.path.append(str(Path(__file__).resolve().parent.parent.parent.parent)) # fix-bug: No module named 'metagpt' +from metagpt.tools.azure_tts import AzureTTS def test_azure_tts():