diff --git a/metagpt/actions/azure_tts.py b/metagpt/actions/azure_tts.py index f528ba001..3520de8b4 100644 --- a/metagpt/actions/azure_tts.py +++ b/metagpt/actions/azure_tts.py @@ -4,11 +4,13 @@ @Time : 2023/6/9 22:22 @Author : Leo Xiao @File : azure_tts.py +@Modified By: mashenquan, 2023-8-9, add more text formatting options """ from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer from metagpt.actions.action import Action from metagpt.config import Config +from metagpt.const import WORKSPACE_ROOT class AzureTTS(Action): @@ -17,7 +19,7 @@ class AzureTTS(Action): self.config = Config() # 参数参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles - def synthesize_speech(self, lang, voice, role, text, output_file): + def synthesize_speech(self, lang, voice, text, output_file): subscription_key = self.config.get('AZURE_TTS_SUBSCRIPTION_KEY') region = self.config.get('AZURE_TTS_REGION') speech_config = SpeechConfig( @@ -29,25 +31,47 @@ class AzureTTS(Action): speech_config=speech_config, audio_config=audio_config) - # if voice=="zh-CN-YunxiNeural": + # More detail: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice ssml_string = f""" - - {text} - + {text} """ - synthesizer.speak_ssml_async(ssml_string).get() + return synthesizer.speak_ssml_async(ssml_string).get() + @staticmethod + def role_style_text(role, style, text): + return f'{text}' + + @staticmethod + def role_text(role, text): + return f'{text}' + + @staticmethod + def style_text(style, text): + return f'{text}' if __name__ == "__main__": azure_tts = AzureTTS("azure_tts") + text = """ + 女儿看见父亲走了进来,问道: + + “您来的挺快的,怎么过来的?” + + 父亲放下手提包,说: + + “刚打车过来的,路上还挺顺畅。” + + """ + path = WORKSPACE_ROOT / "tts" + path.mkdir(exist_ok=True, parents=True) + filename = path / "output.wav" azure_tts.synthesize_speech( "zh-CN", "zh-CN-YunxiNeural", - "Boy", - "你好,我是卡卡", - "output.wav") + text=AzureTTS.role_style_text(role="Boy", style="affectionate", text="你好,我是卡卡"), + output_file=str(filename) + ) diff --git a/tests/metagpt/actions/test_azure_tts.py b/tests/metagpt/actions/test_azure_tts.py index b5a333af2..2145f7133 100644 --- a/tests/metagpt/actions/test_azure_tts.py +++ b/tests/metagpt/actions/test_azure_tts.py @@ -4,18 +4,38 @@ @Time : 2023/7/1 22:50 @Author : alexanderwu @File : test_azure_tts.py +@Modified By: mashenquan, 2023-8-9, add more text formatting options """ from metagpt.actions.azure_tts import AzureTTS +from metagpt.const import WORKSPACE_ROOT def test_azure_tts(): azure_tts = AzureTTS("azure_tts") - azure_tts.synthesize_speech( + text = """ + 女儿看见父亲走了进来,问道: + + “您来的挺快的,怎么过来的?” + + 父亲放下手提包,说: + + “Writing a binary file in Python is similar to writing a regular text file, but you'll work with bytes instead of strings.” + + """ + path = WORKSPACE_ROOT / "tts" + path.mkdir(exist_ok=True, parents=True) + filename = path / "girl.wav" + result = azure_tts.synthesize_speech( "zh-CN", - "zh-CN-YunxiNeural", - "Boy", - "你好,我是卡卡", - "output.wav") + "zh-CN-XiaomoNeural", + text=text, + output_file=str(filename)) + + print(result) # 运行需要先配置 SUBSCRIPTION_KEY # TODO: 这里如果要检验,还要额外加上对应的asr,才能确保前后生成是接近一致的,但现在还没有 + + +if __name__ == '__main__': + test_azure_tts()