diff --git a/metagpt/actions/azure_tts.py b/metagpt/actions/azure_tts.py
index f528ba001..3520de8b4 100644
--- a/metagpt/actions/azure_tts.py
+++ b/metagpt/actions/azure_tts.py
@@ -4,11 +4,13 @@
@Time : 2023/6/9 22:22
@Author : Leo Xiao
@File : azure_tts.py
+@Modified By: mashenquan, 2023-8-9, add more text formatting options
"""
from azure.cognitiveservices.speech import AudioConfig, SpeechConfig, SpeechSynthesizer
from metagpt.actions.action import Action
from metagpt.config import Config
+from metagpt.const import WORKSPACE_ROOT
class AzureTTS(Action):
@@ -17,7 +19,7 @@ class AzureTTS(Action):
self.config = Config()
# 参数参考:https://learn.microsoft.com/zh-cn/azure/cognitive-services/speech-service/language-support?tabs=tts#voice-styles-and-roles
- def synthesize_speech(self, lang, voice, role, text, output_file):
+ def synthesize_speech(self, lang, voice, text, output_file):
subscription_key = self.config.get('AZURE_TTS_SUBSCRIPTION_KEY')
region = self.config.get('AZURE_TTS_REGION')
speech_config = SpeechConfig(
@@ -29,25 +31,47 @@ class AzureTTS(Action):
speech_config=speech_config,
audio_config=audio_config)
- # if voice=="zh-CN-YunxiNeural":
+ # More detail: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice
ssml_string = f"""
-
- {text}
-
+ {text}
"""
- synthesizer.speak_ssml_async(ssml_string).get()
+ return synthesizer.speak_ssml_async(ssml_string).get()
+    @staticmethod
+    def role_style_text(role, style, text):
+        """Wrap ``text`` in an SSML ``mstts:express-as`` element carrying a role and a style.
+
+        The returned fragment is meant to be passed as the ``text`` argument of
+        ``synthesize_speech``.
+        NOTE(review): the ``mstts`` prefix must be declared on the enclosing SSML
+        document -- presumably done by ``synthesize_speech``; confirm.
+
+        :param role: Value for the ``role`` attribute, e.g. ``"Boy"``.
+        :param style: Value for the ``style`` attribute, e.g. ``"affectionate"``.
+        :param text: Content to speak; inserted verbatim (not XML-escaped), so it
+            may itself contain SSML markup.
+        :return: The SSML fragment as a string.
+        """
+        return f'<mstts:express-as role="{role}" style="{style}">{text}</mstts:express-as>'
+
+    @staticmethod
+    def role_text(role, text):
+        """Wrap ``text`` in an SSML ``mstts:express-as`` element that sets only a role.
+
+        NOTE(review): the ``mstts`` prefix must be declared on the enclosing SSML
+        document -- presumably done by ``synthesize_speech``; confirm.
+
+        :param role: Value for the ``role`` attribute, e.g. ``"Boy"``.
+        :param text: Content to speak; inserted verbatim (not XML-escaped), so it
+            may itself contain SSML markup.
+        :return: The SSML fragment as a string.
+        """
+        return f'<mstts:express-as role="{role}">{text}</mstts:express-as>'
+
+    @staticmethod
+    def style_text(style, text):
+        """Wrap ``text`` in an SSML ``mstts:express-as`` element that sets only a style.
+
+        NOTE(review): the ``mstts`` prefix must be declared on the enclosing SSML
+        document -- presumably done by ``synthesize_speech``; confirm.
+
+        :param style: Value for the ``style`` attribute, e.g. ``"affectionate"``.
+        :param text: Content to speak; inserted verbatim (not XML-escaped), so it
+            may itself contain SSML markup.
+        :return: The SSML fragment as a string.
+        """
+        return f'<mstts:express-as style="{style}">{text}</mstts:express-as>'
if __name__ == "__main__":
+ # Manual demo: requests synthesis of one short utterance, passing
+ # WORKSPACE_ROOT/tts/output.wav as the output file.
+ # synthesize_speech reads AZURE_TTS_SUBSCRIPTION_KEY and AZURE_TTS_REGION from Config.
azure_tts = AzureTTS("azure_tts")
+ # NOTE(review): this sample dialogue is assigned but never referenced by the call below.
+ text = """
+ 女儿看见父亲走了进来,问道:
+
+ “您来的挺快的,怎么过来的?”
+
+ 父亲放下手提包,说:
+
+ “刚打车过来的,路上还挺顺畅。”
+
+ """
+ # Create the output directory (and any missing parents) before building the file path.
+ path = WORKSPACE_ROOT / "tts"
+ path.mkdir(exist_ok=True, parents=True)
+ filename = path / "output.wav"
azure_tts.synthesize_speech(
"zh-CN",
"zh-CN-YunxiNeural",
- "Boy",
- "你好,我是卡卡",
- "output.wav")
+ # Role and style go through AzureTTS.role_style_text; its result becomes the text argument.
+ text=AzureTTS.role_style_text(role="Boy", style="affectionate", text="你好,我是卡卡"),
+ output_file=str(filename)
+ )
diff --git a/tests/metagpt/actions/test_azure_tts.py b/tests/metagpt/actions/test_azure_tts.py
index b5a333af2..2145f7133 100644
--- a/tests/metagpt/actions/test_azure_tts.py
+++ b/tests/metagpt/actions/test_azure_tts.py
@@ -4,18 +4,38 @@
@Time : 2023/7/1 22:50
@Author : alexanderwu
@File : test_azure_tts.py
+@Modified By: mashenquan, 2023-8-9, add more text formatting options
"""
from metagpt.actions.azure_tts import AzureTTS
+from metagpt.const import WORKSPACE_ROOT
def test_azure_tts():
+ """Call AzureTTS.synthesize_speech on a mixed Chinese/English sample and print the result.
+
+ The output path passed in is WORKSPACE_ROOT/tts/girl.wav. No assertion is made
+ on the returned value; it is only printed.
+ """
azure_tts = AzureTTS("azure_tts")
- azure_tts.synthesize_speech(
+ text = """
+ 女儿看见父亲走了进来,问道:
+
+ “您来的挺快的,怎么过来的?”
+
+ 父亲放下手提包,说:
+
+ “Writing a binary file in Python is similar to writing a regular text file, but you'll work with bytes instead of strings.”
+
+ """
+ # Create the output directory (and any missing parents) before building the file path.
+ path = WORKSPACE_ROOT / "tts"
+ path.mkdir(exist_ok=True, parents=True)
+ filename = path / "girl.wav"
+ # Keep the return value so it can be inspected below.
+ result = azure_tts.synthesize_speech(
"zh-CN",
- "zh-CN-YunxiNeural",
- "Boy",
- "你好,我是卡卡",
- "output.wav")
+ "zh-CN-XiaomoNeural",
+ text=text,
+ output_file=str(filename))
+
+ print(result)
# 运行需要先配置 SUBSCRIPTION_KEY
# TODO: 这里如果要检验,还要额外加上对应的asr,才能确保前后生成是接近一致的,但现在还没有
+
+
+# Allow running this test directly as a script, without a test runner.
+if __name__ == '__main__':
+ test_azure_tts()