From 002bc56c0e68a79b7e5311b6799c946d6dd633bf Mon Sep 17 00:00:00 2001
From: zhanglei
Date: Thu, 18 Jan 2024 12:37:28 +0800
Subject: [PATCH 1/2] add: openai speech to text

---
Review notes (text between "---" and the first "diff --git" is ignored by git-am):
- test_speech_to_text now opens the audio fixture inside a `with` block so the
  file handle is closed even if the request raises; as submitted it was opened
  with a bare open() and never closed. The `index` blob ids below (and the
  pre-image id in patch 2/2) still describe the revision as submitted.
- The diffstat adds no audio file: confirm {TEST_DATA_PATH}/audio/hello.mp3
  already exists in the tree, otherwise the new test fails on open().

 metagpt/provider/openai_api.py        | 4 ++++
 tests/metagpt/provider/test_openai.py | 9 +++++++++
 2 files changed, 13 insertions(+)

diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py
index 3a9aca870..d6944eae6 100644
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@@ -239,3 +239,7 @@ class OpenAILLM(BaseLLM):
     async def atext_to_speech(self, **kwargs):
         """text to speech"""
         return await self.aclient.audio.speech.create(**kwargs)
+
+    async def aspeech_to_text(self, **kwargs):
+        """speech to text"""
+        return await self.aclient.audio.transcriptions.create(**kwargs)
diff --git a/tests/metagpt/provider/test_openai.py b/tests/metagpt/provider/test_openai.py
index 2d52ad10e..7a0dbe5c4 100644
--- a/tests/metagpt/provider/test_openai.py
+++ b/tests/metagpt/provider/test_openai.py
@@ -1,5 +1,6 @@
 import pytest
 
+from metagpt.const import TEST_DATA_PATH
 from metagpt.llm import LLM
 from metagpt.logs import logger
 from metagpt.provider import OpenAILLM
@@ -53,6 +54,14 @@ async def test_text_to_speech():
     assert 200 == resp.response.status_code
 
 
+@pytest.mark.asyncio
+async def test_speech_to_text():
+    llm = LLM()
+    with open(f"{TEST_DATA_PATH}/audio/hello.mp3", "rb") as audio_file:
+        resp = await llm.aspeech_to_text(file=audio_file, model="whisper-1")
+    assert "你好" == resp.text
+
+
 class TestOpenAI:
     def test_make_client_kwargs_without_proxy(self):
         instance = OpenAILLM(mock_llm_config)

From 99511fd264ec9354cb411c0762bd75f8850d7c74 Mon Sep 17 00:00:00 2001
From: zhanglei
Date: Thu, 18 Jan 2024 12:41:53 +0800
Subject: [PATCH 2/2] update:OpenAI text to speech unittest

---
 tests/metagpt/provider/test_openai.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/metagpt/provider/test_openai.py b/tests/metagpt/provider/test_openai.py
index 7a0dbe5c4..bc7f92f33 100644
--- a/tests/metagpt/provider/test_openai.py
+++ b/tests/metagpt/provider/test_openai.py
@@ -49,7 +49,7 @@ async def test_text_to_speech():
     resp = await llm.atext_to_speech(
         model="tts-1",
         voice="alloy",
-        input="人生说起来长,但知道一个岁月回头看,许多事件仅是仓促的。一段一段拼凑一起,合成了人生。苦难当头时,当下不免觉得是折磨;回头看,也不够是一段短短的人生旅程。",
+        input="人生说起来长,但直到一个岁月回头看,许多事件仅是仓促的。一段一段拼凑一起,合成了人生。苦难当头时,当下不免觉得是折磨;回头看,也不够是一段短短的人生旅程。",
     )
     assert 200 == resp.response.status_code
 