diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 3a9aca870..d6944eae6 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -239,3 +239,7 @@ class OpenAILLM(BaseLLM): async def atext_to_speech(self, **kwargs): """text to speech""" return await self.aclient.audio.speech.create(**kwargs) + + async def aspeech_to_text(self, **kwargs): + """speech to text""" + return await self.aclient.audio.transcriptions.create(**kwargs) diff --git a/tests/metagpt/provider/test_openai.py b/tests/metagpt/provider/test_openai.py index 2d52ad10e..7a0dbe5c4 100644 --- a/tests/metagpt/provider/test_openai.py +++ b/tests/metagpt/provider/test_openai.py @@ -1,5 +1,6 @@ import pytest +from metagpt.const import TEST_DATA_PATH from metagpt.llm import LLM from metagpt.logs import logger from metagpt.provider import OpenAILLM @@ -53,6 +54,14 @@ async def test_text_to_speech(): assert 200 == resp.response.status_code +@pytest.mark.asyncio +async def test_speech_to_text(): + llm = LLM() + audio_file = open(f"{TEST_DATA_PATH}/audio/hello.mp3", "rb") + resp = await llm.aspeech_to_text(file=audio_file, model="whisper-1") + assert "你好" == resp.text + + class TestOpenAI: def test_make_client_kwargs_without_proxy(self): instance = OpenAILLM(mock_llm_config)