add: openai speech to text

2026-07-23 17:01:08 +02:00 · 2024-01-18 12:37:28 +08:00 · 2024-01-18 12:37:28 +08:00 · 002bc56c0e
commit 002bc56c0e
parent 420b10c5c3
2 changed files with 13 additions and 0 deletions
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@ -239,3 +239,7 @@ class OpenAILLM(BaseLLM):
    async def atext_to_speech(self, **kwargs):
        """text to speech"""
        return await self.aclient.audio.speech.create(**kwargs)
+
+    async def aspeech_to_text(self, **kwargs):
+        """Speech to text: forward **kwargs to ``aclient.audio.transcriptions.create`` and return the awaited result."""
+        return await self.aclient.audio.transcriptions.create(**kwargs)
--- a/tests/metagpt/provider/test_openai.py
+++ b/tests/metagpt/provider/test_openai.py
@ -1,5 +1,6 @@
 import pytest

+from metagpt.const import TEST_DATA_PATH
 from metagpt.llm import LLM
 from metagpt.logs import logger
 from metagpt.provider import OpenAILLM
@ -53,6 +54,14 @@ async def test_text_to_speech():
    assert 200 == resp.response.status_code


+@pytest.mark.asyncio
+async def test_speech_to_text():
+    """Transcribe the hello.mp3 fixture with whisper-1 and compare the returned text."""
+    with open(f"{TEST_DATA_PATH}/audio/hello.mp3", "rb") as audio_file:  # closed even if the call raises
+        resp = await LLM().aspeech_to_text(file=audio_file, model="whisper-1")
+    assert "你好" == resp.text
+
+
 class TestOpenAI:
    def test_make_client_kwargs_without_proxy(self):
        instance = OpenAILLM(mock_llm_config)