Merge pull request #772 from Justin-ZL/dev

add: openai speech to text
2026-06-17 15:35:21 +02:00 · 2024-01-18 19:14:36 +08:00 · 2024-01-18 19:14:36 +08:00 · 6b170e0b9b
commit 6b170e0b9b
parent 420b10c5c3 99511fd264
2 changed files with 14 additions and 1 deletions
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@ -239,3 +239,7 @@ class OpenAILLM(BaseLLM):
    async def atext_to_speech(self, **kwargs):
        """text to speech"""
        return await self.aclient.audio.speech.create(**kwargs)
+
+    async def aspeech_to_text(self, **kwargs):
+        """Speech to text: pass ``kwargs`` to ``aclient.audio.transcriptions.create`` and return the result."""
+        return await self.aclient.audio.transcriptions.create(**kwargs)
--- a/tests/metagpt/provider/test_openai.py
+++ b/tests/metagpt/provider/test_openai.py
@ -1,5 +1,6 @@
 import pytest

+from metagpt.const import TEST_DATA_PATH
 from metagpt.llm import LLM
 from metagpt.logs import logger
 from metagpt.provider import OpenAILLM
@ -48,11 +49,19 @@ async def test_text_to_speech():
    resp = await llm.atext_to_speech(
        model="tts-1",
        voice="alloy",
-        input="人生说起来长，但知道一个岁月回头看，许多事件仅是仓促的。一段一段拼凑一起，合成了人生。苦难当头时，当下不免觉得是折磨；回头看，也不够是一段短短的人生旅程。",
+        input="人生说起来长，但直到一个岁月回头看，许多事件仅是仓促的。一段一段拼凑一起，合成了人生。苦难当头时，当下不免觉得是折磨；回头看，也不够是一段短短的人生旅程。",
    )
    assert 200 == resp.response.status_code


+@pytest.mark.asyncio
+async def test_speech_to_text():
+    llm = LLM()
+    with open(f"{TEST_DATA_PATH}/audio/hello.mp3", "rb") as audio_file:  # closed even if the call raises
+        resp = await llm.aspeech_to_text(file=audio_file, model="whisper-1")  # file must stay open while awaited
+    assert "你好" == resp.text
+
+
 class TestOpenAI:
    def test_make_client_kwargs_without_proxy(self):
        instance = OpenAILLM(mock_llm_config)