Merge pull request #772 from Justin-ZL/dev

add: openai speech to text
This commit is contained in:
geekan 2024-01-18 19:14:36 +08:00 committed by GitHub
commit 6b170e0b9b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 14 additions and 1 deletions

View file

@ -239,3 +239,7 @@ class OpenAILLM(BaseLLM):
async def atext_to_speech(self, **kwargs):
    """Convert text to speech.

    Every keyword argument is forwarded unchanged to
    ``self.aclient.audio.speech.create`` and the awaited result of that
    call is returned as-is.
    """
    speech_endpoint = self.aclient.audio.speech
    speech_response = await speech_endpoint.create(**kwargs)
    return speech_response
async def aspeech_to_text(self, **kwargs):
    """Convert speech to text.

    Every keyword argument is forwarded unchanged to
    ``self.aclient.audio.transcriptions.create`` and the awaited result of
    that call is returned as-is.
    """
    transcription_endpoint = self.aclient.audio.transcriptions
    transcription = await transcription_endpoint.create(**kwargs)
    return transcription

View file

@ -1,5 +1,6 @@
import pytest
from metagpt.const import TEST_DATA_PATH
from metagpt.llm import LLM
from metagpt.logs import logger
from metagpt.provider import OpenAILLM
@ -48,11 +49,19 @@ async def test_text_to_speech():
resp = await llm.atext_to_speech(
model="tts-1",
voice="alloy",
input="人生说起来长,但知道一个岁月回头看,许多事件仅是仓促的。一段一段拼凑一起,合成了人生。苦难当头时,当下不免觉得是折磨;回头看,也不够是一段短短的人生旅程。",
input="人生说起来长,但直到一个岁月回头看,许多事件仅是仓促的。一段一段拼凑一起,合成了人生。苦难当头时,当下不免觉得是折磨;回头看,也不够是一段短短的人生旅程。",
)
assert 200 == resp.response.status_code
@pytest.mark.asyncio
async def test_speech_to_text():
    """Transcribe the ``audio/hello.mp3`` sample under ``TEST_DATA_PATH``.

    Sends the file to ``LLM().aspeech_to_text`` with the ``whisper-1`` model
    and asserts that the returned object's ``text`` equals ``"你好"``.
    """
    llm = LLM()
    # Use a context manager so the file handle is closed even when the
    # request raises, instead of leaking it for the rest of the test session.
    with open(f"{TEST_DATA_PATH}/audio/hello.mp3", "rb") as audio_file:
        resp = await llm.aspeech_to_text(file=audio_file, model="whisper-1")
    assert "你好" == resp.text
class TestOpenAI:
def test_make_client_kwargs_without_proxy(self):
instance = OpenAILLM(mock_llm_config)