mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-11 15:15:18 +02:00
feat: +iflytek tts
This commit is contained in:
parent
c1aa932210
commit
f8aea281a8
1 changed files with 162 additions and 0 deletions
162
metagpt/tools/iflytek_tts.py
Normal file
162
metagpt/tools/iflytek_tts.py
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/8/17
|
||||
@Author : mashenquan
|
||||
@File : iflytek_tts.py
|
||||
@Desc : iFLYTEK TTS OAS3 api, which provides text-to-speech functionality
|
||||
"""
|
||||
import asyncio
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from time import mktime
|
||||
from typing import Optional
|
||||
from urllib.parse import urlencode
|
||||
from wsgiref.handlers import format_date_time
|
||||
|
||||
import aiofiles
|
||||
import websockets as websockets
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metagpt.config import CONFIG
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
class IFlyTekTTSStatus(Enum):
|
||||
STATUS_FIRST_FRAME = 0 # The first frame
|
||||
STATUS_CONTINUE_FRAME = 1 # The intermediate frame
|
||||
STATUS_LAST_FRAME = 2 # The last frame
|
||||
|
||||
|
||||
class AudioData(BaseModel):
|
||||
audio: str
|
||||
status: int
|
||||
ced: str
|
||||
|
||||
|
||||
class IFlyTekTTSResponse(BaseModel):
|
||||
code: int
|
||||
message: str
|
||||
data: Optional[AudioData] = None
|
||||
sid: str
|
||||
|
||||
|
||||
DEFAULT_IFLYTEK_VOICE = "xiaoyan"
|
||||
|
||||
|
||||
class IFlyTekTTS(object):
|
||||
def __init__(self, app_id: str, api_key: str, api_secret: str):
|
||||
"""
|
||||
:param app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`
|
||||
:param api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
:param api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
"""
|
||||
self.app_id = app_id or CONFIG.IFLYTEK_APP_ID
|
||||
self.api_key = api_key or CONFIG.IFLYTEK_API_KEY
|
||||
self.api_secret = api_secret or CONFIG.API_SECRET
|
||||
|
||||
async def synthesize_speech(self, text, output_file: str, voice=DEFAULT_IFLYTEK_VOICE):
|
||||
url = self._create_url()
|
||||
data = {
|
||||
"common": {"app_id": self.app_id},
|
||||
"business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": voice, "tte": "utf8"},
|
||||
"data": {"status": 2, "text": str(base64.b64encode(text.encode("utf-8")), "UTF8")},
|
||||
}
|
||||
req = json.dumps(data)
|
||||
async with websockets.connect(url) as websocket:
|
||||
# send request
|
||||
await websocket.send(req)
|
||||
|
||||
# receive frames
|
||||
async with aiofiles.open(str(output_file), "w") as writer:
|
||||
while True:
|
||||
v = await websocket.recv()
|
||||
rsp = IFlyTekTTSResponse(**json.loads(v))
|
||||
if rsp.data:
|
||||
await writer.write(rsp.data.audio)
|
||||
if rsp.data.status != IFlyTekTTSStatus.STATUS_LAST_FRAME.value:
|
||||
continue
|
||||
break
|
||||
|
||||
def _create_url(self):
|
||||
"""Create request url"""
|
||||
url = "wss://tts-api.xfyun.cn/v2/tts"
|
||||
# Generate a timestamp in RFC1123 format
|
||||
now = datetime.now()
|
||||
date = format_date_time(mktime(now.timetuple()))
|
||||
|
||||
signature_origin = "host: " + "ws-api.xfyun.cn" + "\n"
|
||||
signature_origin += "date: " + date + "\n"
|
||||
signature_origin += "GET " + "/v2/tts " + "HTTP/1.1"
|
||||
# Perform HMAC-SHA256 encryption
|
||||
signature_sha = hmac.new(
|
||||
self.api_secret.encode("utf-8"), signature_origin.encode("utf-8"), digestmod=hashlib.sha256
|
||||
).digest()
|
||||
signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
|
||||
|
||||
authorization_origin = 'api_key="%s", algorithm="%s", headers="%s", signature="%s"' % (
|
||||
self.api_key,
|
||||
"hmac-sha256",
|
||||
"host date request-line",
|
||||
signature_sha,
|
||||
)
|
||||
authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(encoding="utf-8")
|
||||
# Combine the authentication parameters of the request into a dictionary.
|
||||
v = {"authorization": authorization, "date": date, "host": "ws-api.xfyun.cn"}
|
||||
# Concatenate the authentication parameters to generate the URL.
|
||||
url = url + "?" + urlencode(v)
|
||||
return url
|
||||
|
||||
|
||||
# Export
|
||||
async def oas3_iflytek_tts(text: str, voice: str = "", app_id: str = "", api_key: str = "", api_secret: str = ""):
|
||||
"""Text to speech
|
||||
For more details, check out:`https://www.xfyun.cn/doc/tts/online_tts/API.html`
|
||||
|
||||
:param voice: Default `xiaoyan`. For more details, checkout: `https://www.xfyun.cn/doc/tts/online_tts/API.html#%E6%8E%A5%E5%8F%A3%E8%B0%83%E7%94%A8%E6%B5%81%E7%A8%8B`
|
||||
:param text: The text used for voice conversion.
|
||||
:param app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`
|
||||
:param api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
:param api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
:return: Returns the Base64-encoded .mp3 file data if successful, otherwise an empty string.
|
||||
|
||||
"""
|
||||
if not app_id:
|
||||
app_id = CONFIG.IFLYTEK_APP_ID
|
||||
if not api_key:
|
||||
api_key = CONFIG.IFLYTEK_API_KEY
|
||||
if not api_secret:
|
||||
api_secret = CONFIG.IFLYTEK_API_SECRET
|
||||
if not voice:
|
||||
voice = CONFIG.IFLYTEK_VOICE or DEFAULT_IFLYTEK_VOICE
|
||||
|
||||
filename = Path(__file__).parent / (str(uuid.uuid4()).replace("-", "") + ".mp3")
|
||||
try:
|
||||
tts = IFlyTekTTS(app_id=app_id, api_key=api_key, api_secret=api_secret)
|
||||
await tts.synthesize_speech(text=text, output_file=str(filename), voice=voice)
|
||||
async with aiofiles.open(str(filename), mode="r") as reader:
|
||||
base64_string = await reader.read()
|
||||
except Exception as e:
|
||||
logger.error(f"text:{text}, error:{e}")
|
||||
base64_string = ""
|
||||
finally:
|
||||
filename.unlink()
|
||||
|
||||
return base64_string
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.get_event_loop().run_until_complete(
|
||||
oas3_iflytek_tts(
|
||||
text="你好,hello",
|
||||
app_id="f7acef62",
|
||||
api_key="fda72e3aa286042a492525816a5efa08",
|
||||
api_secret="ZDk3NjdiMDBkODJlOWQ1NjRjMGI2NDY4",
|
||||
)
|
||||
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue