From f8aea281a85fde07459780f3e1f7e3b5a1e27e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Tue, 5 Sep 2023 19:11:22 +0800 Subject: [PATCH] feat: +iflytek tts --- metagpt/tools/iflytek_tts.py | 162 +++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 metagpt/tools/iflytek_tts.py diff --git a/metagpt/tools/iflytek_tts.py b/metagpt/tools/iflytek_tts.py new file mode 100644 index 000000000..a91d8091b --- /dev/null +++ b/metagpt/tools/iflytek_tts.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/17 +@Author : mashenquan +@File : iflytek_tts.py +@Desc : iFLYTEK TTS OAS3 api, which provides text-to-speech functionality +""" +import asyncio +import base64 +import hashlib +import hmac +import json +import uuid +from datetime import datetime +from enum import Enum +from pathlib import Path +from time import mktime +from typing import Optional +from urllib.parse import urlencode +from wsgiref.handlers import format_date_time + +import aiofiles +import websockets as websockets +from pydantic import BaseModel + +from metagpt.config import CONFIG +from metagpt.logs import logger + + +class IFlyTekTTSStatus(Enum): + STATUS_FIRST_FRAME = 0 # The first frame + STATUS_CONTINUE_FRAME = 1 # The intermediate frame + STATUS_LAST_FRAME = 2 # The last frame + + +class AudioData(BaseModel): + audio: str + status: int + ced: str + + +class IFlyTekTTSResponse(BaseModel): + code: int + message: str + data: Optional[AudioData] = None + sid: str + + +DEFAULT_IFLYTEK_VOICE = "xiaoyan" + + +class IFlyTekTTS(object): + def __init__(self, app_id: str, api_key: str, api_secret: str): + """ + :param app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts` + :param api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + :param api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + """ + self.app_id = app_id or CONFIG.IFLYTEK_APP_ID + self.api_key = api_key or CONFIG.IFLYTEK_API_KEY + self.api_secret = api_secret or CONFIG.API_SECRET + + async def synthesize_speech(self, text, output_file: str, voice=DEFAULT_IFLYTEK_VOICE): + url = self._create_url() + data = { + "common": {"app_id": self.app_id}, + "business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": voice, "tte": "utf8"}, + "data": {"status": 2, "text": str(base64.b64encode(text.encode("utf-8")), "UTF8")}, + } + req = json.dumps(data) + async with websockets.connect(url) as websocket: + # send request + await websocket.send(req) + + # receive frames + async with aiofiles.open(str(output_file), "w") as writer: + while True: + v = await websocket.recv() + rsp = IFlyTekTTSResponse(**json.loads(v)) + if rsp.data: + await writer.write(rsp.data.audio) + if rsp.data.status != IFlyTekTTSStatus.STATUS_LAST_FRAME.value: + continue + break + + def _create_url(self): + """Create request url""" + url = "wss://tts-api.xfyun.cn/v2/tts" + # Generate a timestamp in RFC1123 format + now = datetime.now() + date = format_date_time(mktime(now.timetuple())) + + signature_origin = "host: " + "ws-api.xfyun.cn" + "\n" + signature_origin += "date: " + date + "\n" + signature_origin += "GET " + "/v2/tts " + "HTTP/1.1" + # Perform HMAC-SHA256 encryption + signature_sha = hmac.new( + self.api_secret.encode("utf-8"), signature_origin.encode("utf-8"), digestmod=hashlib.sha256 + ).digest() + signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8") + + authorization_origin = 'api_key="%s", algorithm="%s", headers="%s", signature="%s"' % ( + self.api_key, + "hmac-sha256", + "host date request-line", + signature_sha, + ) + authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(encoding="utf-8") + # Combine the authentication parameters of the request into a dictionary. + v = {"authorization": authorization, "date": date, "host": "ws-api.xfyun.cn"} + # Concatenate the authentication parameters to generate the URL. + url = url + "?" + urlencode(v) + return url + + +# Export +async def oas3_iflytek_tts(text: str, voice: str = "", app_id: str = "", api_key: str = "", api_secret: str = ""): + """Text to speech + For more details, check out:`https://www.xfyun.cn/doc/tts/online_tts/API.html` + + :param voice: Default `xiaoyan`. For more details, checkout: `https://www.xfyun.cn/doc/tts/online_tts/API.html#%E6%8E%A5%E5%8F%A3%E8%B0%83%E7%94%A8%E6%B5%81%E7%A8%8B` + :param text: The text used for voice conversion. + :param app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts` + :param api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + :param api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + :return: Returns the Base64-encoded .mp3 file data if successful, otherwise an empty string. + + """ + if not app_id: + app_id = CONFIG.IFLYTEK_APP_ID + if not api_key: + api_key = CONFIG.IFLYTEK_API_KEY + if not api_secret: + api_secret = CONFIG.IFLYTEK_API_SECRET + if not voice: + voice = CONFIG.IFLYTEK_VOICE or DEFAULT_IFLYTEK_VOICE + + filename = Path(__file__).parent / (str(uuid.uuid4()).replace("-", "") + ".mp3") + try: + tts = IFlyTekTTS(app_id=app_id, api_key=api_key, api_secret=api_secret) + await tts.synthesize_speech(text=text, output_file=str(filename), voice=voice) + async with aiofiles.open(str(filename), mode="r") as reader: + base64_string = await reader.read() + except Exception as e: + logger.error(f"text:{text}, error:{e}") + base64_string = "" + finally: + filename.unlink() + + return base64_string + + +if __name__ == "__main__": + asyncio.get_event_loop().run_until_complete( + oas3_iflytek_tts( + text="你好,hello", + app_id="f7acef62", + api_key="fda72e3aa286042a492525816a5efa08", + api_secret="ZDk3NjdiMDBkODJlOWQ1NjRjMGI2NDY4", + ) + )