diff --git a/.well-known/metagpt_oas3_api.yaml b/.well-known/metagpt_oas3_api.yaml index 56c6f42d5..0a702e8b6 100644 --- a/.well-known/metagpt_oas3_api.yaml +++ b/.well-known/metagpt_oas3_api.yaml @@ -13,10 +13,17 @@ servers: paths: /tts/azsure: x-prerequisite: - - name: AZURE_TTS_SUBSCRIPTION_KEY - description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" - - name: AZURE_TTS_REGION - description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + configurations: + AZURE_TTS_SUBSCRIPTION_KEY: + type: string + description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + AZURE_TTS_REGION: + type: string + description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + required: + allOf: + - AZURE_TTS_SUBSCRIPTION_KEY + - AZURE_TTS_REGION post: summary: "Convert Text to Base64-encoded .wav File Stream" description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" @@ -73,10 +80,81 @@ paths: '500': description: "Internal Server Error" + /tts/iflytek: + x-prerequisite: + configurations: + IFLYTEK_APP_ID: + type: string + description: "Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`" + IFLYTEK_API_KEY: + type: string + description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`" + IFLYTEK_API_SECRET: + type: string + description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`" + required: + allOf: + - IFLYTEK_APP_ID + - IFLYTEK_API_KEY + - IFLYTEK_API_SECRET + post: + summary: "Convert Text to Base64-encoded .mp3 File Stream" + description: "For more details, check out: [iFlyTek](https://console.xfyun.cn/services/tts)" + operationId: iflytek_tts.oas3_iflytek_tts + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - text + properties: + text: + type: string + description: Text to convert + voice: + type: string + description: "Voice style, see: [iFlyTek Text-to_Speech](https://www.xfyun.cn/doc/tts/online_tts/API.html#%E6%8E%A5%E5%8F%A3%E8%B0%83%E7%94%A8%E6%B5%81%E7%A8%8B)" + default: "xiaoyan" + app_id: + type: string + description: "Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`" + default: "" + api_key: + type: string + description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`" + default: "" + api_secret: + type: string + description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`" + default: "" + responses: + '200': + description: "Base64-encoded .mp3 file data if successful, otherwise an empty string." + content: + application/json: + schema: + type: object + properties: + wav_data: + type: string + format: base64 + '400': + description: "Bad Request" + '500': + description: "Internal Server Error" + + /txt2img/openai: x-prerequisite: - - name: OPENAI_API_KEY - description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`" + configurations: + OPENAI_API_KEY: + type: string + description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`" + required: + allOf: + - OPENAI_API_KEY post: summary: "Convert Text to Base64-encoded Image Data Stream" operationId: openai_text_to_image.oas3_openai_text_to_image @@ -116,8 +194,13 @@ paths: description: "Internal Server Error" /txt2embedding/openai: x-prerequisite: - - name: OPENAI_API_KEY - description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`" + configurations: + OPENAI_API_KEY: + type: string + description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`" + required: + allOf: + - OPENAI_API_KEY post: summary: Text to embedding operationId: openai_text_to_embedding.oas3_openai_text_to_embedding @@ -158,8 +241,13 @@ paths: /txt2image/metagpt: x-prerequisite: - - name: METAGPT_TEXT_TO_IMAGE_MODEL_URL - description: "Model url." + configurations: + METAGPT_TEXT_TO_IMAGE_MODEL_URL: + type: string + description: "Model url." + required: + allOf: + - METAGPT_TEXT_TO_IMAGE_MODEL_URL post: summary: "Text to Image" description: "Generate an image from the provided text using the MetaGPT Text-to-Image API." diff --git a/.well-known/skills.yaml b/.well-known/skills.yaml index 009368dbe..c19a9501e 100644 --- a/.well-known/skills.yaml +++ b/.well-known/skills.yaml @@ -1,66 +1,161 @@ +skillapi: "0.1.0" + +info: + title: "Agent Skill Specification" + version: "1.0" + entities: Assistant: - skills: - - name: text_to_speech - description: Text-to-speech - id: text_to_speech.text_to_speech - x-prerequisite: - - name: AZURE_TTS_SUBSCRIPTION_KEY - description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" - - name: AZURE_TTS_REGION - description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" - arguments: - text: 'The text used for voice conversion. Required.' - lang: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States). The optional parameter are "English", "Chinese". Default value: "Chinese".' - voice: 'Default value: "zh-CN-XiaomoNeural".' - style: 'Speaking style to express different emotions like cheerfulness, empathy, and calm. The optional parameter values are "affectionate", "angry", "calm", "cheerful", "depressed", "disgruntled", "embarrassed", "envious", "fearful", "gentle", "sad", "serious". Default value: "affectionate".' - role: 'With roles, the same voice can act as a different age and gender. The optional parameter values are "Girl", "Boy", "OlderAdultFemale", "OlderAdultMale", "SeniorFemale", "SeniorMale", "YoungAdultFemale", "YoungAdultMale". Default value: "Girl".' - examples: - - ask: 'A girl says "hello world"' - answer: 'text_to_speech(text="hello world", role="Girl")' - - ask: 'A boy affectionate says "hello world"' - answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")' - - ask: 'A boy says "你好"' - answer: 'text_to_speech(text="hello world", role="Boy", lang="Chinese")' - returns: - type: string - format: base64 + summary: assistant + description: assistant + skills: + - name: text_to_speech + description: Generate a voice file from the input text, text-to-speech + id: text_to_speech.text_to_speech + x-prerequisite: + configurations: + AZURE_TTS_SUBSCRIPTION_KEY: + type: string + description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + AZURE_TTS_REGION: + type: string + description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + IFLYTEK_APP_ID: + type: string + description: "Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`" + IFLYTEK_API_KEY: + type: string + description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`" + IFLYTEK_API_SECRET: + type: string + description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`" + required: + oneOf: + - allOf: + - AZURE_TTS_SUBSCRIPTION_KEY + - AZURE_TTS_REGION + - allOf: + - IFLYTEK_APP_ID + - IFLYTEK_API_KEY + - IFLYTEK_API_SECRET + parameters: + text: + description: 'The text used for voice conversion.' + required: true + type: string + lang: + description: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States).' + type: string + enum: + - English + - Chinese + default: Chinese + voice: + description: Name of voice styles + type: string + default: zh-CN-XiaomoNeural + style: + type: string + description: Speaking style to express different emotions like cheerfulness, empathy, and calm. + enum: + - affectionate + - angry + - calm + - cheerful + - depressed + - disgruntled + - embarrassed + - envious + - fearful + - gentle + - sad + - serious + default: affectionate + role: + type: string + description: With roles, the same voice can act as a different age and gender. + enum: + - Girl + - Boy + - OlderAdultFemale + - OlderAdultMale + - SeniorFemale + - SeniorMale + - YoungAdultFemale + - YoungAdultMale + default: Girl + examples: + - ask: 'A girl says "hello world"' + answer: 'text_to_speech(text="hello world", role="Girl")' + - ask: 'A boy affectionate says "hello world"' + answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")' + - ask: 'A boy says "你好"' + answer: 'text_to_speech(text="你好", role="Boy", lang="Chinese")' + returns: + type: string + format: base64 - - name: text_to_image - description: Create a drawing based on the text. - id: text_to_image.text_to_image - x-prerequisite: - - name: OPENAI_API_KEY - description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`" - - name: METAGPT_TEXT_TO_IMAGE_MODEL_URL - description: "Model url." - arguments: - text: 'The text used for image conversion. Required.' - size_type: 'Default value: "512x512".' - examples: - - ask: 'Draw a girl' - answer: 'text_to_image(text="Draw a girl", size_type="512x512")' - - ask: 'Draw an apple' - answer: 'text_to_image(text="Draw an apple", size_type="512x512")' - returns: - type: string - format: base64 + - name: text_to_image + description: Create a drawing based on the text. + id: text_to_image.text_to_image + x-prerequisite: + configurations: + OPENAI_API_KEY: + type: string + description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`" + METAGPT_TEXT_TO_IMAGE_MODEL_URL: + type: string + description: "Model url." + required: + oneOf: + - OPENAI_API_KEY + - METAGPT_TEXT_TO_IMAGE_MODEL_URL + parameters: + text: + description: 'The text used for image conversion.' + type: string + required: true + size_type: + description: size type + type: string + default: "512x512" + examples: + - ask: 'Draw a girl' + answer: 'text_to_image(text="Draw a girl", size_type="512x512")' + - ask: 'Draw an apple' + answer: 'text_to_image(text="Draw an apple", size_type="512x512")' + returns: + type: string + format: base64 - - name: web_search - description: Perform Google searches to provide real-time information. - id: web_search.web_search - x-prerequisite: - - name: SEARCH_ENGINE - description: "Supported values: serpapi/google/serper/ddg" - - name: SERPER_API_KEY - description: "SERPER API KEY, For more details, checkout: `https://serper.dev/api-key`" - arguments: - query: 'The search query. Required.' - max_results: 'The number of search results to retrieve. Default value: 6.' - examples: - - ask: 'Search for information about artificial intelligence' - answer: 'web_search(query="Search for information about artificial intelligence", max_results=6)' - - ask: 'Find news articles about climate change' - answer: 'web_search(query="Find news articles about climate change", max_results=6)' - returns: - type: string \ No newline at end of file + - name: web_search + description: Perform Google searches to provide real-time information. + id: web_search.web_search + x-prerequisite: + configurations: + SEARCH_ENGINE: + type: string + description: "Supported values: serpapi/google/serper/ddg" + SERPER_API_KEY: + type: string + description: "SERPER API KEY, For more details, checkout: `https://serper.dev/api-key`" + required: + allOf: + - SEARCH_ENGINE + - SERPER_API_KEY + parameters: + query: + type: string + description: 'The search query.' + required: true + max_results: + type: number + default: 6 + description: 'The number of search results to retrieve.' + examples: + - ask: 'Search for information about artificial intelligence' + answer: 'web_search(query="Search for information about artificial intelligence", max_results=6)' + - ask: 'Find news articles about climate change' + answer: 'web_search(query="Find news articles about climate change", max_results=6)' + returns: + type: string diff --git a/metagpt/actions/talk_action.py b/metagpt/actions/talk_action.py index eb619cb7e..3695ec5bb 100644 --- a/metagpt/actions/talk_action.py +++ b/metagpt/actions/talk_action.py @@ -50,7 +50,7 @@ class TalkAction(Action): return prompt @property - def prompt_bad(self): + def prompt_gpt4(self): kvs = { "{role}": CONFIG.agent_description or "", "{history}": self._history_summary or "", diff --git a/metagpt/learn/skill_loader.py b/metagpt/learn/skill_loader.py index 83200bca6..dff5e26ae 100644 --- a/metagpt/learn/skill_loader.py +++ b/metagpt/learn/skill_loader.py @@ -25,29 +25,43 @@ class Returns(BaseModel): format: Optional[str] = None -class Prerequisite(BaseModel): - name: str - type: Optional[str] = None - description: Optional[str] = None - default: Optional[str] = None +class Parameter(BaseModel): + type: str + description: str = None class Skill(BaseModel): name: str - description: str - id: str - x_prerequisite: Optional[List[Prerequisite]] = Field(default=None, alias="x-prerequisite") - arguments: Dict + description: str = None + id: str = None + x_prerequisite: Dict = Field(default=None, alias="x-prerequisite") + parameters: Dict[str, Parameter] = None examples: List[Example] returns: Returns + @property + def arguments(self) -> Dict: + if not self.parameters: + return {} + ret = {} + for k, v in self.parameters.items(): + ret[k] = v.description if v.description else "" + return ret -class EntitySkills(BaseModel): + +class Entity(BaseModel): + name: str = None skills: List[Skill] +class Components(BaseModel): + pass + + class SkillsDeclaration(BaseModel): - entities: Dict[str, EntitySkills] + skillapi: str + entities: Dict[str, Entity] + components: Components = None class SkillLoader: @@ -60,8 +74,8 @@ class SkillLoader: def get_skill_list(self, entity_name: str = "Assistant") -> Dict: """Return the skill name based on the skill description.""" - entity_skills = self.get_entity(entity_name) - if not entity_skills: + entity = self.get_entity(entity_name) + if not entity: return {} agent_skills = CONFIG.agent_skills @@ -73,7 +87,7 @@ class SkillLoader: names = [AgentSkill(**i).name for i in agent_skills] description_to_name_mappings = {} - for s in entity_skills.skills: + for s in entity.skills: if s.name not in names: continue description_to_name_mappings[s.description] = s.name @@ -89,8 +103,21 @@ class SkillLoader: if sk.name == name: return sk - def get_entity(self, name) -> EntitySkills: + def get_entity(self, name) -> Entity: """Return a list of skills for the entity.""" if not self._skills: return None return self._skills.entities.get(name) + + +if __name__ == "__main__": + CONFIG.agent_skills = [ + {"id": 1, "name": "text_to_speech", "type": "builtin", "config": {}, "enabled": True}, + {"id": 2, "name": "text_to_image", "type": "builtin", "config": {}, "enabled": True}, + {"id": 3, "name": "ai_call", "type": "builtin", "config": {}, "enabled": True}, + {"id": 3, "name": "data_analysis", "type": "builtin", "config": {}, "enabled": True}, + {"id": 5, "name": "crawler", "type": "builtin", "config": {"engine": "ddg"}, "enabled": True}, + {"id": 6, "name": "knowledge", "type": "builtin", "config": {}, "enabled": True}, + ] + loader = SkillLoader() + print(loader.get_skill_list()) diff --git a/metagpt/learn/text_to_speech.py b/metagpt/learn/text_to_speech.py index 81bc8512b..7c085c02f 100644 --- a/metagpt/learn/text_to_speech.py +++ b/metagpt/learn/text_to_speech.py @@ -11,6 +11,7 @@ import openai from metagpt.config import CONFIG from metagpt.const import BASE64_FORMAT from metagpt.tools.azure_tts import oas3_azsure_tts +from metagpt.tools.iflytek_tts import oas3_iflytek_tts from metagpt.utils.s3 import S3 @@ -22,6 +23,9 @@ async def text_to_speech( role="Girl", subscription_key="", region="", + iflytek_app_id="", + iflytek_api_key="", + iflytek_api_secret="", **kwargs, ): """Text to speech @@ -34,16 +38,35 @@ async def text_to_speech( :param text: The text used for voice conversion. :param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint` :param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API. - :return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string. + :param iflytek_app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts` + :param iflytek_api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + :param iflytek_api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + :return: Returns the Base64-encoded .wav/.mp3 file data if successful, otherwise an empty string. """ - audio_declaration = "data:audio/wav;base64," + if (CONFIG.AZURE_TTS_SUBSCRIPTION_KEY and CONFIG.AZURE_TTS_REGION) or (subscription_key and region): + audio_declaration = "data:audio/wav;base64," base64_data = await oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region) s3 = S3() url = await s3.cache(data=base64_data, file_ext=".wav", format=BASE64_FORMAT) if url: return f"[{text}]({url})" return audio_declaration + base64_data if base64_data else base64_data + if (CONFIG.IFLYTEK_APP_ID and CONFIG.IFLYTEK_API_KEY and CONFIG.IFLYTEK_API_SECRET) or ( + iflytek_app_id and iflytek_api_key and iflytek_api_secret + ): + audio_declaration = "data:audio/mp3;base64," + base64_data = await oas3_iflytek_tts( + text=text, app_id=iflytek_app_id, api_key=iflytek_api_key, api_secret=iflytek_api_secret + ) + s3 = S3() + url = await s3.cache(data=base64_data, file_ext=".mp3", format=BASE64_FORMAT) + if url: + return f"[{text}]({url})" + return audio_declaration + base64_data if base64_data else base64_data - raise openai.error.InvalidRequestError(message="AZURE_TTS_SUBSCRIPTION_KEY and AZURE_TTS_REGION error", param={}) + raise openai.error.InvalidRequestError( + message="AZURE_TTS_SUBSCRIPTION_KEY, AZURE_TTS_REGION, IFLYTEK_APP_ID, IFLYTEK_API_KEY, IFLYTEK_API_SECRET error", + param={}, + ) diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 7fc8b867a..953043912 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -228,4 +228,4 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter): from metagpt.memory.brain_memory import BrainMemory memory = BrainMemory(llm_type=LLMType.OPENAI.value, historical_summary=text, cacheable=False) - return await memory.summarize(llm=self, max_length=max_words, keep_language=keep_language) + return await memory.summarize(llm=self, max_words=max_words, keep_language=keep_language) diff --git a/metagpt/tools/iflytek_tts.py b/metagpt/tools/iflytek_tts.py new file mode 100644 index 000000000..cb87d2e7f --- /dev/null +++ b/metagpt/tools/iflytek_tts.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/8/17 +@Author : mashenquan +@File : iflytek_tts.py +@Desc : iFLYTEK TTS OAS3 api, which provides text-to-speech functionality +""" +import asyncio +import base64 +import hashlib +import hmac +import json +import uuid +from datetime import datetime +from enum import Enum +from pathlib import Path +from time import mktime +from typing import Optional +from urllib.parse import urlencode +from wsgiref.handlers import format_date_time + +import aiofiles +import websockets as websockets +from pydantic import BaseModel + +from metagpt.config import CONFIG +from metagpt.logs import logger + + +class IFlyTekTTSStatus(Enum): + STATUS_FIRST_FRAME = 0 # The first frame + STATUS_CONTINUE_FRAME = 1 # The intermediate frame + STATUS_LAST_FRAME = 2 # The last frame + + +class AudioData(BaseModel): + audio: str + status: int + ced: str + + +class IFlyTekTTSResponse(BaseModel): + code: int + message: str + data: Optional[AudioData] = None + sid: str + + +DEFAULT_IFLYTEK_VOICE = "xiaoyan" + + +class IFlyTekTTS(object): + def __init__(self, app_id: str, api_key: str, api_secret: str): + """ + :param app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts` + :param api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + :param api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + """ + self.app_id = app_id or CONFIG.IFLYTEK_APP_ID + self.api_key = api_key or CONFIG.IFLYTEK_API_KEY + self.api_secret = api_secret or CONFIG.API_SECRET + + async def synthesize_speech(self, text, output_file: str, voice=DEFAULT_IFLYTEK_VOICE): + url = self._create_url() + data = { + "common": {"app_id": self.app_id}, + "business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": voice, "tte": "utf8"}, + "data": {"status": 2, "text": str(base64.b64encode(text.encode("utf-8")), "UTF8")}, + } + req = json.dumps(data) + async with websockets.connect(url) as websocket: + # send request + await websocket.send(req) + + # receive frames + async with aiofiles.open(str(output_file), "w") as writer: + while True: + v = await websocket.recv() + rsp = IFlyTekTTSResponse(**json.loads(v)) + if rsp.data: + await writer.write(rsp.data.audio) + if rsp.data.status != IFlyTekTTSStatus.STATUS_LAST_FRAME.value: + continue + break + + def _create_url(self): + """Create request url""" + url = "wss://tts-api.xfyun.cn/v2/tts" + # Generate a timestamp in RFC1123 format + now = datetime.now() + date = format_date_time(mktime(now.timetuple())) + + signature_origin = "host: " + "ws-api.xfyun.cn" + "\n" + signature_origin += "date: " + date + "\n" + signature_origin += "GET " + "/v2/tts " + "HTTP/1.1" + # Perform HMAC-SHA256 encryption + signature_sha = hmac.new( + self.api_secret.encode("utf-8"), signature_origin.encode("utf-8"), digestmod=hashlib.sha256 + ).digest() + signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8") + + authorization_origin = 'api_key="%s", algorithm="%s", headers="%s", signature="%s"' % ( + self.api_key, + "hmac-sha256", + "host date request-line", + signature_sha, + ) + authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(encoding="utf-8") + # Combine the authentication parameters of the request into a dictionary. + v = {"authorization": authorization, "date": date, "host": "ws-api.xfyun.cn"} + # Concatenate the authentication parameters to generate the URL. + url = url + "?" + urlencode(v) + return url + + +# Export +async def oas3_iflytek_tts(text: str, voice: str = "", app_id: str = "", api_key: str = "", api_secret: str = ""): + """Text to speech + For more details, check out:`https://www.xfyun.cn/doc/tts/online_tts/API.html` + + :param voice: Default `xiaoyan`. For more details, checkout: `https://www.xfyun.cn/doc/tts/online_tts/API.html#%E6%8E%A5%E5%8F%A3%E8%B0%83%E7%94%A8%E6%B5%81%E7%A8%8B` + :param text: The text used for voice conversion. + :param app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts` + :param api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + :param api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts` + :return: Returns the Base64-encoded .mp3 file data if successful, otherwise an empty string. + + """ + if not app_id: + app_id = CONFIG.IFLYTEK_APP_ID + if not api_key: + api_key = CONFIG.IFLYTEK_API_KEY + if not api_secret: + api_secret = CONFIG.IFLYTEK_API_SECRET + if not voice: + voice = CONFIG.IFLYTEK_VOICE or DEFAULT_IFLYTEK_VOICE + + filename = Path(__file__).parent / (uuid.uuid4().hex + ".mp3") + try: + tts = IFlyTekTTS(app_id=app_id, api_key=api_key, api_secret=api_secret) + await tts.synthesize_speech(text=text, output_file=str(filename), voice=voice) + async with aiofiles.open(str(filename), mode="r") as reader: + base64_string = await reader.read() + except Exception as e: + logger.error(f"text:{text}, error:{e}") + base64_string = "" + finally: + filename.unlink() + + return base64_string + + +if __name__ == "__main__": + asyncio.get_event_loop().run_until_complete( + oas3_iflytek_tts( + text="你好,hello", + app_id="f7acef62", + api_key="fda72e3aa286042a492525816a5efa08", + api_secret="ZDk3NjdiMDBkODJlOWQ1NjRjMGI2NDY4", + ) + ) diff --git a/metagpt/tools/metagpt_oas3_api_svc.py b/metagpt/tools/metagpt_oas3_api_svc.py index 5c23f6566..2ff4c8225 100644 --- a/metagpt/tools/metagpt_oas3_api_svc.py +++ b/metagpt/tools/metagpt_oas3_api_svc.py @@ -7,8 +7,8 @@ @Desc : MetaGPT OpenAPI Specification 3.0 REST API service """ import asyncio -from pathlib import Path import sys +from pathlib import Path import connexion @@ -17,7 +17,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug: def oas_http_svc(): """Start the OAS 3.0 OpenAPI HTTP service""" - app = connexion.AioHttpApp(__name__, specification_dir='../../.well-known/') + app = connexion.AioHttpApp(__name__, specification_dir="../../.well-known/") app.add_api("metagpt_oas3_api.yaml") app.add_api("openapi.yaml") app.run(port=8080) @@ -35,6 +35,7 @@ async def async_main(): def main(): + print("http://localhost:8080/oas3/ui/") oas_http_svc() diff --git a/metagpt/utils/s3.py b/metagpt/utils/s3.py index 96b457972..dde68f720 100644 --- a/metagpt/utils/s3.py +++ b/metagpt/utils/s3.py @@ -132,7 +132,7 @@ class S3: async def cache(self, data: str, file_ext: str, format: str = "") -> str: """Save data to remote S3 and return url""" - object_name = str(uuid.uuid4()).replace("-", "") + file_ext + object_name = uuid.uuid4().hex + file_ext path = Path(__file__).parent pathname = path / object_name try: diff --git a/requirements.txt b/requirements.txt index 588b29e0b..3b2dc3106 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,4 +42,5 @@ connexion[swagger-ui] aiohttp_jinja2 azure-cognitiveservices-speech==1.31.0 aioboto3~=11.3.0 -redis==4.3.5 \ No newline at end of file +redis==4.3.5 +websocket-client==1.6.2 \ No newline at end of file diff --git a/tests/metagpt/learn/test_skill_loader.py b/tests/metagpt/learn/test_skill_loader.py new file mode 100644 index 000000000..5bc0e776f --- /dev/null +++ b/tests/metagpt/learn/test_skill_loader.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +@Time : 2023/9/19 +@Author : mashenquan +@File : test_skill_loader.py +@Desc : Unit tests. +""" + +from metagpt.config import CONFIG +from metagpt.learn.skill_loader import SkillLoader + + +def test_suite(): + CONFIG.agent_skills = [ + {"id": 1, "name": "text_to_speech", "type": "builtin", "config": {}, "enabled": True}, + {"id": 2, "name": "text_to_image", "type": "builtin", "config": {}, "enabled": True}, + {"id": 3, "name": "ai_call", "type": "builtin", "config": {}, "enabled": True}, + {"id": 3, "name": "data_analysis", "type": "builtin", "config": {}, "enabled": True}, + {"id": 5, "name": "crawler", "type": "builtin", "config": {"engine": "ddg"}, "enabled": True}, + {"id": 6, "name": "knowledge", "type": "builtin", "config": {}, "enabled": True}, + {"id": 6, "name": "web_search", "type": "builtin", "config": {}, "enabled": True}, + ] + loader = SkillLoader() + skills = loader.get_skill_list() + assert skills + assert len(skills) >= 3 + for desc, name in skills.items(): + assert desc + assert name + + entity = loader.get_entity("Assistant") + assert entity + assert entity.skills + for sk in entity.skills: + assert sk + assert sk.arguments + + +if __name__ == "__main__": + test_suite()