mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-27 14:25:20 +02:00
Merge pull request #48 from iorisa/feature/talk_prompt
feat: +科大讯飞TTS; update skill spec
This commit is contained in:
commit
2e19f79f47
11 changed files with 534 additions and 96 deletions
|
|
@ -13,10 +13,17 @@ servers:
|
|||
paths:
|
||||
/tts/azsure:
|
||||
x-prerequisite:
|
||||
- name: AZURE_TTS_SUBSCRIPTION_KEY
|
||||
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
|
||||
- name: AZURE_TTS_REGION
|
||||
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
|
||||
configurations:
|
||||
AZURE_TTS_SUBSCRIPTION_KEY:
|
||||
type: string
|
||||
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
|
||||
AZURE_TTS_REGION:
|
||||
type: string
|
||||
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
|
||||
required:
|
||||
allOf:
|
||||
- AZURE_TTS_SUBSCRIPTION_KEY
|
||||
- AZURE_TTS_REGION
|
||||
post:
|
||||
summary: "Convert Text to Base64-encoded .wav File Stream"
|
||||
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
|
||||
|
|
@ -73,10 +80,81 @@ paths:
|
|||
'500':
|
||||
description: "Internal Server Error"
|
||||
|
||||
/tts/iflytek:
|
||||
x-prerequisite:
|
||||
configurations:
|
||||
IFLYTEK_APP_ID:
|
||||
type: string
|
||||
description: "Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`"
|
||||
IFLYTEK_API_KEY:
|
||||
type: string
|
||||
description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`"
|
||||
IFLYTEK_API_SECRET:
|
||||
type: string
|
||||
description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`"
|
||||
required:
|
||||
allOf:
|
||||
- IFLYTEK_APP_ID
|
||||
- IFLYTEK_API_KEY
|
||||
- IFLYTEK_API_SECRET
|
||||
post:
|
||||
summary: "Convert Text to Base64-encoded .mp3 File Stream"
|
||||
description: "For more details, check out: [iFlyTek](https://console.xfyun.cn/services/tts)"
|
||||
operationId: iflytek_tts.oas3_iflytek_tts
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required:
|
||||
- text
|
||||
properties:
|
||||
text:
|
||||
type: string
|
||||
description: Text to convert
|
||||
voice:
|
||||
type: string
|
||||
description: "Voice style, see: [iFlyTek Text-to_Speech](https://www.xfyun.cn/doc/tts/online_tts/API.html#%E6%8E%A5%E5%8F%A3%E8%B0%83%E7%94%A8%E6%B5%81%E7%A8%8B)"
|
||||
default: "xiaoyan"
|
||||
app_id:
|
||||
type: string
|
||||
description: "Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`"
|
||||
default: ""
|
||||
api_key:
|
||||
type: string
|
||||
description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`"
|
||||
default: ""
|
||||
api_secret:
|
||||
type: string
|
||||
description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`"
|
||||
default: ""
|
||||
responses:
|
||||
'200':
|
||||
description: "Base64-encoded .mp3 file data if successful, otherwise an empty string."
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
wav_data:
|
||||
type: string
|
||||
format: base64
|
||||
'400':
|
||||
description: "Bad Request"
|
||||
'500':
|
||||
description: "Internal Server Error"
|
||||
|
||||
|
||||
/txt2img/openai:
|
||||
x-prerequisite:
|
||||
- name: OPENAI_API_KEY
|
||||
description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
|
||||
configurations:
|
||||
OPENAI_API_KEY:
|
||||
type: string
|
||||
description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
|
||||
required:
|
||||
allOf:
|
||||
- OPENAI_API_KEY
|
||||
post:
|
||||
summary: "Convert Text to Base64-encoded Image Data Stream"
|
||||
operationId: openai_text_to_image.oas3_openai_text_to_image
|
||||
|
|
@ -116,8 +194,13 @@ paths:
|
|||
description: "Internal Server Error"
|
||||
/txt2embedding/openai:
|
||||
x-prerequisite:
|
||||
- name: OPENAI_API_KEY
|
||||
description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
|
||||
configurations:
|
||||
OPENAI_API_KEY:
|
||||
type: string
|
||||
description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
|
||||
required:
|
||||
allOf:
|
||||
- OPENAI_API_KEY
|
||||
post:
|
||||
summary: Text to embedding
|
||||
operationId: openai_text_to_embedding.oas3_openai_text_to_embedding
|
||||
|
|
@ -158,8 +241,13 @@ paths:
|
|||
|
||||
/txt2image/metagpt:
|
||||
x-prerequisite:
|
||||
- name: METAGPT_TEXT_TO_IMAGE_MODEL_URL
|
||||
description: "Model url."
|
||||
configurations:
|
||||
METAGPT_TEXT_TO_IMAGE_MODEL_URL:
|
||||
type: string
|
||||
description: "Model url."
|
||||
required:
|
||||
allOf:
|
||||
- METAGPT_TEXT_TO_IMAGE_MODEL_URL
|
||||
post:
|
||||
summary: "Text to Image"
|
||||
description: "Generate an image from the provided text using the MetaGPT Text-to-Image API."
|
||||
|
|
|
|||
|
|
@ -1,66 +1,161 @@
|
|||
skillapi: "0.1.0"
|
||||
|
||||
info:
|
||||
title: "Agent Skill Specification"
|
||||
version: "1.0"
|
||||
|
||||
entities:
|
||||
Assistant:
|
||||
skills:
|
||||
- name: text_to_speech
|
||||
description: Text-to-speech
|
||||
id: text_to_speech.text_to_speech
|
||||
x-prerequisite:
|
||||
- name: AZURE_TTS_SUBSCRIPTION_KEY
|
||||
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
|
||||
- name: AZURE_TTS_REGION
|
||||
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
|
||||
arguments:
|
||||
text: 'The text used for voice conversion. Required.'
|
||||
lang: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States). The optional parameter are "English", "Chinese". Default value: "Chinese".'
|
||||
voice: 'Default value: "zh-CN-XiaomoNeural".'
|
||||
style: 'Speaking style to express different emotions like cheerfulness, empathy, and calm. The optional parameter values are "affectionate", "angry", "calm", "cheerful", "depressed", "disgruntled", "embarrassed", "envious", "fearful", "gentle", "sad", "serious". Default value: "affectionate".'
|
||||
role: 'With roles, the same voice can act as a different age and gender. The optional parameter values are "Girl", "Boy", "OlderAdultFemale", "OlderAdultMale", "SeniorFemale", "SeniorMale", "YoungAdultFemale", "YoungAdultMale". Default value: "Girl".'
|
||||
examples:
|
||||
- ask: 'A girl says "hello world"'
|
||||
answer: 'text_to_speech(text="hello world", role="Girl")'
|
||||
- ask: 'A boy affectionate says "hello world"'
|
||||
answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")'
|
||||
- ask: 'A boy says "你好"'
|
||||
answer: 'text_to_speech(text="hello world", role="Boy", lang="Chinese")'
|
||||
returns:
|
||||
type: string
|
||||
format: base64
|
||||
summary: assistant
|
||||
description: assistant
|
||||
skills:
|
||||
- name: text_to_speech
|
||||
description: Generate a voice file from the input text, text-to-speech
|
||||
id: text_to_speech.text_to_speech
|
||||
x-prerequisite:
|
||||
configurations:
|
||||
AZURE_TTS_SUBSCRIPTION_KEY:
|
||||
type: string
|
||||
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
|
||||
AZURE_TTS_REGION:
|
||||
type: string
|
||||
description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
|
||||
IFLYTEK_APP_ID:
|
||||
type: string
|
||||
description: "Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`"
|
||||
IFLYTEK_API_KEY:
|
||||
type: string
|
||||
description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`"
|
||||
IFLYTEK_API_SECRET:
|
||||
type: string
|
||||
description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`"
|
||||
required:
|
||||
oneOf:
|
||||
- allOf:
|
||||
- AZURE_TTS_SUBSCRIPTION_KEY
|
||||
- AZURE_TTS_REGION
|
||||
- allOf:
|
||||
- IFLYTEK_APP_ID
|
||||
- IFLYTEK_API_KEY
|
||||
- IFLYTEK_API_SECRET
|
||||
parameters:
|
||||
text:
|
||||
description: 'The text used for voice conversion.'
|
||||
required: true
|
||||
type: string
|
||||
lang:
|
||||
description: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States).'
|
||||
type: string
|
||||
enum:
|
||||
- English
|
||||
- Chinese
|
||||
default: Chinese
|
||||
voice:
|
||||
description: Name of voice styles
|
||||
type: string
|
||||
default: zh-CN-XiaomoNeural
|
||||
style:
|
||||
type: string
|
||||
description: Speaking style to express different emotions like cheerfulness, empathy, and calm.
|
||||
enum:
|
||||
- affectionate
|
||||
- angry
|
||||
- calm
|
||||
- cheerful
|
||||
- depressed
|
||||
- disgruntled
|
||||
- embarrassed
|
||||
- envious
|
||||
- fearful
|
||||
- gentle
|
||||
- sad
|
||||
- serious
|
||||
default: affectionate
|
||||
role:
|
||||
type: string
|
||||
description: With roles, the same voice can act as a different age and gender.
|
||||
enum:
|
||||
- Girl
|
||||
- Boy
|
||||
- OlderAdultFemale
|
||||
- OlderAdultMale
|
||||
- SeniorFemale
|
||||
- SeniorMale
|
||||
- YoungAdultFemale
|
||||
- YoungAdultMale
|
||||
default: Girl
|
||||
examples:
|
||||
- ask: 'A girl says "hello world"'
|
||||
answer: 'text_to_speech(text="hello world", role="Girl")'
|
||||
- ask: 'A boy affectionate says "hello world"'
|
||||
answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")'
|
||||
- ask: 'A boy says "你好"'
|
||||
answer: 'text_to_speech(text="你好", role="Boy", lang="Chinese")'
|
||||
returns:
|
||||
type: string
|
||||
format: base64
|
||||
|
||||
- name: text_to_image
|
||||
description: Create a drawing based on the text.
|
||||
id: text_to_image.text_to_image
|
||||
x-prerequisite:
|
||||
- name: OPENAI_API_KEY
|
||||
description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
|
||||
- name: METAGPT_TEXT_TO_IMAGE_MODEL_URL
|
||||
description: "Model url."
|
||||
arguments:
|
||||
text: 'The text used for image conversion. Required.'
|
||||
size_type: 'Default value: "512x512".'
|
||||
examples:
|
||||
- ask: 'Draw a girl'
|
||||
answer: 'text_to_image(text="Draw a girl", size_type="512x512")'
|
||||
- ask: 'Draw an apple'
|
||||
answer: 'text_to_image(text="Draw an apple", size_type="512x512")'
|
||||
returns:
|
||||
type: string
|
||||
format: base64
|
||||
- name: text_to_image
|
||||
description: Create a drawing based on the text.
|
||||
id: text_to_image.text_to_image
|
||||
x-prerequisite:
|
||||
configurations:
|
||||
OPENAI_API_KEY:
|
||||
type: string
|
||||
description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
|
||||
METAGPT_TEXT_TO_IMAGE_MODEL_URL:
|
||||
type: string
|
||||
description: "Model url."
|
||||
required:
|
||||
oneOf:
|
||||
- OPENAI_API_KEY
|
||||
- METAGPT_TEXT_TO_IMAGE_MODEL_URL
|
||||
parameters:
|
||||
text:
|
||||
description: 'The text used for image conversion.'
|
||||
type: string
|
||||
required: true
|
||||
size_type:
|
||||
description: size type
|
||||
type: string
|
||||
default: "512x512"
|
||||
examples:
|
||||
- ask: 'Draw a girl'
|
||||
answer: 'text_to_image(text="Draw a girl", size_type="512x512")'
|
||||
- ask: 'Draw an apple'
|
||||
answer: 'text_to_image(text="Draw an apple", size_type="512x512")'
|
||||
returns:
|
||||
type: string
|
||||
format: base64
|
||||
|
||||
- name: web_search
|
||||
description: Perform Google searches to provide real-time information.
|
||||
id: web_search.web_search
|
||||
x-prerequisite:
|
||||
- name: SEARCH_ENGINE
|
||||
description: "Supported values: serpapi/google/serper/ddg"
|
||||
- name: SERPER_API_KEY
|
||||
description: "SERPER API KEY, For more details, checkout: `https://serper.dev/api-key`"
|
||||
arguments:
|
||||
query: 'The search query. Required.'
|
||||
max_results: 'The number of search results to retrieve. Default value: 6.'
|
||||
examples:
|
||||
- ask: 'Search for information about artificial intelligence'
|
||||
answer: 'web_search(query="Search for information about artificial intelligence", max_results=6)'
|
||||
- ask: 'Find news articles about climate change'
|
||||
answer: 'web_search(query="Find news articles about climate change", max_results=6)'
|
||||
returns:
|
||||
type: string
|
||||
- name: web_search
|
||||
description: Perform Google searches to provide real-time information.
|
||||
id: web_search.web_search
|
||||
x-prerequisite:
|
||||
configurations:
|
||||
SEARCH_ENGINE:
|
||||
type: string
|
||||
description: "Supported values: serpapi/google/serper/ddg"
|
||||
SERPER_API_KEY:
|
||||
type: string
|
||||
description: "SERPER API KEY, For more details, checkout: `https://serper.dev/api-key`"
|
||||
required:
|
||||
allOf:
|
||||
- SEARCH_ENGINE
|
||||
- SERPER_API_KEY
|
||||
parameters:
|
||||
query:
|
||||
type: string
|
||||
description: 'The search query.'
|
||||
required: true
|
||||
max_results:
|
||||
type: number
|
||||
default: 6
|
||||
description: 'The number of search results to retrieve.'
|
||||
examples:
|
||||
- ask: 'Search for information about artificial intelligence'
|
||||
answer: 'web_search(query="Search for information about artificial intelligence", max_results=6)'
|
||||
- ask: 'Find news articles about climate change'
|
||||
answer: 'web_search(query="Find news articles about climate change", max_results=6)'
|
||||
returns:
|
||||
type: string
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ class TalkAction(Action):
|
|||
return prompt
|
||||
|
||||
@property
|
||||
def prompt_bad(self):
|
||||
def prompt_gpt4(self):
|
||||
kvs = {
|
||||
"{role}": CONFIG.agent_description or "",
|
||||
"{history}": self._history_summary or "",
|
||||
|
|
|
|||
|
|
@ -25,29 +25,43 @@ class Returns(BaseModel):
|
|||
format: Optional[str] = None
|
||||
|
||||
|
||||
class Prerequisite(BaseModel):
|
||||
name: str
|
||||
type: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
default: Optional[str] = None
|
||||
class Parameter(BaseModel):
|
||||
type: str
|
||||
description: str = None
|
||||
|
||||
|
||||
class Skill(BaseModel):
|
||||
name: str
|
||||
description: str
|
||||
id: str
|
||||
x_prerequisite: Optional[List[Prerequisite]] = Field(default=None, alias="x-prerequisite")
|
||||
arguments: Dict
|
||||
description: str = None
|
||||
id: str = None
|
||||
x_prerequisite: Dict = Field(default=None, alias="x-prerequisite")
|
||||
parameters: Dict[str, Parameter] = None
|
||||
examples: List[Example]
|
||||
returns: Returns
|
||||
|
||||
@property
|
||||
def arguments(self) -> Dict:
|
||||
if not self.parameters:
|
||||
return {}
|
||||
ret = {}
|
||||
for k, v in self.parameters.items():
|
||||
ret[k] = v.description if v.description else ""
|
||||
return ret
|
||||
|
||||
class EntitySkills(BaseModel):
|
||||
|
||||
class Entity(BaseModel):
|
||||
name: str = None
|
||||
skills: List[Skill]
|
||||
|
||||
|
||||
class Components(BaseModel):
|
||||
pass
|
||||
|
||||
|
||||
class SkillsDeclaration(BaseModel):
|
||||
entities: Dict[str, EntitySkills]
|
||||
skillapi: str
|
||||
entities: Dict[str, Entity]
|
||||
components: Components = None
|
||||
|
||||
|
||||
class SkillLoader:
|
||||
|
|
@ -60,8 +74,8 @@ class SkillLoader:
|
|||
|
||||
def get_skill_list(self, entity_name: str = "Assistant") -> Dict:
|
||||
"""Return the skill name based on the skill description."""
|
||||
entity_skills = self.get_entity(entity_name)
|
||||
if not entity_skills:
|
||||
entity = self.get_entity(entity_name)
|
||||
if not entity:
|
||||
return {}
|
||||
|
||||
agent_skills = CONFIG.agent_skills
|
||||
|
|
@ -73,7 +87,7 @@ class SkillLoader:
|
|||
|
||||
names = [AgentSkill(**i).name for i in agent_skills]
|
||||
description_to_name_mappings = {}
|
||||
for s in entity_skills.skills:
|
||||
for s in entity.skills:
|
||||
if s.name not in names:
|
||||
continue
|
||||
description_to_name_mappings[s.description] = s.name
|
||||
|
|
@ -89,8 +103,21 @@ class SkillLoader:
|
|||
if sk.name == name:
|
||||
return sk
|
||||
|
||||
def get_entity(self, name) -> EntitySkills:
|
||||
def get_entity(self, name) -> Entity:
|
||||
"""Return a list of skills for the entity."""
|
||||
if not self._skills:
|
||||
return None
|
||||
return self._skills.entities.get(name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
CONFIG.agent_skills = [
|
||||
{"id": 1, "name": "text_to_speech", "type": "builtin", "config": {}, "enabled": True},
|
||||
{"id": 2, "name": "text_to_image", "type": "builtin", "config": {}, "enabled": True},
|
||||
{"id": 3, "name": "ai_call", "type": "builtin", "config": {}, "enabled": True},
|
||||
{"id": 3, "name": "data_analysis", "type": "builtin", "config": {}, "enabled": True},
|
||||
{"id": 5, "name": "crawler", "type": "builtin", "config": {"engine": "ddg"}, "enabled": True},
|
||||
{"id": 6, "name": "knowledge", "type": "builtin", "config": {}, "enabled": True},
|
||||
]
|
||||
loader = SkillLoader()
|
||||
print(loader.get_skill_list())
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import openai
|
|||
from metagpt.config import CONFIG
|
||||
from metagpt.const import BASE64_FORMAT
|
||||
from metagpt.tools.azure_tts import oas3_azsure_tts
|
||||
from metagpt.tools.iflytek_tts import oas3_iflytek_tts
|
||||
from metagpt.utils.s3 import S3
|
||||
|
||||
|
||||
|
|
@ -22,6 +23,9 @@ async def text_to_speech(
|
|||
role="Girl",
|
||||
subscription_key="",
|
||||
region="",
|
||||
iflytek_app_id="",
|
||||
iflytek_api_key="",
|
||||
iflytek_api_secret="",
|
||||
**kwargs,
|
||||
):
|
||||
"""Text to speech
|
||||
|
|
@ -34,16 +38,35 @@ async def text_to_speech(
|
|||
:param text: The text used for voice conversion.
|
||||
:param subscription_key: key is used to access your Azure AI service API, see: `https://portal.azure.com/` > `Resource Management` > `Keys and Endpoint`
|
||||
:param region: This is the location (or region) of your resource. You may need to use this field when making calls to this API.
|
||||
:return: Returns the Base64-encoded .wav file data if successful, otherwise an empty string.
|
||||
:param iflytek_app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`
|
||||
:param iflytek_api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
:param iflytek_api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
:return: Returns the Base64-encoded .wav/.mp3 file data if successful, otherwise an empty string.
|
||||
|
||||
"""
|
||||
audio_declaration = "data:audio/wav;base64,"
|
||||
|
||||
if (CONFIG.AZURE_TTS_SUBSCRIPTION_KEY and CONFIG.AZURE_TTS_REGION) or (subscription_key and region):
|
||||
audio_declaration = "data:audio/wav;base64,"
|
||||
base64_data = await oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)
|
||||
s3 = S3()
|
||||
url = await s3.cache(data=base64_data, file_ext=".wav", format=BASE64_FORMAT)
|
||||
if url:
|
||||
return f"[{text}]({url})"
|
||||
return audio_declaration + base64_data if base64_data else base64_data
|
||||
if (CONFIG.IFLYTEK_APP_ID and CONFIG.IFLYTEK_API_KEY and CONFIG.IFLYTEK_API_SECRET) or (
|
||||
iflytek_app_id and iflytek_api_key and iflytek_api_secret
|
||||
):
|
||||
audio_declaration = "data:audio/mp3;base64,"
|
||||
base64_data = await oas3_iflytek_tts(
|
||||
text=text, app_id=iflytek_app_id, api_key=iflytek_api_key, api_secret=iflytek_api_secret
|
||||
)
|
||||
s3 = S3()
|
||||
url = await s3.cache(data=base64_data, file_ext=".mp3", format=BASE64_FORMAT)
|
||||
if url:
|
||||
return f"[{text}]({url})"
|
||||
return audio_declaration + base64_data if base64_data else base64_data
|
||||
|
||||
raise openai.error.InvalidRequestError(message="AZURE_TTS_SUBSCRIPTION_KEY and AZURE_TTS_REGION error", param={})
|
||||
raise openai.error.InvalidRequestError(
|
||||
message="AZURE_TTS_SUBSCRIPTION_KEY, AZURE_TTS_REGION, IFLYTEK_APP_ID, IFLYTEK_API_KEY, IFLYTEK_API_SECRET error",
|
||||
param={},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -228,4 +228,4 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
|
|||
from metagpt.memory.brain_memory import BrainMemory
|
||||
|
||||
memory = BrainMemory(llm_type=LLMType.OPENAI.value, historical_summary=text, cacheable=False)
|
||||
return await memory.summarize(llm=self, max_length=max_words, keep_language=keep_language)
|
||||
return await memory.summarize(llm=self, max_words=max_words, keep_language=keep_language)
|
||||
|
|
|
|||
162
metagpt/tools/iflytek_tts.py
Normal file
162
metagpt/tools/iflytek_tts.py
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/8/17
|
||||
@Author : mashenquan
|
||||
@File : iflytek_tts.py
|
||||
@Desc : iFLYTEK TTS OAS3 api, which provides text-to-speech functionality
|
||||
"""
|
||||
import asyncio
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from time import mktime
|
||||
from typing import Optional
|
||||
from urllib.parse import urlencode
|
||||
from wsgiref.handlers import format_date_time
|
||||
|
||||
import aiofiles
|
||||
import websockets as websockets
|
||||
from pydantic import BaseModel
|
||||
|
||||
from metagpt.config import CONFIG
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
class IFlyTekTTSStatus(Enum):
|
||||
STATUS_FIRST_FRAME = 0 # The first frame
|
||||
STATUS_CONTINUE_FRAME = 1 # The intermediate frame
|
||||
STATUS_LAST_FRAME = 2 # The last frame
|
||||
|
||||
|
||||
class AudioData(BaseModel):
|
||||
audio: str
|
||||
status: int
|
||||
ced: str
|
||||
|
||||
|
||||
class IFlyTekTTSResponse(BaseModel):
|
||||
code: int
|
||||
message: str
|
||||
data: Optional[AudioData] = None
|
||||
sid: str
|
||||
|
||||
|
||||
DEFAULT_IFLYTEK_VOICE = "xiaoyan"
|
||||
|
||||
|
||||
class IFlyTekTTS(object):
|
||||
def __init__(self, app_id: str, api_key: str, api_secret: str):
|
||||
"""
|
||||
:param app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`
|
||||
:param api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
:param api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
"""
|
||||
self.app_id = app_id or CONFIG.IFLYTEK_APP_ID
|
||||
self.api_key = api_key or CONFIG.IFLYTEK_API_KEY
|
||||
self.api_secret = api_secret or CONFIG.API_SECRET
|
||||
|
||||
async def synthesize_speech(self, text, output_file: str, voice=DEFAULT_IFLYTEK_VOICE):
|
||||
url = self._create_url()
|
||||
data = {
|
||||
"common": {"app_id": self.app_id},
|
||||
"business": {"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": voice, "tte": "utf8"},
|
||||
"data": {"status": 2, "text": str(base64.b64encode(text.encode("utf-8")), "UTF8")},
|
||||
}
|
||||
req = json.dumps(data)
|
||||
async with websockets.connect(url) as websocket:
|
||||
# send request
|
||||
await websocket.send(req)
|
||||
|
||||
# receive frames
|
||||
async with aiofiles.open(str(output_file), "w") as writer:
|
||||
while True:
|
||||
v = await websocket.recv()
|
||||
rsp = IFlyTekTTSResponse(**json.loads(v))
|
||||
if rsp.data:
|
||||
await writer.write(rsp.data.audio)
|
||||
if rsp.data.status != IFlyTekTTSStatus.STATUS_LAST_FRAME.value:
|
||||
continue
|
||||
break
|
||||
|
||||
def _create_url(self):
|
||||
"""Create request url"""
|
||||
url = "wss://tts-api.xfyun.cn/v2/tts"
|
||||
# Generate a timestamp in RFC1123 format
|
||||
now = datetime.now()
|
||||
date = format_date_time(mktime(now.timetuple()))
|
||||
|
||||
signature_origin = "host: " + "ws-api.xfyun.cn" + "\n"
|
||||
signature_origin += "date: " + date + "\n"
|
||||
signature_origin += "GET " + "/v2/tts " + "HTTP/1.1"
|
||||
# Perform HMAC-SHA256 encryption
|
||||
signature_sha = hmac.new(
|
||||
self.api_secret.encode("utf-8"), signature_origin.encode("utf-8"), digestmod=hashlib.sha256
|
||||
).digest()
|
||||
signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
|
||||
|
||||
authorization_origin = 'api_key="%s", algorithm="%s", headers="%s", signature="%s"' % (
|
||||
self.api_key,
|
||||
"hmac-sha256",
|
||||
"host date request-line",
|
||||
signature_sha,
|
||||
)
|
||||
authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(encoding="utf-8")
|
||||
# Combine the authentication parameters of the request into a dictionary.
|
||||
v = {"authorization": authorization, "date": date, "host": "ws-api.xfyun.cn"}
|
||||
# Concatenate the authentication parameters to generate the URL.
|
||||
url = url + "?" + urlencode(v)
|
||||
return url
|
||||
|
||||
|
||||
# Export
|
||||
async def oas3_iflytek_tts(text: str, voice: str = "", app_id: str = "", api_key: str = "", api_secret: str = ""):
|
||||
"""Text to speech
|
||||
For more details, check out:`https://www.xfyun.cn/doc/tts/online_tts/API.html`
|
||||
|
||||
:param voice: Default `xiaoyan`. For more details, checkout: `https://www.xfyun.cn/doc/tts/online_tts/API.html#%E6%8E%A5%E5%8F%A3%E8%B0%83%E7%94%A8%E6%B5%81%E7%A8%8B`
|
||||
:param text: The text used for voice conversion.
|
||||
:param app_id: Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`
|
||||
:param api_key: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
:param api_secret: WebAPI argument, see: `https://console.xfyun.cn/services/tts`
|
||||
:return: Returns the Base64-encoded .mp3 file data if successful, otherwise an empty string.
|
||||
|
||||
"""
|
||||
if not app_id:
|
||||
app_id = CONFIG.IFLYTEK_APP_ID
|
||||
if not api_key:
|
||||
api_key = CONFIG.IFLYTEK_API_KEY
|
||||
if not api_secret:
|
||||
api_secret = CONFIG.IFLYTEK_API_SECRET
|
||||
if not voice:
|
||||
voice = CONFIG.IFLYTEK_VOICE or DEFAULT_IFLYTEK_VOICE
|
||||
|
||||
filename = Path(__file__).parent / (uuid.uuid4().hex + ".mp3")
|
||||
try:
|
||||
tts = IFlyTekTTS(app_id=app_id, api_key=api_key, api_secret=api_secret)
|
||||
await tts.synthesize_speech(text=text, output_file=str(filename), voice=voice)
|
||||
async with aiofiles.open(str(filename), mode="r") as reader:
|
||||
base64_string = await reader.read()
|
||||
except Exception as e:
|
||||
logger.error(f"text:{text}, error:{e}")
|
||||
base64_string = ""
|
||||
finally:
|
||||
filename.unlink()
|
||||
|
||||
return base64_string
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.get_event_loop().run_until_complete(
|
||||
oas3_iflytek_tts(
|
||||
text="你好,hello",
|
||||
app_id="f7acef62",
|
||||
api_key="fda72e3aa286042a492525816a5efa08",
|
||||
api_secret="ZDk3NjdiMDBkODJlOWQ1NjRjMGI2NDY4",
|
||||
)
|
||||
)
|
||||
|
|
@ -7,8 +7,8 @@
|
|||
@Desc : MetaGPT OpenAPI Specification 3.0 REST API service
|
||||
"""
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import connexion
|
||||
|
||||
|
|
@ -17,7 +17,7 @@ sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) # fix-bug:
|
|||
|
||||
def oas_http_svc():
|
||||
"""Start the OAS 3.0 OpenAPI HTTP service"""
|
||||
app = connexion.AioHttpApp(__name__, specification_dir='../../.well-known/')
|
||||
app = connexion.AioHttpApp(__name__, specification_dir="../../.well-known/")
|
||||
app.add_api("metagpt_oas3_api.yaml")
|
||||
app.add_api("openapi.yaml")
|
||||
app.run(port=8080)
|
||||
|
|
@ -35,6 +35,7 @@ async def async_main():
|
|||
|
||||
|
||||
def main():
|
||||
print("http://localhost:8080/oas3/ui/")
|
||||
oas_http_svc()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ class S3:
|
|||
|
||||
async def cache(self, data: str, file_ext: str, format: str = "") -> str:
|
||||
"""Save data to remote S3 and return url"""
|
||||
object_name = str(uuid.uuid4()).replace("-", "") + file_ext
|
||||
object_name = uuid.uuid4().hex + file_ext
|
||||
path = Path(__file__).parent
|
||||
pathname = path / object_name
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -42,4 +42,5 @@ connexion[swagger-ui]
|
|||
aiohttp_jinja2
|
||||
azure-cognitiveservices-speech==1.31.0
|
||||
aioboto3~=11.3.0
|
||||
redis==4.3.5
|
||||
redis==4.3.5
|
||||
websocket-client==1.6.2
|
||||
41
tests/metagpt/learn/test_skill_loader.py
Normal file
41
tests/metagpt/learn/test_skill_loader.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
@Time : 2023/9/19
|
||||
@Author : mashenquan
|
||||
@File : test_skill_loader.py
|
||||
@Desc : Unit tests.
|
||||
"""
|
||||
|
||||
from metagpt.config import CONFIG
|
||||
from metagpt.learn.skill_loader import SkillLoader
|
||||
|
||||
|
||||
def test_suite():
|
||||
CONFIG.agent_skills = [
|
||||
{"id": 1, "name": "text_to_speech", "type": "builtin", "config": {}, "enabled": True},
|
||||
{"id": 2, "name": "text_to_image", "type": "builtin", "config": {}, "enabled": True},
|
||||
{"id": 3, "name": "ai_call", "type": "builtin", "config": {}, "enabled": True},
|
||||
{"id": 3, "name": "data_analysis", "type": "builtin", "config": {}, "enabled": True},
|
||||
{"id": 5, "name": "crawler", "type": "builtin", "config": {"engine": "ddg"}, "enabled": True},
|
||||
{"id": 6, "name": "knowledge", "type": "builtin", "config": {}, "enabled": True},
|
||||
{"id": 6, "name": "web_search", "type": "builtin", "config": {}, "enabled": True},
|
||||
]
|
||||
loader = SkillLoader()
|
||||
skills = loader.get_skill_list()
|
||||
assert skills
|
||||
assert len(skills) >= 3
|
||||
for desc, name in skills.items():
|
||||
assert desc
|
||||
assert name
|
||||
|
||||
entity = loader.get_entity("Assistant")
|
||||
assert entity
|
||||
assert entity.skills
|
||||
for sk in entity.skills:
|
||||
assert sk
|
||||
assert sk.arguments
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_suite()
|
||||
Loading…
Add table
Add a link
Reference in a new issue