From 89be81524c963a64e5e21c4cc05126bf289eb63e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Fri, 15 Sep 2023 21:56:39 +0800 Subject: [PATCH] feat: update skill specification --- .well-known/skills.yaml | 213 +++++++++++++++++++++++----------- metagpt/learn/skill_loader.py | 61 +++++++--- 2 files changed, 189 insertions(+), 85 deletions(-) diff --git a/.well-known/skills.yaml b/.well-known/skills.yaml index d08d7aced..137bfcdb4 100644 --- a/.well-known/skills.yaml +++ b/.well-known/skills.yaml @@ -1,72 +1,149 @@ +skillapi: "0.1.0" + +info: + title: "Agent Skill Specification" + version: "1.0" + entities: Assistant: - skills: - - name: text_to_speech - description: Text-to-speech - id: text_to_speech.text_to_speech - x-prerequisite: - - name: AZURE_TTS_SUBSCRIPTION_KEY - description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" - - name: AZURE_TTS_REGION - description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" - arguments: - text: 'The text used for voice conversion. Required.' - lang: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States). The optional parameter are "English", "Chinese". Default value: "Chinese".' - voice: 'Default value: "zh-CN-XiaomoNeural".' - style: 'Speaking style to express different emotions like cheerfulness, empathy, and calm. The optional parameter values are "affectionate", "angry", "calm", "cheerful", "depressed", "disgruntled", "embarrassed", "envious", "fearful", "gentle", "sad", "serious". Default value: "affectionate".' - role: 'With roles, the same voice can act as a different age and gender. 
The optional parameter values are "Girl", "Boy", "OlderAdultFemale", "OlderAdultMale", "SeniorFemale", "SeniorMale", "YoungAdultFemale", "YoungAdultMale". Default value: "Girl".' - examples: - - ask: 'A girl says "hello world"' - answer: 'text_to_speech(text="hello world", role="Girl")' - - ask: 'A boy affectionate says "hello world"' - answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")' - - ask: 'A boy says "你好"' - answer: 'text_to_speech(text="你好", role="Boy", lang="Chinese")' - - ask: 'How to speak "你好"?' - answer: 'text_to_speech(text="你好", lang="Chinese")' - returns: - type: string - format: base64 + summary: assistant + description: assistant + skills: + - name: text_to_speech + description: Text-to-speech + id: text_to_speech.text_to_speech + required: + oneOf: + - schema: + type: object + properties: + AZURE_TTS_SUBSCRIPTION_KEY: + type: string + description: "For more details, check out: [Azure Text-to-Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + AZURE_TTS_REGION: + type: string + description: "For more details, check out: [Azure Text-to-Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)" + - schema: + type: object + properties: + IFLYTEK_APP_ID: + type: string + description: "Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`" + IFLYTEK_API_KEY: + type: string + description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`" + IFLYTEK_API_SECRET: + type: string + description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`" + parameters: + text: + description: 'The text used for voice conversion.' + required: true + type: string + lang: + description: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States).' 
+ type: string + enum: + - English + - Chinese + default: Chinese + voice: + description: Name of voice styles + type: string + default: zh-CN-XiaomoNeural + style: + type: string + description: Speaking style to express different emotions like cheerfulness, empathy, and calm. + enum: + - affectionate + - angry + - calm + - cheerful + - depressed + - disgruntled + - embarrassed + - envious + - fearful + - gentle + - sad + - serious + default: affectionate + role: + type: string + description: With roles, the same voice can act as a different age and gender. + enum: + - Girl + - Boy + - OlderAdultFemale + - OlderAdultMale + - SeniorFemale + - SeniorMale + - YoungAdultFemale + - YoungAdultMale + default: Girl + examples: + - ask: 'A girl says "hello world"' + answer: 'text_to_speech(text="hello world", role="Girl")' + - ask: 'A boy affectionate says "hello world"' + answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")' + - ask: 'A boy says "你好"' + answer: 'text_to_speech(text="你好", role="Boy", lang="Chinese")' + returns: + type: string + format: base64 - - name: text_to_image - description: Create a drawing based on the text. - id: text_to_image.text_to_image - x-prerequisite: - - name: OPENAI_API_KEY - description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`" - - name: METAGPT_TEXT_TO_IMAGE_MODEL_URL - description: "Model url." - arguments: - text: 'The text used for image conversion. Required.' - size_type: 'Default value: "512x512".' 
- examples: - - ask: 'Draw a girl' - answer: 'text_to_image(text="Draw a girl", size_type="512x512")' - - ask: 'Draw an apple' - answer: 'text_to_image(text="Draw an apple", size_type="512x512")' - - ask: 'Draw an apple picture' - answer: 'text_to_image(text="Draw an apple", size_type="512x512")' - - ask: 'Draw an apple image' - answer: 'text_to_image(text="Draw an apple", size_type="512x512")' - returns: - type: string - format: base64 + - name: text_to_image + description: Create a drawing based on the text. + id: text_to_image.text_to_image + required: + oneOf: + - name: OPENAI_API_KEY + type: string + description: "OpenAI API key. For more details, check out: `https://platform.openai.com/account/api-keys`" + - name: METAGPT_TEXT_TO_IMAGE_MODEL_URL + type: string + description: "Model url." + parameters: + text: + description: 'The text used for image conversion.' + type: string + required: true + size_type: + description: size type + type: string + default: "512x512" + examples: + - ask: 'Draw a girl' + answer: 'text_to_image(text="Draw a girl", size_type="512x512")' + - ask: 'Draw an apple' + answer: 'text_to_image(text="Draw an apple", size_type="512x512")' + returns: + type: string + format: base64 - - name: web_search - description: Perform Google searches to provide real-time information. - id: web_search.web_search - x-prerequisite: - - name: SEARCH_ENGINE - description: "Supported values: serpapi/google/serper/ddg" - - name: SERPER_API_KEY - description: "SERPER API KEY, For more details, checkout: `https://serper.dev/api-key`" - arguments: - query: 'The search query. Required.' - max_results: 'The number of search results to retrieve. Default value: 6.' 
- examples: - - ask: 'Search for information about artificial intelligence' - answer: 'web_search(query="Search for information about artificial intelligence", max_results=6)' - - ask: 'Find news articles about climate change' - answer: 'web_search(query="Find news articles about climate change", max_results=6)' - returns: - type: string \ No newline at end of file + - name: web_search + description: Perform Google searches to provide real-time information. + id: web_search.web_search + required: + - name: SEARCH_ENGINE + type: string + description: "Supported values: serpapi/google/serper/ddg" + - name: SERPER_API_KEY + type: string + description: "SERPER API KEY. For more details, check out: `https://serper.dev/api-key`" + parameters: + query: + type: string + description: 'The search query.' + required: true + max_results: + type: number + default: 6 + description: 'The number of search results to retrieve.' + examples: + - ask: 'Search for information about artificial intelligence' + answer: 'web_search(query="Search for information about artificial intelligence", max_results=6)' + - ask: 'Find news articles about climate change' + answer: 'web_search(query="Find news articles about climate change", max_results=6)' + returns: + type: string diff --git a/metagpt/learn/skill_loader.py b/metagpt/learn/skill_loader.py index 83200bca6..b1d27db92 100644 --- a/metagpt/learn/skill_loader.py +++ b/metagpt/learn/skill_loader.py @@ -7,10 +7,10 @@ @Desc : Skill YAML Configuration Loader. 
""" from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union import yaml -from pydantic import BaseModel, Field +from pydantic import BaseModel from metagpt.config import CONFIG @@ -25,29 +25,43 @@ class Returns(BaseModel): format: Optional[str] = None -class Prerequisite(BaseModel): - name: str - type: Optional[str] = None - description: Optional[str] = None - default: Optional[str] = None +class Parameter(BaseModel): + type: str + description: str = None class Skill(BaseModel): name: str - description: str - id: str - x_prerequisite: Optional[List[Prerequisite]] = Field(default=None, alias="x-prerequisite") - arguments: Dict + description: str = None + id: str = None + required: Optional[Union[List, Dict]] = None + parameters: Dict[str, Parameter] = None examples: List[Example] returns: Returns + @property + def arguments(self) -> Dict: + if not self.parameters: + return {} + ret = {} + for k, v in self.parameters.items(): + ret[k] = v.description if v.description else "" + return ret -class EntitySkills(BaseModel): + +class Entity(BaseModel): + name: str = None skills: List[Skill] +class Components(BaseModel): + pass + + class SkillsDeclaration(BaseModel): - entities: Dict[str, EntitySkills] + skillapi: str + entities: Dict[str, Entity] + components: Components = None class SkillLoader: @@ -60,8 +74,8 @@ class SkillLoader: def get_skill_list(self, entity_name: str = "Assistant") -> Dict: """Return the skill name based on the skill description.""" - entity_skills = self.get_entity(entity_name) - if not entity_skills: + entity = self.get_entity(entity_name) + if not entity: return {} agent_skills = CONFIG.agent_skills @@ -73,7 +87,7 @@ class SkillLoader: names = [AgentSkill(**i).name for i in agent_skills] description_to_name_mappings = {} - for s in entity_skills.skills: + for s in entity.skills: if s.name not in names: continue description_to_name_mappings[s.description] = s.name @@ -89,8 +103,21 @@ class 
SkillLoader: if sk.name == name: return sk - def get_entity(self, name) -> EntitySkills: + def get_entity(self, name) -> Entity: """Return a list of skills for the entity.""" if not self._skills: return None return self._skills.entities.get(name) + + +if __name__ == "__main__": + CONFIG.agent_skills = [ + {"id": 1, "name": "text_to_speech", "type": "builtin", "config": {}, "enabled": True}, + {"id": 2, "name": "text_to_image", "type": "builtin", "config": {}, "enabled": True}, + {"id": 3, "name": "ai_call", "type": "builtin", "config": {}, "enabled": True}, + {"id": 3, "name": "data_analysis", "type": "builtin", "config": {}, "enabled": True}, + {"id": 5, "name": "crawler", "type": "builtin", "config": {"engine": "ddg"}, "enabled": True}, + {"id": 6, "name": "knowledge", "type": "builtin", "config": {}, "enabled": True}, + ] + loader = SkillLoader() + print(loader.get_skill_list())