From 89be81524c963a64e5e21c4cc05126bf289eb63e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= <mashenquan@fuzhi.ai>
Date: Fri, 15 Sep 2023 21:56:39 +0800
Subject: [PATCH] feat: update skill specification

---
 .well-known/skills.yaml       | 213 +++++++++++++++++++++++-----------
 metagpt/learn/skill_loader.py |  61 +++++++---
 2 files changed, 189 insertions(+), 85 deletions(-)

diff --git a/.well-known/skills.yaml b/.well-known/skills.yaml
index d08d7aced..137bfcdb4 100644
--- a/.well-known/skills.yaml
+++ b/.well-known/skills.yaml
@@ -1,72 +1,149 @@
+skillapi: "0.1.0"
+
+info:
+  title: "Agent Skill Specification"
+  version: "1.0"
+
 entities:
   Assistant:
-     skills:
-     - name: text_to_speech
-       description: Text-to-speech
-       id: text_to_speech.text_to_speech
-       x-prerequisite:
-         - name: AZURE_TTS_SUBSCRIPTION_KEY
-           description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
-         - name: AZURE_TTS_REGION
-           description: "For more details, check out: [Azure Text-to_Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
-       arguments:
-         text: 'The text used for voice conversion. Required.'
-         lang: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States). The optional parameter are "English", "Chinese". Default value: "Chinese".'
-         voice: 'Default value: "zh-CN-XiaomoNeural".'
-         style: 'Speaking style to express different emotions like cheerfulness, empathy, and calm. The optional parameter values are "affectionate", "angry", "calm", "cheerful", "depressed", "disgruntled", "embarrassed", "envious", "fearful", "gentle", "sad", "serious". Default value: "affectionate".'
-         role: 'With roles, the same voice can act as a different age and gender. The optional parameter values are "Girl", "Boy", "OlderAdultFemale", "OlderAdultMale", "SeniorFemale", "SeniorMale", "YoungAdultFemale", "YoungAdultMale". Default value: "Girl".'
-       examples:
-         - ask: 'A girl says "hello world"'
-           answer: 'text_to_speech(text="hello world", role="Girl")'
-         - ask: 'A boy affectionate says "hello world"'
-           answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")'
-         - ask: 'A boy says "你好"'
-           answer: 'text_to_speech(text="你好", role="Boy", lang="Chinese")'
-         - ask: 'How to speak "你好"?'
-           answer: 'text_to_speech(text="你好", lang="Chinese")'
-       returns:
-         type: string
-         format: base64
+    summary: assistant
+    description: assistant
+    skills:
+      - name: text_to_speech
+        description: Text-to-speech
+        id: text_to_speech.text_to_speech
+        required:
+          oneOf:
+            - schema:
+               type: object
+               properties:
+                 AZURE_TTS_SUBSCRIPTION_KEY:
+                   type: string
+                   description: "For more details, check out: [Azure Text-to-Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
+                 AZURE_TTS_REGION:
+                   type: string
+                   description: "For more details, check out: [Azure Text-to-Speech](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts)"
+            - schema:
+               type: object
+               properties:
+                IFLYTEK_APP_ID:
+                  type: string
+                  description: "Application ID is used to access your iFlyTek service API, see: `https://console.xfyun.cn/services/tts`"
+                IFLYTEK_API_KEY:
+                  type: string
+                  description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`"
+                IFLYTEK_API_SECRET:
+                  type: string
+                  description: "WebAPI argument, see: `https://console.xfyun.cn/services/tts`"
+        parameters:
+          text:
+            description: 'The text used for voice conversion.'
+            required: true
+            type: string
+          lang:
+            description: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States).'
+            type: string
+            enum:
+              - English
+              - Chinese
+            default: Chinese
+          voice:
+            description: Name of voice styles
+            type: string
+            default: zh-CN-XiaomoNeural
+          style: 
+            type: string
+            description: Speaking style to express different emotions like cheerfulness, empathy, and calm. 
+            enum:
+              - affectionate
+              - angry
+              - calm
+              - cheerful
+              - depressed
+              - disgruntled
+              - embarrassed
+              - envious
+              - fearful
+              - gentle
+              - sad
+              - serious
+            default: affectionate
+          role:
+            type: string
+            description: With roles, the same voice can act as a different age and gender.
+            enum:
+              - Girl
+              - Boy
+              - OlderAdultFemale
+              - OlderAdultMale
+              - SeniorFemale
+              - SeniorMale
+              - YoungAdultFemale
+              - YoungAdultMale
+            default: Girl
+        examples:
+           - ask: 'A girl says "hello world"'
+             answer: 'text_to_speech(text="hello world", role="Girl")'
+           - ask: 'A boy affectionate says "hello world"'
+             answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")'
+           - ask: 'A boy says "你好"'
+             answer: 'text_to_speech(text="你好", role="Boy", lang="Chinese")'
+        returns:
+          type: string
+          format: base64
 
-     - name: text_to_image
-       description: Create a drawing based on the text.
-       id: text_to_image.text_to_image
-       x-prerequisite:
-       - name: OPENAI_API_KEY
-         description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
-       - name: METAGPT_TEXT_TO_IMAGE_MODEL_URL
-         description: "Model url."
-       arguments:
-         text: 'The text used for image conversion. Required.'
-         size_type: 'Default value: "512x512".'
-       examples:
-         - ask: 'Draw a girl'
-           answer: 'text_to_image(text="Draw a girl", size_type="512x512")'
-         - ask: 'Draw an apple'
-           answer: 'text_to_image(text="Draw an apple", size_type="512x512")'
-         - ask: 'Draw an apple picture'
-           answer: 'text_to_image(text="Draw an apple", size_type="512x512")'
-         - ask: 'Draw an apple image'
-           answer: 'text_to_image(text="Draw an apple", size_type="512x512")'
-       returns:
-         type: string
-         format: base64
+      - name: text_to_image
+        description: Create a drawing based on the text.
+        id: text_to_image.text_to_image
+        required:
+          oneOf: 
+            - name: OPENAI_API_KEY
+              type: string
+              description: "OpenAI API key, For more details, checkout: `https://platform.openai.com/account/api-keys`"
+            - name: METAGPT_TEXT_TO_IMAGE_MODEL_URL
+              type: string
+              description: "Model url."
+        parameters:
+          text: 
+            description: 'The text used for image conversion.'
+            type: string
+            required: true
+          size_type:
+            description: size type
+            type: string
+            default: "512x512"
+        examples:
+          - ask: 'Draw a girl'
+            answer: 'text_to_image(text="Draw a girl", size_type="512x512")'
+          - ask: 'Draw an apple'
+            answer: 'text_to_image(text="Draw an apple", size_type="512x512")'
+        returns:
+          type: string
+          format: base64
 
-     - name: web_search
-       description: Perform Google searches to provide real-time information.
-       id: web_search.web_search
-       x-prerequisite:
-       - name: SEARCH_ENGINE
-         description: "Supported values: serpapi/google/serper/ddg"
-       - name: SERPER_API_KEY
-         description: "SERPER API KEY, For more details, checkout: `https://serper.dev/api-key`"
-       arguments:
-         query: 'The search query. Required.'
-         max_results: 'The number of search results to retrieve. Default value: 6.'
-       examples:
-         - ask: 'Search for information about artificial intelligence'
-           answer: 'web_search(query="Search for information about artificial intelligence", max_results=6)'
-         - ask: 'Find news articles about climate change'
-           answer: 'web_search(query="Find news articles about climate change", max_results=6)'
-       returns:
-         type: string
\ No newline at end of file
+      - name: web_search
+        description: Perform Google searches to provide real-time information.
+        id: web_search.web_search
+        required:
+          - name: SEARCH_ENGINE
+            type: string
+            description: "Supported values: serpapi/google/serper/ddg"
+          - name: SERPER_API_KEY
+            type: string
+            description: "SERPER API KEY, For more details, checkout: `https://serper.dev/api-key`"
+        parameters:
+          query:
+            type: string
+            description: 'The search query.'
+            required: true
+          max_results:
+            type: number
+            default: 6
+            description: 'The number of search results to retrieve.'
+        examples:
+          - ask: 'Search for information about artificial intelligence'
+            answer: 'web_search(query="Search for information about artificial intelligence", max_results=6)'
+          - ask: 'Find news articles about climate change'
+            answer: 'web_search(query="Find news articles about climate change", max_results=6)'
+        returns:
+          type: string
diff --git a/metagpt/learn/skill_loader.py b/metagpt/learn/skill_loader.py
index 83200bca6..b1d27db92 100644
--- a/metagpt/learn/skill_loader.py
+++ b/metagpt/learn/skill_loader.py
@@ -7,10 +7,10 @@
 @Desc    : Skill YAML Configuration Loader.
 """
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
 import yaml
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 
 from metagpt.config import CONFIG
 
@@ -25,29 +25,43 @@ class Returns(BaseModel):
     format: Optional[str] = None
 
 
-class Prerequisite(BaseModel):
-    name: str
-    type: Optional[str] = None
-    description: Optional[str] = None
-    default: Optional[str] = None
+class Parameter(BaseModel):
+    type: str
+    description: str = None
 
 
 class Skill(BaseModel):
     name: str
-    description: str
-    id: str
-    x_prerequisite: Optional[List[Prerequisite]] = Field(default=None, alias="x-prerequisite")
-    arguments: Dict
+    description: str = None
+    id: str = None
+    required: Optional[Union[List, Dict]] = None
+    parameters: Dict[str, Parameter] = None
     examples: List[Example]
     returns: Returns
 
+    @property
+    def arguments(self) -> Dict:
+        if not self.parameters:
+            return {}
+        ret = {}
+        for k, v in self.parameters.items():
+            ret[k] = v.description if v.description else ""
+        return ret
 
-class EntitySkills(BaseModel):
+
+class Entity(BaseModel):
+    name: str = None
     skills: List[Skill]
 
 
+class Components(BaseModel):  # Declares no fields yet; used by SkillsDeclaration.components.
+    pass
+
+
 class SkillsDeclaration(BaseModel):
-    entities: Dict[str, EntitySkills]
+    skillapi: str
+    entities: Dict[str, Entity]
+    components: Components = None
 
 
 class SkillLoader:
@@ -60,8 +74,8 @@ class SkillLoader:
 
     def get_skill_list(self, entity_name: str = "Assistant") -> Dict:
         """Return the skill name based on the skill description."""
-        entity_skills = self.get_entity(entity_name)
-        if not entity_skills:
+        entity = self.get_entity(entity_name)
+        if not entity:
             return {}
 
         agent_skills = CONFIG.agent_skills
@@ -73,7 +87,7 @@ class SkillLoader:
 
         names = [AgentSkill(**i).name for i in agent_skills]
         description_to_name_mappings = {}
-        for s in entity_skills.skills:
+        for s in entity.skills:
             if s.name not in names:
                 continue
             description_to_name_mappings[s.description] = s.name
@@ -89,8 +103,21 @@ class SkillLoader:
             if sk.name == name:
                 return sk
 
-    def get_entity(self, name) -> EntitySkills:
+    def get_entity(self, name) -> Entity:
         """Return a list of skills for the entity."""
         if not self._skills:
             return None
         return self._skills.entities.get(name)
+
+
+if __name__ == "__main__":
+    CONFIG.agent_skills = [
+        {"id": 1, "name": "text_to_speech", "type": "builtin", "config": {}, "enabled": True},
+        {"id": 2, "name": "text_to_image", "type": "builtin", "config": {}, "enabled": True},
+        {"id": 3, "name": "ai_call", "type": "builtin", "config": {}, "enabled": True},
+        {"id": 3, "name": "data_analysis", "type": "builtin", "config": {}, "enabled": True},
+        {"id": 5, "name": "crawler", "type": "builtin", "config": {"engine": "ddg"}, "enabled": True},
+        {"id": 6, "name": "knowledge", "type": "builtin", "config": {}, "enabled": True},
+    ]
+    loader = SkillLoader()
+    print(loader.get_skill_list())