mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
feat: +common talk role
This commit is contained in:
parent
4fe3d6e879
commit
2c593bedea
13 changed files with 261 additions and 46 deletions
|
|
@ -3,15 +3,41 @@ entities:
|
|||
skills:
|
||||
- name: text_to_speech
|
||||
description: Text-to-speech
|
||||
id: text_to_speech.text_to_speech
|
||||
requisite:
|
||||
- AZURE_TTS_SUBSCRIPTION_KEY
|
||||
- AZURE_TTS_REGION
|
||||
arguments:
|
||||
text: 'The text used for voice conversion. Required.'
|
||||
lang: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States). The optional values are "English" and "Chinese". Default value: "Chinese".'
|
||||
voice: 'Default value: "zh-CN-XiaomoNeural".'
|
||||
style: 'Speaking style to express different emotions like cheerfulness, empathy, and calm. The optional parameter values are "affectionate", "angry", "calm", "cheerful", "depressed", "disgruntled", "embarrassed", "envious", "fearful", "gentle", "sad", "serious". Default value: "affectionate".'
|
||||
role: 'With roles, the same voice can act as a different age and gender. The optional parameter values are "Girl", "Boy", "OlderAdultFemale", "OlderAdultMale", "SeniorFemale", "SeniorMale", "YoungAdultFemale", "YoungAdultMale". Default value: "Girl".'
|
||||
examples:
|
||||
- ask: 'A girl says "hello world"'
|
||||
answer: 'text_to_speech(text="hello world", role="Girl")'
|
||||
- ask: 'A boy affectionate says "hello world"'
|
||||
answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")'
|
||||
- ask: 'A boy says "你好"'
|
||||
answer: 'text_to_speech(text="你好", role="Boy", lang="Chinese")'
|
||||
returns:
|
||||
type: string
|
||||
format: base64
|
||||
|
||||
- name: text_to_image
|
||||
description: Create a drawing based on the text.
|
||||
id: text_to_image.text_to_image
|
||||
requisite:
|
||||
- OPENAI_API_KEY
|
||||
- METAGPT_TEXT_TO_IMAGE_MODEL
|
||||
- name: text_to_embedding
|
||||
description: Convert the text into embeddings.
|
||||
requisite:
|
||||
- OPENAI_API_KEY
|
||||
arguments:
|
||||
text: 'The text used for image conversion. Required.'
|
||||
size_type: 'Default value: "512x512".'
|
||||
examples:
|
||||
- ask: 'Draw a girl'
|
||||
answer: 'text_to_image(text="Draw a girl", size_type="512x512")'
|
||||
- ask: 'Draw an apple'
|
||||
answer: 'text_to_image(text="Draw an apple", size_type="512x512")'
|
||||
returns:
|
||||
type: string
|
||||
format: base64
|
||||
|
|
|
|||
|
|
@ -0,0 +1,88 @@
|
|||
import ast
|
||||
import importlib
|
||||
|
||||
from metagpt.actions import Action, ActionOutput
|
||||
from metagpt.learn.skill_loader import Skill
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
class ArgumentsParingAction(Action):
    """Ask the LLM to express a request as a skill call and parse its arguments.

    The prompt lists the skill's parameter descriptions and examples, then asks
    the LLM to answer in the same ``skill_name(key=value, ...)`` form. The
    keyword arguments found in the reply are stored in ``self.args``; it stays
    ``None`` when the reply cannot be parsed.
    """

    def __init__(self, options, last_talk: str, skill: Skill, context=None, llm=None, **kwargs):
        """
        :param options: Runtime options forwarded to ``Action``.
        :param last_talk: The request text the arguments are derived from.
        :param skill: Declaration of the skill whose arguments are wanted.
        :param context: Forwarded to ``Action``.
        :param llm: Forwarded to ``Action``; used by ``run`` to query the model.
        :param kwargs: Accepted and ignored so callers can pass extra options.
        """
        super(ArgumentsParingAction, self).__init__(options=options, name='', context=context, llm=llm)
        self.skill = skill
        self.ask = last_talk
        self.rsp = None
        self.args = None

    @property
    def prompt(self):
        """Build the prompt from the skill's argument descriptions and examples."""
        parts = [f"{self.skill.name} function parameters description:\n"]
        parts.extend(f"parameter `{k}`: {v}\n" for k, v in self.skill.arguments.items())
        parts.append("\n")
        parts.append("Examples:\n")
        parts.extend(
            f"If want you to do `{e.ask}`, return `{e.answer}` brief and clear.\n"
            for e in self.skill.examples
        )
        parts.append(f"\nNow I want you to do `{self.ask}`, return in examples format above, brief and clear.")
        return "".join(parts)

    async def run(self, *args, **kwargs) -> ActionOutput:
        """Query the LLM, parse its reply into ``self.args`` and return the raw reply."""
        prompt = self.prompt
        logger.info(prompt)
        rsp = await self.llm.aask(msg=prompt, system_msgs=[])
        logger.info(rsp)
        self.args = ArgumentsParingAction.parse_arguments(skill_name=self.skill.name, txt=rsp)
        self.rsp = ActionOutput(content=rsp)
        return self.rsp

    @staticmethod
    def parse_arguments(skill_name, txt) -> dict:
        """Extract the keyword arguments of a ``skill_name(...)`` call found in ``txt``.

        The text between the first ``skill_name(`` and the last ``)`` is parsed
        as the argument list of a ``dict(...)`` call; only literal keyword
        values are accepted (``ast.literal_eval``), nothing is executed.

        :param skill_name: Name of the function expected in the text.
        :param txt: Text (typically an LLM reply) containing the call.
        :return: Mapping of argument name to literal value, or ``None`` when
            the call is missing or its arguments cannot be parsed.
        """
        prefix = skill_name + "("
        begin_ix = txt.find(prefix)
        if begin_ix < 0:
            logger.error(f"{skill_name} not in {txt}")
            return None
        args_start = begin_ix + len(prefix)
        end_ix = txt.rfind(")")
        # The closing parenthesis must come after the opening one; otherwise the
        # slice below would silently yield an empty argument list.
        if end_ix < args_start:
            logger.error(f"')' not in {txt}")
            return None
        args_txt = txt[args_start:end_ix]
        logger.info(args_txt)
        fake_expression = f"dict({args_txt})"
        try:
            parsed_expression = ast.parse(fake_expression, mode='eval')
            call = parsed_expression.body
            # Text such as `a=1) or f(b=2` parses, but not as a single call.
            if not isinstance(call, ast.Call):
                raise ValueError("not a single call expression")
            return {keyword.arg: ast.literal_eval(keyword.value) for keyword in call.keywords}
        except (SyntaxError, ValueError, TypeError) as e:
            # Malformed replies are expected from an LLM; callers treat None as
            # "could not parse" and fall back to another handler.
            logger.error(f"cannot parse arguments `{args_txt}`: {e}")
            return None
|
||||
|
||||
|
||||
class SkillAction(Action):
    """Run a skill function from ``metagpt.learn`` with already-parsed arguments."""

    def __init__(self, options, skill: Skill, args: dict, context=None, llm=None, **kwargs):
        """
        :param options: Runtime options; forwarded to ``Action``.
        :param skill: Declaration of the skill to run; its ``name`` selects the function.
        :param args: Keyword arguments to call the skill function with.
        :param context: Forwarded to ``Action``.
        :param llm: Forwarded to ``Action``.
        :param kwargs: Accepted and ignored so callers can pass extra options.
        """
        super(SkillAction, self).__init__(options=options, name='', context=context, llm=llm)
        self._skill = skill
        self._args = args
        self.rsp = None

    # The annotation names the type actually returned; a `str | ... | None`
    # union is evaluated at definition time and fails before Python 3.10.
    async def run(self, *args, **kwargs) -> ActionOutput:
        """Run action"""
        self.rsp = self.find_and_call_function(self._skill.name, args=self._args, **self.options)
        return ActionOutput(content=self.rsp, instruct_content=self._skill.json())

    @staticmethod
    def find_and_call_function(function_name, args, **kwargs):
        """Look up ``function_name`` in ``metagpt.learn`` and call it.

        :param function_name: Attribute name of the function in ``metagpt.learn``.
        :param args: Keyword arguments for the function.
        :param kwargs: Additional keyword arguments passed to the function.
        :return: The function's result, or ``None`` when the module or the
            function cannot be found.
        """
        # Only the lookup is guarded: an AttributeError raised inside the skill
        # itself must not be reported as "not found".
        try:
            module = importlib.import_module("metagpt.learn")
            function = getattr(module, function_name)
        except (ModuleNotFoundError, AttributeError):
            logger.error(f"{function_name} not found")
            return None
        # Call the function and return its result.
        return function(**args, **kwargs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: parse a sample reply wrapped in backticks.
    sample_reply = '`text_to_image(text="Draw an apple", size_type="512x512")`'
    ArgumentsParingAction.parse_arguments(skill_name="text_to_image", txt=sample_reply)
|
||||
|
|
@ -4,7 +4,7 @@ from metagpt.logs import logger
|
|||
|
||||
|
||||
class TalkAction(Action):
|
||||
def __init__(self, options, name: str = '', talk='', history_summary='', context=None, llm=None):
|
||||
def __init__(self, options, name: str = '', talk='', history_summary='', context=None, llm=None, **kwargs):
|
||||
context = context or {}
|
||||
context["talk"] = talk
|
||||
context["history_summery"] = history_summary
|
||||
|
|
|
|||
|
|
@ -5,3 +5,11 @@
|
|||
@Author : alexanderwu
|
||||
@File : __init__.py
|
||||
"""
|
||||
|
||||
from metagpt.learn.text_to_image import text_to_image
|
||||
from metagpt.learn.text_to_speech import text_to_speech
|
||||
|
||||
__all__ = [
|
||||
"text_to_image",
|
||||
"text_to_speech",
|
||||
]
|
||||
|
|
@ -1,14 +1,26 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class Example(BaseModel):
    # One worked example of a skill declaration: a request and the call that answers it.
    ask: str  # the request text
    answer: str  # the expected reply for that request
|
||||
|
||||
class Returns(BaseModel):
    # Declared return value of a skill.
    type: str  # declared type name
    format: Optional[str] = None  # optional format qualifier; may be omitted
|
||||
|
||||
class Skill(BaseModel):
    # Declaration of one skill. All fields are required.
    name: str
    description: str
    id: str
    requisite: List[str]
    arguments: Dict  # presumably parameter name -> description text; verify against the YAML
    examples: List[Example]
    returns: Returns
|
||||
|
||||
|
||||
class EntitySkills(BaseModel):
|
||||
|
|
@ -26,13 +38,26 @@ class SkillLoader:
|
|||
skills = yaml.safe_load(file)
|
||||
self._skills = SkillsDeclaration(**skills)
|
||||
|
||||
def get_skill_list(self, entity_name: str = "Assistant") -> Dict:
    """Return a ``{description: name}`` mapping of the skills of one entity.

    :param entity_name: Name of the entity whose skills are listed.
    :return: Mapping of skill description to skill name; empty when the
        entity is unknown or no skills are loaded.
    """
    entity_skills = self.get_entity(entity_name)
    if not entity_skills:
        return {}

    return {s.description: s.name for s in entity_skills.skills}
|
||||
|
||||
def get_skill(self, name, entity_name: str = "Assistant") -> Skill:
    """Find a skill of ``entity_name`` by its name.

    :return: The first skill whose ``name`` matches, or ``None`` when the
        entity or the skill does not exist.
    """
    entity = self.get_entity(entity_name)
    if not entity:
        return None
    return next((sk for sk in entity.skills if sk.name == name), None)
|
||||
|
||||
def get_entity(self, name) -> EntitySkills:
    """Return the skills entry registered under ``name``, or ``None`` when nothing matches or nothing is loaded."""
    declaration = self._skills
    return declaration.entities.get(name) if declaration else None
|
||||
|
|
@ -16,7 +16,7 @@ from metagpt.utils.common import initialize_environment
|
|||
@skill_metadata(name="Text to Embedding",
|
||||
description="Convert the text into embeddings.",
|
||||
requisite="`OPENAI_API_KEY`")
|
||||
def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
|
||||
def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key="", **kwargs):
|
||||
"""Text to embedding
|
||||
|
||||
:param text: The text used for embedding.
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from metagpt.utils.common import initialize_environment
|
|||
@skill_metadata(name="Text to image",
|
||||
description="Create a drawing based on the text.",
|
||||
requisite="`OPENAI_API_KEY` or `METAGPT_TEXT_TO_IMAGE_MODEL`")
|
||||
def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url=""):
|
||||
def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url="", **kwargs):
|
||||
"""Text to image
|
||||
|
||||
:param text: The text used for image conversion.
|
||||
|
|
@ -27,8 +27,14 @@ def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url
|
|||
:return: The image data is returned in Base64 encoding.
|
||||
"""
|
||||
initialize_environment()
|
||||
image_declaration = "data:image/png;base64,"
|
||||
if os.environ.get("METAGPT_TEXT_TO_IMAGE_MODEL") or model_url:
|
||||
return oas3_metagpt_text_to_image(text, size_type, model_url)
|
||||
data = oas3_metagpt_text_to_image(text, size_type, model_url)
|
||||
return image_declaration + data if data else ""
|
||||
if os.environ.get("OPENAI_API_KEY") or openai_api_key:
|
||||
return oas3_openai_text_to_image(text, size_type, openai_api_key)
|
||||
data = oas3_openai_text_to_image(text, size_type, openai_api_key)
|
||||
return image_declaration + data if data else ""
|
||||
|
||||
raise EnvironmentError
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from metagpt.utils.common import initialize_environment
|
|||
description="Text-to-speech",
|
||||
requisite="`AZURE_TTS_SUBSCRIPTION_KEY` and `AZURE_TTS_REGION`")
|
||||
def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl",
|
||||
subscription_key="", region=""):
|
||||
subscription_key="", region="", **kwargs):
|
||||
"""Text to speech
|
||||
For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
|
||||
|
||||
|
|
@ -32,8 +32,10 @@ def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affect
|
|||
|
||||
"""
|
||||
initialize_environment()
|
||||
audio_declaration = "data:audio/wav;base64,"
|
||||
if (os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY") and os.environ.get("AZURE_TTS_REGION")) or \
|
||||
(subscription_key and region):
|
||||
return oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)
|
||||
data = oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)
|
||||
return audio_declaration + data if data else data
|
||||
|
||||
raise EnvironmentError
|
||||
|
|
|
|||
|
|
@ -35,9 +35,15 @@ class BrainMemory(pydantic.BaseModel):
|
|||
return "\n".join(texts)
|
||||
|
||||
def move_to_solution(self):
    """Move finished history into ``solution``.

    Nothing happens with fewer than two history entries. Otherwise every
    entry except the last is appended to ``solution``; the last entry stays
    in ``history`` only when it carries the Talk tag, else it is moved too.
    """
    if len(self.history) < 2:
        return
    *earlier, latest = self.history
    self.solution.extend(earlier)
    if latest.is_contain(MessageType.Talk.value):
        # A trailing talk message is still pending, so keep it as the new history.
        self.history = [latest]
    else:
        self.solution.append(latest)
        self.history = []
|
||||
|
||||
@property
|
||||
def last_talk(self):
|
||||
|
|
|
|||
|
|
@ -153,26 +153,10 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
|
|||
self.rpm = int(self._options.get("RPM", 10))
|
||||
|
||||
async def _achat_completion_stream(self, messages: list[dict]) -> str:
|
||||
max_try = 5
|
||||
response = None
|
||||
for i in range(max_try):
|
||||
try:
|
||||
response = await openai.ChatCompletion.acreate(
|
||||
response = await self.async_retry_call(openai.ChatCompletion.acreate,
|
||||
**self._cons_kwargs(messages),
|
||||
stream=True
|
||||
)
|
||||
break
|
||||
except openai.error.RateLimitError as e:
|
||||
random_time = random.uniform(0, 3) # 生成0到5秒之间的随机时间
|
||||
rounded_time = round(random_time, 1) # 保留一位小数,以实现0.1秒的精度
|
||||
logger.warning(f"Exception:{e}, sleeping for {rounded_time} seconds")
|
||||
await asyncio.sleep(rounded_time)
|
||||
continue
|
||||
except Exception as e:
|
||||
error_str = traceback.format_exc()
|
||||
logger.error(f"Exception:{e}, stack:{error_str}")
|
||||
raise e
|
||||
|
||||
# create variables to collect the stream of chunks
|
||||
collected_chunks = []
|
||||
collected_messages = []
|
||||
|
|
@ -213,12 +197,12 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
|
|||
return kwargs
|
||||
|
||||
async def _achat_completion(self, messages: list[dict]) -> dict:
|
||||
rsp = await self.llm.ChatCompletion.acreate(**self._cons_kwargs(messages))
|
||||
rsp = await self.async_retry_call(self.llm.ChatCompletion.acreate, **self._cons_kwargs(messages))
|
||||
self._update_costs(rsp.get("usage"))
|
||||
return rsp
|
||||
|
||||
def _chat_completion(self, messages: list[dict]) -> dict:
|
||||
rsp = self.llm.ChatCompletion.create(**self._cons_kwargs(messages))
|
||||
rsp = self.retry_call(self.llm.ChatCompletion.create, **self._cons_kwargs(messages))
|
||||
self._update_costs(rsp)
|
||||
return rsp
|
||||
|
||||
|
|
@ -398,4 +382,43 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
|
|||
if match:
|
||||
return match.group(1), match.group(2)
|
||||
else:
|
||||
return None, input_string
|
||||
return None, input_string
|
||||
|
||||
@staticmethod
async def async_retry_call(func, *args, **kwargs):
    """Await ``func(*args, **kwargs)``, retrying on rate-limit and connection errors.

    At most ``OpenAIGPTAPI.MAX_TRY`` attempts are made. A rate-limit error is
    followed by a random pause before the next attempt; a connection error is
    retried immediately.

    :return: The result of the first successful call.
    :raises openai.error.RateLimitError, openai.error.APIConnectionError:
        the last such error once all attempts are used up.
    :raises Exception: any other error, immediately and after logging it.
    """
    last_error = None
    for _ in range(OpenAIGPTAPI.MAX_TRY):
        try:
            return await func(*args, **kwargs)
        except openai.error.RateLimitError as e:
            last_error = e
            # Random delay between 0 and 3 seconds, rounded to 0.1 s precision.
            rounded_time = round(random.uniform(0, 3), 1)
            logger.warning(f"Exception:{e}, sleeping for {rounded_time} seconds")
            await asyncio.sleep(rounded_time)
        except openai.error.APIConnectionError as e:
            last_error = e
            logger.warning(f"Exception:{e}")
        except Exception as e:
            error_str = traceback.format_exc()
            logger.error(f"Exception:{e}, stack:{error_str}")
            raise
    # Surface the real failure instead of handing callers a silent None.
    if last_error is not None:
        raise last_error
    return None
|
||||
|
||||
@staticmethod
def retry_call(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying on rate-limit and connection errors.

    Synchronous counterpart of ``async_retry_call``. At most
    ``OpenAIGPTAPI.MAX_TRY`` attempts are made. A rate-limit error is
    followed by a random pause before the next attempt; a connection error is
    retried immediately.

    :return: The result of the first successful call.
    :raises openai.error.RateLimitError, openai.error.APIConnectionError:
        the last such error once all attempts are used up.
    :raises Exception: any other error, immediately and after logging it.
    """
    import time  # local import: the file's import block is not visible from this block

    last_error = None
    for _ in range(OpenAIGPTAPI.MAX_TRY):
        try:
            return func(*args, **kwargs)
        except openai.error.RateLimitError as e:
            last_error = e
            # Back off like the async variant; retrying at once would just hit the limit again.
            rounded_time = round(random.uniform(0, 3), 1)
            logger.warning(f"Exception:{e}, sleeping for {rounded_time} seconds")
            time.sleep(rounded_time)
        except openai.error.APIConnectionError as e:
            last_error = e
            logger.warning(f"Exception:{e}")
        except Exception as e:
            error_str = traceback.format_exc()
            logger.error(f"Exception:{e}, stack:{error_str}")
            raise
    # Surface the real failure instead of handing callers a silent None.
    if last_error is not None:
        raise last_error
    return None
|
||||
|
||||
MAX_TRY = 5  # number of attempts async_retry_call and retry_call make for one request
|
||||
|
|
|
|||
|
|
@ -15,8 +15,8 @@
|
|||
"""
|
||||
import asyncio
|
||||
|
||||
|
||||
from metagpt.actions import ActionOutput
|
||||
from metagpt.actions.skill_action import SkillAction, ArgumentsParingAction
|
||||
from metagpt.actions.talk_action import TalkAction
|
||||
from metagpt.config import Config
|
||||
from metagpt.learn.skill_loader import SkillLoader
|
||||
|
|
@ -53,7 +53,7 @@ class Assistant(Role):
|
|||
logger.info(prompt)
|
||||
rsp = await self._llm.aask(prompt, [])
|
||||
logger.info(rsp)
|
||||
return await self._plan(rsp)
|
||||
return await self._plan(rsp, last_talk=last_talk)
|
||||
|
||||
async def act(self) -> ActionOutput:
|
||||
result = await self._rc.todo.run(**self._options)
|
||||
|
|
@ -88,8 +88,18 @@ class Assistant(Role):
|
|||
return True
|
||||
|
||||
async def skill_handler(self, text, **kwargs) -> bool:
    """Queue the skill named ``text`` for execution, falling back to plain talk.

    :param text: Name of the skill to look up.
    :param kwargs: Extra options; ``last_talk`` is the request text that is
        used for the fallback and forwarded to the argument-parsing action.
    :return: ``True`` once a ``SkillAction`` is queued, otherwise the result
        of ``talk_handler``.
    """
    last_talk = kwargs.get("last_talk")
    skill = self.skills.get_skill(text)
    if not skill:
        # Log only when the lookup actually failed.
        logger.info(f"skill not found: {text}")
        return await self.talk_handler(text=last_talk, **kwargs)
    action = ArgumentsParingAction(options=self.options, skill=skill, llm=self._llm, **kwargs)
    await action.run(**kwargs)
    if action.args is None:
        # The arguments could not be parsed from the LLM reply.
        return await self.talk_handler(text=last_talk, **kwargs)
    action = SkillAction(options=self.options, skill=skill, args=action.args, llm=self._llm)
    self.add_to_do(action)
    return True
|
||||
|
||||
async def refine_memory(self) -> str:
|
||||
history_text = self.memory.history_text
|
||||
|
|
@ -97,7 +107,7 @@ class Assistant(Role):
|
|||
if history_text == "":
|
||||
return last_talk
|
||||
history_summary = await self._llm.get_context_title(history_text, max_words=20)
|
||||
if await self._llm.is_related(last_talk, history_summary): # 合并相关内容
|
||||
if last_talk and await self._llm.is_related(last_talk, history_summary): # 合并相关内容
|
||||
last_talk = await self._llm.rewrite(sentence=last_talk, context=history_text)
|
||||
return last_talk
|
||||
|
||||
|
|
@ -109,11 +119,20 @@ class Assistant(Role):
|
|||
from metagpt.provider.openai_api import OpenAIGPTAPI
|
||||
return OpenAIGPTAPI.extract_info(input_string)
|
||||
|
||||
def get_memory(self) -> str:
    """Serialize the role's memory by calling its ``json()`` method."""
    memory = self.memory
    return memory.json()
|
||||
|
||||
def load_memory(self, jsn):
    """Restore ``self.memory`` from serialized data.

    :param jsn: Either a mapping of ``BrainMemory`` fields or a JSON string
        such as the one ``get_memory`` returns.

    Any failure is logged; ``self.memory`` is only replaced on success.
    """
    import json  # local import: the file's import block is not visible from this block

    try:
        # Accept the string form too, so get_memory()/load_memory() round-trip.
        data = json.loads(jsn) if isinstance(jsn, (str, bytes)) else jsn
        self.memory = BrainMemory(**data)
    except Exception as e:
        logger.exception(f"load error:{e}, data:{jsn}")
|
||||
|
||||
|
||||
async def main():
|
||||
options = Config().runtime_options
|
||||
cost_manager = CostManager(**options)
|
||||
topic = "dataiku vs. datarobot"
|
||||
topic = "draw an apple"
|
||||
role = Assistant(options=options, cost_manager=cost_manager, language="Chinese")
|
||||
await role.talk(topic)
|
||||
while True:
|
||||
|
|
@ -121,8 +140,9 @@ async def main():
|
|||
if not has_action:
|
||||
break
|
||||
msg = await role.act()
|
||||
print(msg)
|
||||
logger.info(msg)
|
||||
# 获取用户终端输入
|
||||
logger.info("Enter prompt")
|
||||
talk = input("You: ")
|
||||
await role.talk(talk)
|
||||
|
||||
|
|
|
|||
|
|
@ -325,4 +325,12 @@ class Role:
|
|||
self._actions.append(act)
|
||||
|
||||
def add_to_do(self, act):
    """Set ``act`` as the role context's current to-do action."""
    self._rc.todo = act
|
||||
|
||||
async def think(self) -> bool:
    """Public wrapper that awaits ``_think`` and returns its result."""
    outcome = await self._think()
    return outcome
|
||||
|
||||
async def act(self) -> ActionOutput:
    """Await ``_act`` and repackage its message as an ``ActionOutput``."""
    message = await self._act()
    content, instruct_content = message.content, message.instruct_content
    return ActionOutput(content=content, instruct_content=instruct_content)
|
||||
|
|
|
|||
|
|
@ -67,6 +67,9 @@ class Message:
|
|||
intersection = set(tags) & self.tags
|
||||
return len(intersection) > 0
|
||||
|
||||
def is_contain(self, tag):
    """Single-tag convenience form of ``is_contain_tags``."""
    single_tag = [tag]
    return self.is_contain_tags(single_tag)
|
||||
|
||||
|
||||
@dataclass
|
||||
class UserMessage(Message):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue