feat: +common talk role

This commit is contained in:
莘权 马 2023-08-26 16:59:12 +08:00
parent 4fe3d6e879
commit 2c593bedea
13 changed files with 261 additions and 46 deletions

View file

@ -3,15 +3,41 @@ entities:
skills:
- name: text_to_speech
description: Text-to-speech
id: text_to_speech.text_to_speech
requisite:
- AZURE_TTS_SUBSCRIPTION_KEY
- AZURE_TTS_REGION
arguments:
text: 'The text used for voice conversion. Required.'
lang: 'The value can contain a language code such as en (English), or a locale such as en-US (English - United States). The optional parameter values are "English", "Chinese". Default value: "Chinese".'
voice: 'Default value: "zh-CN-XiaomoNeural".'
style: 'Speaking style to express different emotions like cheerfulness, empathy, and calm. The optional parameter values are "affectionate", "angry", "calm", "cheerful", "depressed", "disgruntled", "embarrassed", "envious", "fearful", "gentle", "sad", "serious". Default value: "affectionate".'
role: 'With roles, the same voice can act as a different age and gender. The optional parameter values are "Girl", "Boy", "OlderAdultFemale", "OlderAdultMale", "SeniorFemale", "SeniorMale", "YoungAdultFemale", "YoungAdultMale". Default value: "Girl".'
examples:
- ask: 'A girl says "hello world"'
answer: 'text_to_speech(text="hello world", role="Girl")'
- ask: 'A boy affectionate says "hello world"'
answer: 'text_to_speech(text="hello world", role="Boy", style="affectionate")'
- ask: 'A boy says "你好"'
answer: 'text_to_speech(text="你好", role="Boy", lang="Chinese")'
returns:
type: string
format: base64
- name: text_to_image
description: Create a drawing based on the text.
id: text_to_image.text_to_image
requisite:
- OPENAI_API_KEY
- METAGPT_TEXT_TO_IMAGE_MODEL
- name: text_to_embedding
description: Convert the text into embeddings.
requisite:
- OPENAI_API_KEY
arguments:
text: 'The text used for image conversion. Required.'
size_type: 'Default value: "512x512".'
examples:
- ask: 'Draw a girl'
answer: 'text_to_image(text="Draw a girl", size_type="512x512")'
- ask: 'Draw an apple'
answer: 'text_to_image(text="Draw an apple", size_type="512x512")'
returns:
type: string
format: base64

View file

@ -0,0 +1,88 @@
import ast
import importlib
from metagpt.actions import Action, ActionOutput
from metagpt.learn.skill_loader import Skill
from metagpt.logs import logger
class ArgumentsParingAction(Action):
    """Ask the LLM to express a user request as a call to a skill, then parse the call's arguments.

    After :meth:`run` completes, ``rsp`` holds the raw LLM reply wrapped in an
    ``ActionOutput`` and ``args`` holds the parsed keyword arguments, or ``None``
    when the reply could not be parsed.
    """

    def __init__(self, options, last_talk: str, skill: Skill, context=None, llm=None, **kwargs):
        """Store the skill declaration and the user request to convert.

        :param options: Options forwarded to the base ``Action``.
        :param last_talk: The user request to turn into a skill call.
        :param skill: Declaration of the skill whose arguments are wanted.
        :param context: Forwarded to the base ``Action``.
        :param llm: Forwarded to the base ``Action``; used by :meth:`run`.
        :param kwargs: Accepted and ignored so callers can pass extra keywords.
        """
        super(ArgumentsParingAction, self).__init__(options=options, name='', context=context, llm=llm)
        self.skill = skill
        self.ask = last_talk
        self.rsp = None
        self.args = None

    @property
    def prompt(self):
        """Build the prompt: parameter descriptions, declared examples, then the actual request."""
        parts = [f"{self.skill.name} function parameters description:\n"]
        parts.extend(f"parameter `{k}`: {v}\n" for k, v in self.skill.arguments.items())
        parts.append("\n")
        parts.append("Examples:\n")
        parts.extend(
            f"If want you to do `{e.ask}`, return `{e.answer}` brief and clear.\n"
            for e in self.skill.examples
        )
        parts.append(f"\nNow I want you to do `{self.ask}`, return in examples format above, brief and clear.")
        return "".join(parts)

    async def run(self, *args, **kwargs) -> ActionOutput:
        """Send the prompt to the LLM, parse the reply into ``self.args`` and return the reply.

        :return: The raw LLM reply wrapped in an ``ActionOutput``.
        """
        prompt = self.prompt
        logger.info(prompt)
        rsp = await self.llm.aask(msg=prompt, system_msgs=[])
        logger.info(rsp)
        self.args = ArgumentsParingAction.parse_arguments(skill_name=self.skill.name, txt=rsp)
        self.rsp = ActionOutput(content=rsp)
        return self.rsp

    @staticmethod
    def parse_arguments(skill_name, txt) -> dict | None:
        """Extract the keyword arguments of a ``skill_name(...)`` call embedded in ``txt``.

        The text between ``skill_name(`` and the last ``)`` is parsed as the
        argument list of a ``dict(...)`` call; only keyword arguments with
        literal values are accepted. Positional arguments are ignored.

        :param skill_name: Name of the function whose call is searched for.
        :param txt: Free text (typically an LLM reply) containing the call.
        :return: Mapping of argument name to literal value, or ``None`` when no
            well-formed call with literal keyword arguments is found.
        """
        prefix = skill_name + "("
        begin_ix = txt.find(prefix)
        if begin_ix < 0:
            logger.error(f"{skill_name} not in {txt}")
            return None
        args_begin = begin_ix + len(prefix)
        end_ix = txt.rfind(")")
        # Covers both "no ')' at all" (rfind returns -1) and a ')' that only
        # appears before the call, which would otherwise yield an empty slice.
        if end_ix < args_begin:
            logger.error(f"')' not in {txt}")
            return None
        args_txt = txt[args_begin:end_ix]
        logger.info(args_txt)
        try:
            call = ast.parse(f"dict({args_txt})", mode='eval').body
            if not isinstance(call, ast.Call):
                # e.g. stray text turned the expression into something other than a call.
                raise ValueError("argument text does not form a single call")
            # literal_eval only accepts literals, so arbitrary expressions in
            # the reply are rejected rather than evaluated.
            return {keyword.arg: ast.literal_eval(keyword.value) for keyword in call.keywords}
        except (SyntaxError, ValueError, TypeError) as e:
            # Malformed LLM output is reported as "no arguments" so the caller
            # can fall back instead of crashing.
            logger.error(f"cannot parse arguments `{args_txt}`: {e}")
            return None
class SkillAction(Action):
    """Invoke the function that implements a skill with already-parsed arguments."""

    def __init__(self, options, skill: Skill, args: dict, context=None, llm=None, **kwargs):
        """Store the skill declaration and the keyword arguments to call it with.

        :param options: Options forwarded to the base ``Action``.
        :param skill: Declaration of the skill to execute.
        :param args: Keyword arguments passed to the skill function.
        :param context: Forwarded to the base ``Action``.
        :param llm: Forwarded to the base ``Action``.
        :param kwargs: Accepted and ignored so callers can pass extra keywords.
        """
        super(SkillAction, self).__init__(options=options, name='', context=context, llm=llm)
        self._skill = skill
        self._args = args
        self.rsp = None

    async def run(self, *args, **kwargs) -> str | ActionOutput | None:
        """Call the skill function and wrap its result.

        :return: ``ActionOutput`` whose content is the function's result
            (``None`` if the function was not found) and whose
            ``instruct_content`` is the skill declaration serialised with ``json()``.
        """
        self.rsp = self.find_and_call_function(self._skill.name, args=self._args, **self.options)
        return ActionOutput(content=self.rsp, instruct_content=self._skill.json())

    @staticmethod
    def find_and_call_function(function_name, args, **kwargs):
        """Look up ``function_name`` in ``metagpt.learn`` and call it.

        :param function_name: Attribute name of the function in ``metagpt.learn``.
        :param args: Keyword arguments for the function.
        :param kwargs: Extra keyword arguments passed alongside ``args``.
        :return: The function's result, or ``None`` when the module or the
            function cannot be found.
        """
        # Only the lookup is guarded: an AttributeError or ModuleNotFoundError
        # raised inside the skill itself must not be misreported as "not found".
        try:
            module = importlib.import_module("metagpt.learn")
            function = getattr(module, function_name)
        except (ModuleNotFoundError, AttributeError):
            logger.error(f"{function_name} not found")
            return None
        # Call the function and return its result.
        return function(**args, **kwargs)
if __name__ == '__main__':
    # Manual smoke run: parse a sample reply that wraps the call in backticks.
    sample_reply = '`text_to_image(text="Draw an apple", size_type="512x512")`'
    ArgumentsParingAction.parse_arguments(skill_name="text_to_image", txt=sample_reply)

View file

@ -4,7 +4,7 @@ from metagpt.logs import logger
class TalkAction(Action):
def __init__(self, options, name: str = '', talk='', history_summary='', context=None, llm=None):
def __init__(self, options, name: str = '', talk='', history_summary='', context=None, llm=None, **kwargs):
context = context or {}
context["talk"] = talk
context["history_summery"] = history_summary

View file

@ -5,3 +5,11 @@
@Author : alexanderwu
@File : __init__.py
"""
from metagpt.learn.text_to_image import text_to_image
from metagpt.learn.text_to_speech import text_to_speech
# Names exported by this package: the skill functions imported above.
__all__ = [
    "text_to_image",
    "text_to_speech",
]

View file

@ -1,14 +1,26 @@
from pathlib import Path
from typing import List, Dict
from typing import List, Dict, Optional
import yaml
from pydantic import BaseModel
class Example(BaseModel):
    """One ask/answer example attached to a skill declaration."""

    # The request, as the user would phrase it.
    ask: str
    # The response expected for that request.
    answer: str
class Returns(BaseModel):
    """Description of what a skill returns."""

    # Type name of the returned value.
    type: str
    # Optional format qualifier of the returned value; None when not declared.
    format: Optional[str] = None
class Skill(BaseModel):
    """Declaration of a single skill; every field is required."""

    # Skill name.
    name: str
    # Human-readable description of the skill.
    description: str
    # Identifier of the skill.
    id: str
    # Entries the skill requires; presumably configuration keys — TODO confirm.
    requisite: List[str]
    # Mapping of argument name to its description.
    arguments: Dict
    # Example requests with their expected answers.
    examples: List[Example]
    # Description of the skill's return value.
    returns: Returns
class EntitySkills(BaseModel):
@ -26,13 +38,26 @@ class SkillLoader:
skills = yaml.safe_load(file)
self._skills = SkillsDeclaration(**skills)
def get_skill_list(self, entity_name: str = "Assistant"):
if not self._skills or entity_name not in self._skills.entities:
def get_skill_list(self, entity_name: str = "Assistant") -> Dict:
entity_skills = self.get_entity(entity_name)
if not entity_skills:
return {}
entity_skills = self._skills.entities.get(entity_name)
description_to_name_mappings = {}
for s in entity_skills.skills:
description_to_name_mappings[s.description] = s.name
return description_to_name_mappings
def get_skill(self, name, entity_name: str = "Assistant") -> Skill:
    """Return the skill called ``name`` declared for ``entity_name``.

    :param name: Skill name to look for.
    :param entity_name: Entity whose skills are searched.
    :return: The first matching skill, or ``None`` when the entity is unknown
        or has no skill with that name.
    """
    owner = self.get_entity(entity_name)
    declared = owner.skills if owner else []
    return next((candidate for candidate in declared if candidate.name == name), None)
def get_entity(self, name) -> EntitySkills:
    """Return the skills entry registered for entity ``name``.

    :param name: Entity name used as key in the loaded declaration.
    :return: The entity's entry, or ``None`` when nothing is loaded or the
        entity is not declared.
    """
    declaration = self._skills
    return declaration.entities.get(name) if declaration else None

View file

@ -16,7 +16,7 @@ from metagpt.utils.common import initialize_environment
@skill_metadata(name="Text to Embedding",
description="Convert the text into embeddings.",
requisite="`OPENAI_API_KEY`")
def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key=""):
def text_to_embedding(text, model="text-embedding-ada-002", openai_api_key="", **kwargs):
"""Text to embedding
:param text: The text used for embedding.

View file

@ -17,7 +17,7 @@ from metagpt.utils.common import initialize_environment
@skill_metadata(name="Text to image",
description="Create a drawing based on the text.",
requisite="`OPENAI_API_KEY` or `METAGPT_TEXT_TO_IMAGE_MODEL`")
def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url=""):
def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url="", **kwargs):
"""Text to image
:param text: The text used for image conversion.
@ -27,8 +27,14 @@ def text_to_image(text, size_type: str = "512x512", openai_api_key="", model_url
:return: The image data is returned in Base64 encoding.
"""
initialize_environment()
image_declaration = "data:image/png;base64,"
if os.environ.get("METAGPT_TEXT_TO_IMAGE_MODEL") or model_url:
return oas3_metagpt_text_to_image(text, size_type, model_url)
data = oas3_metagpt_text_to_image(text, size_type, model_url)
return image_declaration + data if data else ""
if os.environ.get("OPENAI_API_KEY") or openai_api_key:
return oas3_openai_text_to_image(text, size_type, openai_api_key)
data = oas3_openai_text_to_image(text, size_type, openai_api_key)
return image_declaration + data if data else ""
raise EnvironmentError

View file

@ -17,7 +17,7 @@ from metagpt.utils.common import initialize_environment
description="Text-to-speech",
requisite="`AZURE_TTS_SUBSCRIPTION_KEY` and `AZURE_TTS_REGION`")
def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl",
subscription_key="", region=""):
subscription_key="", region="", **kwargs):
"""Text to speech
For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts`
@ -32,8 +32,10 @@ def text_to_speech(text, lang="zh-CN", voice="zh-CN-XiaomoNeural", style="affect
"""
initialize_environment()
audio_declaration = "data:audio/wav;base64,"
if (os.environ.get("AZURE_TTS_SUBSCRIPTION_KEY") and os.environ.get("AZURE_TTS_REGION")) or \
(subscription_key and region):
return oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)
data = oas3_azsure_tts(text, lang, voice, style, role, subscription_key, region)
return audio_declaration + data if data else data
raise EnvironmentError

View file

@ -35,9 +35,15 @@ class BrainMemory(pydantic.BaseModel):
return "\n".join(texts)
def move_to_solution(self):
while len(self.history) > 1:
msg = self.history.pop()
self.solution.append(msg)
if len(self.history) < 2:
return
msgs = self.history[:-1]
self.solution.extend(msgs)
if not self.history[-1].is_contain(MessageType.Talk.value):
self.solution.append(self.history[-1])
self.history = []
else:
self.history = self.history[-1:]
@property
def last_talk(self):

View file

@ -153,26 +153,10 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
self.rpm = int(self._options.get("RPM", 10))
async def _achat_completion_stream(self, messages: list[dict]) -> str:
max_try = 5
response = None
for i in range(max_try):
try:
response = await openai.ChatCompletion.acreate(
response = await self.async_retry_call(openai.ChatCompletion.acreate,
**self._cons_kwargs(messages),
stream=True
)
break
except openai.error.RateLimitError as e:
random_time = random.uniform(0, 3) # 生成0到5秒之间的随机时间
rounded_time = round(random_time, 1) # 保留一位小数以实现0.1秒的精度
logger.warning(f"Exception:{e}, sleeping for {rounded_time} seconds")
await asyncio.sleep(rounded_time)
continue
except Exception as e:
error_str = traceback.format_exc()
logger.error(f"Exception:{e}, stack:{error_str}")
raise e
# create variables to collect the stream of chunks
collected_chunks = []
collected_messages = []
@ -213,12 +197,12 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
return kwargs
async def _achat_completion(self, messages: list[dict]) -> dict:
rsp = await self.llm.ChatCompletion.acreate(**self._cons_kwargs(messages))
rsp = await self.async_retry_call(self.llm.ChatCompletion.acreate, **self._cons_kwargs(messages))
self._update_costs(rsp.get("usage"))
return rsp
def _chat_completion(self, messages: list[dict]) -> dict:
rsp = self.llm.ChatCompletion.create(**self._cons_kwargs(messages))
rsp = self.retry_call(self.llm.ChatCompletion.create, **self._cons_kwargs(messages))
self._update_costs(rsp)
return rsp
@ -398,4 +382,43 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
if match:
return match.group(1), match.group(2)
else:
return None, input_string
return None, input_string
@staticmethod
async def async_retry_call(func, *args, **kwargs):
    """Await ``func(*args, **kwargs)``, retrying on rate-limit and connection errors.

    At most ``OpenAIGPTAPI.MAX_TRY`` attempts are made. A rate-limit error is
    followed by a short random sleep before the next attempt; a connection
    error is retried immediately. Any other exception is logged and re-raised
    at once.

    :param func: Coroutine function to call.
    :return: The awaited result of the first successful attempt.
    :raises Exception: The last rate-limit/connection error once every attempt
        has failed, instead of silently returning ``None``.
    """
    last_error = None
    for _ in range(OpenAIGPTAPI.MAX_TRY):
        try:
            return await func(*args, **kwargs)
        except openai.error.RateLimitError as e:
            last_error = e
            # Random delay between 0 and 3 seconds, rounded to 0.1 s precision.
            rounded_time = round(random.uniform(0, 3), 1)
            logger.warning(f"Exception:{e}, sleeping for {rounded_time} seconds")
            await asyncio.sleep(rounded_time)
        except openai.error.APIConnectionError as e:
            last_error = e
            logger.warning(f"Exception:{e}")
        except Exception as e:
            error_str = traceback.format_exc()
            logger.error(f"Exception:{e}, stack:{error_str}")
            raise  # bare raise keeps the original traceback
    if last_error is not None:
        # Every attempt failed with a retryable error: surface it to the caller.
        raise last_error
@staticmethod
def retry_call(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying on rate-limit and connection errors.

    At most ``OpenAIGPTAPI.MAX_TRY`` attempts are made. A rate-limit error is
    followed by a short random sleep before the next attempt, mirroring
    ``async_retry_call``; a connection error is retried immediately. Any other
    exception is logged and re-raised at once.

    :param func: Callable to invoke.
    :return: The result of the first successful attempt.
    :raises Exception: The last rate-limit/connection error once every attempt
        has failed, instead of silently returning ``None``.
    """
    import time  # local import: only needed for the back-off sleep

    last_error = None
    for _ in range(OpenAIGPTAPI.MAX_TRY):
        try:
            return func(*args, **kwargs)
        except openai.error.RateLimitError as e:
            last_error = e
            # Random delay between 0 and 3 seconds, rounded to 0.1 s precision.
            rounded_time = round(random.uniform(0, 3), 1)
            logger.warning(f"Exception:{e}, sleeping for {rounded_time} seconds")
            time.sleep(rounded_time)
        except openai.error.APIConnectionError as e:
            last_error = e
            logger.warning(f"Exception:{e}")
        except Exception as e:
            error_str = traceback.format_exc()
            logger.error(f"Exception:{e}, stack:{error_str}")
            raise  # bare raise keeps the original traceback
    if last_error is not None:
        # Every attempt failed with a retryable error: surface it to the caller.
        raise last_error
# Upper bound on the number of attempts made by retry_call / async_retry_call.
MAX_TRY = 5

View file

@ -15,8 +15,8 @@
"""
import asyncio
from metagpt.actions import ActionOutput
from metagpt.actions.skill_action import SkillAction, ArgumentsParingAction
from metagpt.actions.talk_action import TalkAction
from metagpt.config import Config
from metagpt.learn.skill_loader import SkillLoader
@ -53,7 +53,7 @@ class Assistant(Role):
logger.info(prompt)
rsp = await self._llm.aask(prompt, [])
logger.info(rsp)
return await self._plan(rsp)
return await self._plan(rsp, last_talk=last_talk)
async def act(self) -> ActionOutput:
result = await self._rc.todo.run(**self._options)
@ -88,8 +88,18 @@ class Assistant(Role):
return True
async def skill_handler(self, text, **kwargs) -> bool:
    """Schedule the skill named ``text``, falling back to plain talk when that is not possible.

    The skill is looked up by name, its arguments are obtained through an
    ``ArgumentsParingAction`` run, and a ``SkillAction`` is queued as the next
    to-do. When the skill is unknown or its arguments cannot be parsed, the
    request is handed to ``talk_handler`` with the user's last talk.

    :param text: Name of the skill to run.
    :param kwargs: Extra keywords; ``last_talk`` is read here and all of them
        are forwarded to the argument-parsing action and to ``talk_handler``.
    :return: ``True`` once a skill action is queued, otherwise whatever
        ``talk_handler`` returns.
    """
    last_talk = kwargs.get("last_talk")
    skill = self.skills.get_skill(text)
    if not skill:
        # Log only when the lookup really failed.
        logger.info(f"skill not found: {text}")
        return await self.talk_handler(text=last_talk, **kwargs)
    parsing_action = ArgumentsParingAction(options=self.options, skill=skill, llm=self._llm, **kwargs)
    await parsing_action.run(**kwargs)
    if parsing_action.args is None:
        # The LLM reply could not be turned into arguments; answer as talk instead.
        return await self.talk_handler(text=last_talk, **kwargs)
    skill_action = SkillAction(options=self.options, skill=skill, args=parsing_action.args, llm=self._llm)
    self.add_to_do(skill_action)
    return True
async def refine_memory(self) -> str:
history_text = self.memory.history_text
@ -97,7 +107,7 @@ class Assistant(Role):
if history_text == "":
return last_talk
history_summary = await self._llm.get_context_title(history_text, max_words=20)
if await self._llm.is_related(last_talk, history_summary): # 合并相关内容
if last_talk and await self._llm.is_related(last_talk, history_summary): # 合并相关内容
last_talk = await self._llm.rewrite(sentence=last_talk, context=history_text)
return last_talk
@ -109,11 +119,20 @@ class Assistant(Role):
from metagpt.provider.openai_api import OpenAIGPTAPI
return OpenAIGPTAPI.extract_info(input_string)
def get_memory(self) -> str:
    """Return the role's memory serialised through its ``json()`` method."""
    serialized = self.memory.json()
    return serialized
def load_memory(self, jsn):
    """Replace the role's memory with a ``BrainMemory`` built from ``jsn``.

    :param jsn: Mapping whose items are passed to ``BrainMemory`` as keyword
        arguments. On any failure the error is logged and the current memory
        is left untouched.
    """
    try:
        restored = BrainMemory(**jsn)
    except Exception as e:
        logger.exception(f"load error:{e}, data:{jsn}")
    else:
        self.memory = restored
async def main():
options = Config().runtime_options
cost_manager = CostManager(**options)
topic = "dataiku vs. datarobot"
topic = "draw an apple"
role = Assistant(options=options, cost_manager=cost_manager, language="Chinese")
await role.talk(topic)
while True:
@ -121,8 +140,9 @@ async def main():
if not has_action:
break
msg = await role.act()
print(msg)
logger.info(msg)
# 获取用户终端输入
logger.info("Enter prompt")
talk = input("You: ")
await role.talk(talk)

View file

@ -325,4 +325,12 @@ class Role:
self._actions.append(act)
def add_to_do(self, act):
    """Set ``act`` as the role context's current to-do.

    :param act: The action to store in ``self._rc.todo``.
    """
    self._rc.todo = act
async def think(self) -> bool:
    """Public wrapper: await the internal ``_think`` step and return its result."""
    decided = await self._think()
    return decided
async def act(self) -> ActionOutput:
    """Await the internal ``_act`` step and repackage its message as an ``ActionOutput``."""
    message = await self._act()
    content, instruct_content = message.content, message.instruct_content
    return ActionOutput(content=content, instruct_content=instruct_content)

View file

@ -67,6 +67,9 @@ class Message:
intersection = set(tags) & self.tags
return len(intersection) > 0
def is_contain(self, tag):
    """Single-tag convenience wrapper around ``is_contain_tags``.

    :param tag: The tag to test for.
    :return: Whatever ``is_contain_tags`` returns for the one-element list ``[tag]``.
    """
    single_tag_list = [tag]
    return self.is_contain_tags(single_tag_list)
@dataclass
class UserMessage(Message):