def __init__(self, skill_yaml_file_name: Optional[Path] = None):
    """Load the skill declarations from a YAML file.

    :param skill_yaml_file_name: Path of the YAML file. When falsy, the loader
        falls back to ``.well-known/skills.yaml`` three directory levels above
        this module.
    """
    if not skill_yaml_file_name:
        skill_yaml_file_name = Path(__file__).parent.parent.parent / ".well-known/skills.yaml"
    # Decode explicitly as UTF-8: the platform default encoding would
    # mis-read non-ASCII text on systems whose locale is not UTF-8.
    with open(str(skill_yaml_file_name), "r", encoding="utf-8") as file:
        skills = yaml.safe_load(file)
    self._skills = SkillsDeclaration(**skills)
async def get_summary(self, text: str, max_words=200):
    """Summarize ``text``, splitting it into windows when it is too long.

    Text shorter than ``DEFAULT_MAX_TOKENS`` characters is summarized directly
    by ``self._get_summary``. Longer text is cut into windows with
    ``self.split_texts``, every window is summarized, the partial summaries
    are joined with newlines and the joined text goes through the same
    procedure again.

    :param text: The content to summarize.
    :param max_words: Word limit forwarded to every ``_get_summary`` call.
    :return: The summary returned by ``_get_summary``.
    :raises openai.error.InvalidRequestError: If the text is still too long
        after 100 merge-and-retry rounds.
    """
    max_token_count = DEFAULT_MAX_TOKENS
    # Keep each window a little below the limit, unless the limit is tiny.
    padding_size = 20 if max_token_count > 20 else 0
    window_size = max_token_count - padding_size

    for _ in range(100):  # safeguard: never loop forever if summaries do not shrink
        if len(text) < max_token_count:
            return await self._get_summary(text, max_words=max_words)

        summaries = []
        for ws in self.split_texts(text, window_size=window_size):
            summaries.append(await self._get_summary(ws, max_words=max_words))
        if len(summaries) == 1:
            return summaries[0]

        # Merge the partial summaries and retry with the shorter text.
        text = "\n".join(summaries)

    # openai<1.0 declares `param` as a required constructor argument; leaving
    # it out raises TypeError instead of the intended InvalidRequestError.
    raise openai.error.InvalidRequestError("text too long", param="text")
async def _get_summary(self, text: str, max_words=20):
    """Ask the model for a summary of ``text`` of fewer than ``max_words`` words.

    Text whose character length is already below ``max_words`` is returned
    unchanged without calling the model.

    :param text: The content to summarize.
    :param max_words: Word limit stated in the prompt sent to the model.
    :return: ``text`` itself for short input, otherwise the reply of ``self.aask``.
    """
    if len(text) >= max_words:
        instruction = f"Translate the above content into a summary of less than {max_words} words."
        prompt = "\n\n".join((text, instruction))
        logger.info(f"summary ask:{prompt}")
        answer = await self.aask(msg=prompt, system_msgs=[])
        logger.info(f"summary rsp: {answer}")
        return answer
    return text
async def cache(self, data: str, file_ext: str, format: str = "") -> Optional[str]:
    """Save data to remote S3 and return its URL.

    The payload is written to a uniquely named temporary file next to this
    module and uploaded under the ``path`` prefix from ``CONFIG.S3``
    (``"system"`` when that is unset). The local file is removed whether or
    not the upload succeeds.

    :param data: Payload to store; base64 text when ``format`` is ``BASE64_FORMAT``.
    :param file_ext: Suffix appended to the generated object name, e.g. ``".png"``.
    :param format: Pass ``BASE64_FORMAT`` to base64-decode ``data`` before writing.
    :return: The URL from ``get_object_url``, or ``None`` if any step fails
        (the failure is logged, not raised).
    """
    import posixpath  # S3 keys always use "/", whatever the host OS is.

    object_name = uuid.uuid4().hex + file_ext
    pathname = Path(__file__).parent / object_name
    try:
        if format == BASE64_FORMAT:
            payload = base64.b64decode(data)
        elif isinstance(data, str):
            # The file is opened in binary mode, so text must be encoded first;
            # writing a str would raise TypeError and be swallowed below.
            payload = data.encode("utf-8")
        else:
            payload = data
        async with aiofiles.open(str(pathname), mode="wb") as file:
            await file.write(payload)

        bucket = CONFIG.S3.get("bucket")
        prefix = CONFIG.S3.get("path") or "system"
        # posixpath keeps forward slashes in the key; os.path.normpath would
        # turn them into backslashes on Windows.
        object_pathname = posixpath.normpath(f"{prefix}/{object_name}")
        await self.upload_file(bucket=bucket, local_path=str(pathname), object_name=object_pathname)
        return await self.get_object_url(bucket=bucket, object_name=object_pathname)
    except Exception as e:
        # Best effort by design: callers fall back to inline data on None.
        logger.exception(f"{e}, stack:{traceback.format_exc()}")
        return None
    finally:
        # Single cleanup point for both the success and the failure path.
        pathname.unlink(missing_ok=True)