From c4bd06dcb8c062fca4c8be1948c6fdaa0eff47d3 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Mon, 12 Aug 2024 18:16:16 +0800 Subject: [PATCH 01/30] limit outputs to be only thought and response category --- metagpt/prompts/di/role_zero.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 765825198..ef71f5f85 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -133,7 +133,9 @@ For requests that are unclear, lack sufficient detail, or are outside the system {examples} -Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. Your response: +Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. +You should **only** provide the thought, response category, and nothing else. +Your response: """ From 7b49e4db381d179568401fc18213bc4a035f6bcf Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Mon, 12 Aug 2024 19:00:50 +0800 Subject: [PATCH 02/30] limit quickthink output --- metagpt/prompts/di/role_zero.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index cc593b434..895424e07 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -149,7 +149,7 @@ For requests that are unclear, lack sufficient detail, or are outside the system {examples} Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. -You should **only** provide the thought, response category, and nothing else. +Important: You should **only** provide the thought, response category, and nothing else. 
Your response: """ From b8f05f582762264361cea2cedc21a92bdf03ba62 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Tue, 13 Aug 2024 16:17:12 +0800 Subject: [PATCH 03/30] =?UTF-8?q?=E5=B0=86quick=20think=20prompt=E6=94=B9?= =?UTF-8?q?=E4=B8=BAsystem=20prompt=EF=BC=8C=E5=87=8F=E5=B0=91request?= =?UTF-8?q?=E7=A6=BB=E7=BB=93=E5=B0=BE=E7=9A=84=E8=B7=9D=E7=A6=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/role_zero.py | 47 ++++++++++++++++++++------------ metagpt/roles/di/data_analyst.py | 2 +- metagpt/roles/di/role_zero.py | 8 +++++- metagpt/roles/di/team_leader.py | 10 ++++--- 4 files changed, 43 insertions(+), 24 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 895424e07..1a340cdef 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -125,32 +125,45 @@ If no issues are detected, the original json data should be returned unchanged. Output the JSON data in a format that can be loaded by the json.loads() function. """ -QUICK_THINK_PROMPT = """ +QUICK_THINK_SYSTEM_PROMPT = """ +Your role is to determine the appropriate response category for the given request. + # Response Categories ## QUICK: -For straightforward questions or requests that can be answered directly. This includes common-sense inquiries, legal or logical questions, basic math, short coding tasks, multiple-choice questions, greetings, casual chat, and inquiries about you or your team. +For straightforward questions or requests that can be answered directly. This includes common-sense inquiries, legal or logical questions, basic math, short coding tasks, multiple-choice questions, greetings, casual chat, schedule planning, and inquiries about you or your team. ## SEARCH For queries that require retrieving up-to-date or detailed information. This includes time-sensitive or location-specific questions like current events or weather. 
Use this only if the information isn't readily available. ## TASK -For complex requests that involve multiple steps or detailed instructions. Examples include software development, project planning, or any task that requires a sequence of actions. +For complex requests that involve tool utilizations, multiple steps or detailed instructions. Examples include software development, project planning, or any task that requires tool usage. ## AMBIGUOUS For requests that are unclear, lack sufficient detail, or are outside the system's capabilities. Common characteristics of AMBIGUOUS requests: -- Incomplete Information: Requests that imply complex tasks but lack critical details (e.g., "Redesign this logo" without providing the original logo or specifying design requirements). +- Incomplete Information: Requests that imply complex tasks but lack critical details (e.g., "Redesign this logo" without specifying design requirements). - Vagueness: Broad, unspecified, or unclear requests that make it difficult to provide a precise answer. -- Out of Expertise: Requests for specialized advice (e.g., medical or legal advice) or highly technical tasks beyond the model's scope. - Unrealistic Scope: Overly broad requests that are impossible to address meaningfully in a single response (e.g., "Tell me everything about..."). +- Missing files: Requests that refer to specific documents, images, or data without providing them for reference. (when providing a file, website, or data, either the content, link, or path **must** be included) -**Note:** Before categorizing a request as TASK, consider whether the user has provided sufficient information to proceed with the task. If the request is complex but lacks essential details or the mentioned files, it should fall under AMBIGUOUS. +**Note:** Before categorizing a request as TASK: +1. Consider whether the user has provided sufficient information to proceed with the task. 
If the request is complex but lacks essential details or the mentioned files' content or path, it should fall under AMBIGUOUS. +2. If the request is a "how-to" question that asks for a general approach or strategy, it should be categorized as QUICK. +3. Writing a travel/learning plan or providing a general outline should be categorized as **QUICK* because it doesn't involve detailed instructions or tool usage. {examples} +""" +QUICK_THINK_PROMPT = """ +# Instruction +Determine the previous message's intent. Respond with a concise thought, then provide the appropriate response category: QUICK, SEARCH, TASK, or AMBIGUOUS. -Important: You should **only** provide the thought, response category, and nothing else. -Your response: + +# Format +Thought: [Your thought here] +Response Category: [QUICK/SEARCH/TASK/AMBIGUOUS] + +# Response: """ @@ -166,7 +179,7 @@ Thought: This is a general knowledge question that can be answered concisely. Response Category: QUICK. 3. Request: "Can you help me plan a healthy diet for a week?" -Thought: The user is requesting a simple plan that can be provided immediately. +Thought: Writing a diet plan is a general task that can be answered directly. Response Category: QUICK. 4. Request: "Can you help me find the latest research papers on deep learning?" @@ -178,18 +191,16 @@ Thought: This is a detailed software development task that requires multiple ste Response Category: TASK. 6. Request: "Summarize this document for me." -Thought: The request mentions summarizing a document but doesn't provide the document itself, making it impossible to fulfill. +Thought: The request mentions summarizing a document but doesn't provide the path or content of the document, making it impossible to fulfill. Response Category: AMBIGUOUS. -7. Request: "Optimize this process." +7. Request: "Summarize this document for me '/data/path/docmument.pdf'." +Thought: The request mentions summarizing a document and has provided the path to the document. 
It can be done by reading the document using a tool then summarizing it. +Response Category: TASK. + +8. Request: "Optimize this process." Thought: The request is vague and lacks specifics, requiring clarification on the process to optimize. Response Category: AMBIGUOUS. -8. Request: "Create a poster for our upcoming event." -Thought: Critical details like event theme, date, and location are missing, making it impossible to complete the task. -Response Category: AMBIGUOUS. - -# Instruction """ - -QUICK_THINK_PROMPT = QUICK_THINK_PROMPT.format(examples=QUICK_THINK_EXAMPLES) \ No newline at end of file +# QUICK_THINK_EXAMPLES = "" \ No newline at end of file diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index f65042217..78f0b3040 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -30,7 +30,7 @@ class DataAnalyst(RoleZero): instruction: str = ROLE_INSTRUCTION + EXTRA_INSTRUCTION task_type_desc: str = TASK_TYPE_DESC - tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser"] + tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser", "Editor:write,read"] custom_tools: list[str] = ["web scraping", "Terminal"] custom_tool_recommender: ToolRecommender = None experience_retriever: Annotated[ExpRetriever, Field(exclude=True)] = KeywordExpRetriever() diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 8d46b9c02..7cdd9bdea 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -21,6 +21,8 @@ from metagpt.prompts.di.role_zero import ( CMD_PROMPT, JSON_REPAIR_PROMPT, QUICK_THINK_PROMPT, + QUICK_THINK_EXAMPLES, + QUICK_THINK_SYSTEM_PROMPT, REGENERATE_PROMPT, ROLE_INSTRUCTION, SYSTEM_PROMPT, @@ -249,6 +251,10 @@ class RoleZero(Role): rsp = await self._act() actions_taken += 1 return rsp # return output from the last action + + def format_quick_system_prompt(self) -> str: + """Format the system prompt for quick thinking.""" + return 
QUICK_THINK_SYSTEM_PROMPT.format(examples=QUICK_THINK_EXAMPLES) async def _quick_think(self) -> Tuple[Message, str]: answer = "" @@ -260,7 +266,7 @@ class RoleZero(Role): # routing memory = self.get_memories(k=4) # FIXME: A magic number for two rounds of Q&A context = self.llm.format_msg(memory + [UserMessage(content=QUICK_THINK_PROMPT)]) - intent_result = await self.llm.aask(context) + intent_result = await self.llm.aask(context, system_msgs=self.format_quick_system_prompt()) if "QUICK" in intent_result or "AMBIGUOUS " in intent_result: # llm call with the original context async with ThoughtReporter(enable_llm_stream=True) as reporter: diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index 353e00620..f934c09f9 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -26,7 +26,7 @@ class TeamLeader(RoleZero): # TeamLeader only reacts once each time, but may encounter errors or need to ask human, thus allowing 2 more turns max_react_loop: int = 3 - tools: list[str] = ["Plan", "RoleZero", "TeamLeader"] + tools: list[str] = ["Plan", "RoleZero", "TeamLeader", "Editor:write,read"] experience_retriever: Annotated[ExpRetriever, Field(exclude=True)] = SimpleExpRetriever() @@ -48,12 +48,14 @@ class TeamLeader(RoleZero): team_info += f"{role.name}: {role.profile}, {role.goal}\n" return team_info - async def _quick_think(self) -> Message: - # insert team info for quick question - self.llm.system_prompt = QUICK_THINK_SYSTEM_PROMPT.format( + def format_quick_system_prompt(self) -> str: + qt_system_prompt = super().format_quick_system_prompt() + return qt_system_prompt + QUICK_THINK_SYSTEM_PROMPT.format( role_info=super()._get_prefix(), team_info=self._get_team_info(), ) + + async def _quick_think(self) -> Message: return await super()._quick_think() async def _think(self) -> bool: From 3e63ea6fa3f20a98b2c0b0f780063c24eea567ec Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Tue, 13 Aug 2024 16:49:35 +0800 
Subject: [PATCH 04/30] update prompt --- metagpt/actions/search_enhanced_qa.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/metagpt/actions/search_enhanced_qa.py b/metagpt/actions/search_enhanced_qa.py index 1d7944d61..fd5e3897b 100644 --- a/metagpt/actions/search_enhanced_qa.py +++ b/metagpt/actions/search_enhanced_qa.py @@ -45,7 +45,9 @@ Follow **Instructions**, generate output and make sure it follows the **Constrai SEARCH_ENHANCED_QA_SYSTEM_PROMPT = """ You are a large language AI assistant built by MGX. You are given a user question, and please write clean, concise and accurate answer to the question. You will be given a set of related contexts to the question, each starting with a reference number like [[citation:x]], where x is a number. Please use the context. -Your answer must be correct, accurate and written by an expert using an unbiased and professional tone. Please limit to 1024 tokens. Do not give any information that is not related to the question, and do not repeat. Say "information is missing on" followed by the related topic, if the given context do not provide sufficient information. Do not include [citation] in your anwser. +Your answer must be correct, accurate and written by an expert using an unbiased and professional tone. Please limit to 1024 tokens. Do not give any information that is not related to the question, and do not repeat. Say "information is missing on" followed by the related topic, if the given context do not provide sufficient information. + +Do not include [citation:x] in your anwser, where x is a number. Other than code and specific names and citations, your answer must be written in the same language as the question. 
Here are the set of contexts: @@ -93,7 +95,7 @@ class SearchEnhancedQA(Action): @model_validator(mode="after") def initialize(self): if self.web_browse_and_summarize_action is None: - self.web_browser_engine = WebBrowserEngine.from_browser_config( + web_browser_engine = WebBrowserEngine.from_browser_config( self.config.browser, proxy=self.config.proxy, java_script_enabled=self.java_script_enabled, @@ -101,7 +103,7 @@ class SearchEnhancedQA(Action): user_agent=self.user_agent, ) - self.web_browse_and_summarize_action = WebBrowseAndSummarize(web_browser_engine=self.web_browser_engine) + self.web_browse_and_summarize_action = WebBrowseAndSummarize(web_browser_engine=web_browser_engine) return self From 19dee63729f3dca4ea8d5811e90ec723cdee28f1 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Tue, 13 Aug 2024 17:12:01 +0800 Subject: [PATCH 05/30] optimize quick think system prompt --- metagpt/prompts/di/role_zero.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 1a340cdef..dfd8f0684 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -130,13 +130,13 @@ Your role is to determine the appropriate response category for the given reques # Response Categories ## QUICK: -For straightforward questions or requests that can be answered directly. This includes common-sense inquiries, legal or logical questions, basic math, short coding tasks, multiple-choice questions, greetings, casual chat, schedule planning, and inquiries about you or your team. +For straightforward questions or requests that can be answered directly. This includes common-sense inquiries, legal or logical questions, basic math, short coding tasks, multiple-choice questions, greetings, casual chat, daily planning, and inquiries about you or your team. ## SEARCH For queries that require retrieving up-to-date or detailed information. 
This includes time-sensitive or location-specific questions like current events or weather. Use this only if the information isn't readily available. ## TASK -For complex requests that involve tool utilizations, multiple steps or detailed instructions. Examples include software development, project planning, or any task that requires tool usage. +For requests that involve tool utilizations, computer operations, multiple steps or detailed instructions. Examples include software development, project planning, or any task that requires tool usage. ## AMBIGUOUS For requests that are unclear, lack sufficient detail, or are outside the system's capabilities. Common characteristics of AMBIGUOUS requests: @@ -148,8 +148,7 @@ For requests that are unclear, lack sufficient detail, or are outside the system **Note:** Before categorizing a request as TASK: 1. Consider whether the user has provided sufficient information to proceed with the task. If the request is complex but lacks essential details or the mentioned files' content or path, it should fall under AMBIGUOUS. -2. If the request is a "how-to" question that asks for a general approach or strategy, it should be categorized as QUICK. -3. Writing a travel/learning plan or providing a general outline should be categorized as **QUICK* because it doesn't involve detailed instructions or tool usage. +2. If the request is a "how-to" question that asks for a general plan, approach or strategy, it should be categorized as QUICK. {examples} """ From 8fa8b4b141249828c572c462289769d257ba82c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Tue, 13 Aug 2024 17:35:49 +0800 Subject: [PATCH 06/30] Try common encoding formats when reading a file. 
--- metagpt/tools/libs/editor.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index c2fdcb859..eed92e96b 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -219,9 +219,25 @@ class Editor(BaseModel): @staticmethod def _read_text(path: Union[str, Path]) -> List[str]: - with open(str(path), "r") as f: - lines = f.readlines() - return lines + encoding_format_list = [ + "utf-8", + "ascii", + "gb2312", + "gbk", + "iso-8859-1", + "cp1252", + "utf-16", + "utf-16-le", + "utf-16-be", + ] + for encoding in encoding_format_list: + try: + with open(str(path), "r", encoding=encoding) as f: + lines = f.readlines() + return lines + except: + pass + return [f"Reading failed: `{path}` cannot be decoded by `{encoding_format_list}`. Please ask a human for help."] @staticmethod async def _read_pdf(path: Union[str, Path]) -> List[str]: From d6fd6f5ca0f79a83df92ecb605218a34a3965607 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Tue, 13 Aug 2024 18:16:51 +0800 Subject: [PATCH 07/30] remove comment --- metagpt/prompts/di/role_zero.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index dfd8f0684..1bd2984a5 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -148,7 +148,7 @@ For requests that are unclear, lack sufficient detail, or are outside the system **Note:** Before categorizing a request as TASK: 1. Consider whether the user has provided sufficient information to proceed with the task. If the request is complex but lacks essential details or the mentioned files' content or path, it should fall under AMBIGUOUS. -2. If the request is a "how-to" question that asks for a general plan, approach or strategy, it should be categorized as QUICK. +2. 
If the request is a "how-to" question that asks for a general study plan, approach or strategy, it should be categorized as QUICK. {examples} """ @@ -177,8 +177,8 @@ Response Category: QUICK. Thought: This is a general knowledge question that can be answered concisely. Response Category: QUICK. -3. Request: "Can you help me plan a healthy diet for a week?" -Thought: Writing a diet plan is a general task that can be answered directly. +3. Request: "Please help me write a learning plan for Python web crawlers" +Thought: Writing a learning plan is a daily planning task that can be answered directly. Response Category: QUICK. 4. Request: "Can you help me find the latest research papers on deep learning?" @@ -201,5 +201,4 @@ Response Category: TASK. Thought: The request is vague and lacks specifics, requiring clarification on the process to optimize. Response Category: AMBIGUOUS. -""" -# QUICK_THINK_EXAMPLES = "" \ No newline at end of file +""" \ No newline at end of file From 91d5e8491e01d75eede6e23535e8b4ab88c1ac20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Tue, 13 Aug 2024 19:51:38 +0800 Subject: [PATCH 08/30] fix format issuse --- metagpt/tools/libs/editor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index eed92e96b..e314cb15d 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -237,7 +237,7 @@ class Editor(BaseModel): return lines except: pass - return [f"Reading failed: `{path}` cannot be decoded by `{encoding_format_list}`. 
Please ask a human for help."] + return [f"Reading failed: `{path}` cannot be decoded by `{encoding_format_list}`."] @staticmethod async def _read_pdf(path: Union[str, Path]) -> List[str]: From 2968c181c1fcbca137551d7a79aa0aa4a82d3837 Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Tue, 13 Aug 2024 22:34:12 +0800 Subject: [PATCH 09/30] remove global config --- examples/agent_creator.py | 3 +- metagpt/actions/rebuild_class_view.py | 4 +- metagpt/actions/rebuild_sequence_view.py | 4 +- metagpt/actions/research.py | 5 +-- metagpt/actions/talk_action.py | 3 +- metagpt/config2.py | 19 +++++---- metagpt/configs/browser_config.py | 13 +++++- metagpt/configs/search_config.py | 11 ++++- metagpt/context.py | 4 +- .../environment/minecraft/minecraft_env.py | 4 +- metagpt/exp_pool/decorator.py | 11 +++-- metagpt/exp_pool/manager.py | 6 +-- .../ext/stanford_town/actions/st_action.py | 7 ++-- metagpt/ext/stanford_town/utils/utils.py | 3 +- metagpt/learn/text_to_embedding.py | 6 ++- metagpt/learn/text_to_image.py | 5 ++- metagpt/learn/text_to_speech.py | 7 ++-- metagpt/memory/brain_memory.py | 23 ++++++---- metagpt/rag/factories/embedding.py | 42 ++++++++++--------- metagpt/rag/factories/llm.py | 24 ++++++++--- metagpt/rag/schema.py | 3 +- metagpt/software_company.py | 4 +- metagpt/tools/__init__.py | 27 +++--------- metagpt/tools/libs/gpt_v_generator.py | 6 ++- metagpt/tools/ut_writer.py | 3 +- metagpt/utils/embedding.py | 3 +- metagpt/utils/make_sk_kernel.py | 3 +- metagpt/utils/mermaid.py | 7 +++- metagpt/utils/mmdc_pyppeteer.py | 5 ++- metagpt/utils/repair_llm_raw_output.py | 16 +++++-- .../roles/di/run_swe_agent_for_benchmark.py | 3 +- tests/metagpt/test_document.py | 4 +- tests/metagpt/tools/test_azure_tts.py | 4 +- .../tools/test_metagpt_text_to_image.py | 4 +- tests/metagpt/tools/test_moderation.py | 4 +- .../tools/test_openai_text_to_image.py | 4 +- tests/metagpt/tools/test_ut_writer.py | 4 +- .../utils/test_repair_llm_raw_output.py | 4 +- tests/mock/mock_llm.py | 4 +- 
39 files changed, 193 insertions(+), 123 deletions(-) diff --git a/examples/agent_creator.py b/examples/agent_creator.py index bd58840ce..34160d398 100644 --- a/examples/agent_creator.py +++ b/examples/agent_creator.py @@ -6,12 +6,13 @@ Author: garylin2099 import re from metagpt.actions import Action -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.const import METAGPT_ROOT from metagpt.logs import logger from metagpt.roles import Role from metagpt.schema import Message +config = Config.default() EXAMPLE_CODE_FILE = METAGPT_ROOT / "examples/build_customized_agent.py" MULTI_ACTION_AGENT_CODE_EXAMPLE = EXAMPLE_CODE_FILE.read_text() diff --git a/metagpt/actions/rebuild_class_view.py b/metagpt/actions/rebuild_class_view.py index ff030ec87..64f003f91 100644 --- a/metagpt/actions/rebuild_class_view.py +++ b/metagpt/actions/rebuild_class_view.py @@ -14,7 +14,6 @@ from typing import Optional, Set, Tuple import aiofiles from metagpt.actions import Action -from metagpt.config2 import config from metagpt.const import ( AGGREGATION, COMPOSITION, @@ -40,7 +39,7 @@ class RebuildClassView(Action): graph_db: Optional[GraphRepository] = None - async def run(self, with_messages=None, format=config.prompt_schema): + async def run(self, with_messages=None, format=None): """ Implementation of `Action`'s `run` method. @@ -48,6 +47,7 @@ class RebuildClassView(Action): with_messages (Optional[Type]): An optional argument specifying messages to react to. format (str): The format for the prompt schema. 
""" + format = format if format else self.config.prompt_schema graph_repo_pathname = self.context.git_repo.workdir / GRAPH_REPO_FILE_REPO / self.context.git_repo.workdir.name self.graph_db = await DiGraphRepository.load_from(str(graph_repo_pathname.with_suffix(".json"))) repo_parser = RepoParser(base_directory=Path(self.i_context)) diff --git a/metagpt/actions/rebuild_sequence_view.py b/metagpt/actions/rebuild_sequence_view.py index fd356d58f..e23487511 100644 --- a/metagpt/actions/rebuild_sequence_view.py +++ b/metagpt/actions/rebuild_sequence_view.py @@ -18,7 +18,6 @@ from pydantic import BaseModel from tenacity import retry, stop_after_attempt, wait_random_exponential from metagpt.actions import Action -from metagpt.config2 import config from metagpt.const import GRAPH_REPO_FILE_REPO from metagpt.logs import logger from metagpt.repo_parser import CodeBlockInfo, DotClassInfo @@ -84,7 +83,7 @@ class RebuildSequenceView(Action): graph_db: Optional[GraphRepository] = None - async def run(self, with_messages=None, format=config.prompt_schema): + async def run(self, with_messages=None, format=None): """ Implementation of `Action`'s `run` method. @@ -92,6 +91,7 @@ class RebuildSequenceView(Action): with_messages (Optional[Type]): An optional argument specifying messages to react to. format (str): The format for the prompt schema. 
""" + format = format if format else self.config.prompt_schema graph_repo_pathname = self.context.git_repo.workdir / GRAPH_REPO_FILE_REPO / self.context.git_repo.workdir.name self.graph_db = await DiGraphRepository.load_from(str(graph_repo_pathname.with_suffix(".json"))) if not self.i_context: diff --git a/metagpt/actions/research.py b/metagpt/actions/research.py index 5e670520c..98edfddb0 100644 --- a/metagpt/actions/research.py +++ b/metagpt/actions/research.py @@ -8,7 +8,6 @@ from typing import Any, Callable, Coroutine, Optional, Union from pydantic import TypeAdapter, model_validator from metagpt.actions import Action -from metagpt.config2 import config from metagpt.logs import logger from metagpt.tools.search_engine import SearchEngine from metagpt.tools.web_browser_engine import WebBrowserEngine @@ -134,8 +133,8 @@ class CollectLinks(Action): if len(remove) == 0: break - model_name = config.llm.model - prompt = reduce_message_length(gen_msg(), model_name, system_text, config.llm.max_token) + model_name = self.config.llm.model + prompt = reduce_message_length(gen_msg(), model_name, system_text, self.config.llm.max_token) logger.debug(prompt) queries = await self._aask(prompt, [system_text]) try: diff --git a/metagpt/actions/talk_action.py b/metagpt/actions/talk_action.py index 81f66f9a1..3fec32783 100644 --- a/metagpt/actions/talk_action.py +++ b/metagpt/actions/talk_action.py @@ -9,7 +9,6 @@ from typing import Optional from metagpt.actions import Action -from metagpt.config2 import config from metagpt.logs import logger from metagpt.schema import Message @@ -26,7 +25,7 @@ class TalkAction(Action): @property def language(self): - return self.context.kwargs.language or config.language + return self.context.kwargs.language or self.config.language @property def prompt(self): diff --git a/metagpt/config2.py b/metagpt/config2.py index 6588a6036..8ed9d3f6b 100644 --- a/metagpt/config2.py +++ b/metagpt/config2.py @@ -97,20 +97,21 @@ class Config(CLIParams, 
YamlModel): return Config.from_yaml_file(pathname) @classmethod - def default(cls): + def default(cls, reload: bool = False): """Load default config - Priority: env < default_config_paths - Inside default_config_paths, the latter one overwrites the former one """ - default_config_paths: List[Path] = [ + default_config_paths = ( METAGPT_ROOT / "config/config2.yaml", CONFIG_ROOT / "config2.yaml", - ] - - dicts = [dict(os.environ)] - dicts += [Config.read_yaml(path) for path in default_config_paths] - final = merge_dict(dicts) - return Config(**final) + ) + if reload or default_config_paths not in _CONFIG_CACHE: + dicts = [dict(os.environ)] + dicts += [Config.read_yaml(path) for path in default_config_paths] + final = merge_dict(dicts) + _CONFIG_CACHE[default_config_paths] = Config(**final) + return _CONFIG_CACHE[default_config_paths] @classmethod def from_llm_config(cls, llm_config: dict): @@ -160,4 +161,4 @@ def merge_dict(dicts: Iterable[Dict]) -> Dict: return result -config = Config.default() +_CONFIG_CACHE = {} diff --git a/metagpt/configs/browser_config.py b/metagpt/configs/browser_config.py index 2f8024f44..fafbaeeb8 100644 --- a/metagpt/configs/browser_config.py +++ b/metagpt/configs/browser_config.py @@ -5,12 +5,23 @@ @Author : alexanderwu @File : browser_config.py """ +from enum import Enum from typing import Literal -from metagpt.tools import WebBrowserEngineType from metagpt.utils.yaml_model import YamlModel +class WebBrowserEngineType(Enum): + PLAYWRIGHT = "playwright" + SELENIUM = "selenium" + CUSTOM = "custom" + + @classmethod + def __missing__(cls, key): + """Default type conversion""" + return cls.CUSTOM + + class BrowserConfig(YamlModel): """Config for Browser""" diff --git a/metagpt/configs/search_config.py b/metagpt/configs/search_config.py index 7b50fb6d3..2c773b685 100644 --- a/metagpt/configs/search_config.py +++ b/metagpt/configs/search_config.py @@ -5,14 +5,23 @@ @Author : alexanderwu @File : search_config.py """ +from enum import Enum from 
typing import Callable, Optional from pydantic import ConfigDict, Field -from metagpt.tools import SearchEngineType from metagpt.utils.yaml_model import YamlModel +class SearchEngineType(Enum): + SERPAPI_GOOGLE = "serpapi" + SERPER_GOOGLE = "serper" + DIRECT_GOOGLE = "google" + DUCK_DUCK_GO = "ddg" + CUSTOM_ENGINE = "custom" + BING = "bing" + + class SearchConfig(YamlModel): """Config for Search""" diff --git a/metagpt/context.py b/metagpt/context.py index 384e8da48..0769f78eb 100644 --- a/metagpt/context.py +++ b/metagpt/context.py @@ -10,7 +10,7 @@ from __future__ import annotations import os from typing import Any, Dict, Optional -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field from metagpt.config2 import Config from metagpt.configs.llm_config import LLMConfig, LLMType @@ -61,7 +61,7 @@ class Context(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) kwargs: AttrDict = AttrDict() - config: Config = Config.default() + config: Config = Field(default_factory=Config.default) cost_manager: CostManager = CostManager() diff --git a/metagpt/environment/minecraft/minecraft_env.py b/metagpt/environment/minecraft/minecraft_env.py index 0f39c9ccd..2bf39095c 100644 --- a/metagpt/environment/minecraft/minecraft_env.py +++ b/metagpt/environment/minecraft/minecraft_env.py @@ -11,7 +11,7 @@ from typing import Any, Iterable from llama_index.vector_stores.chroma import ChromaVectorStore from pydantic import ConfigDict, Field -from metagpt.config2 import config as CONFIG +from metagpt.config2 import Config from metagpt.environment.base_env import Environment from metagpt.environment.minecraft.const import MC_CKPT_DIR from metagpt.environment.minecraft.minecraft_ext_env import MinecraftExtEnv @@ -82,7 +82,7 @@ class MinecraftEnv(Environment, MinecraftExtEnv): persist_dir=f"{MC_CKPT_DIR}/skill/vectordb", ) - if CONFIG.resume: + if Config.default().resume: logger.info(f"Loading Action Developer from {MC_CKPT_DIR}/action") 
self.chest_memory = read_json_file(f"{MC_CKPT_DIR}/action/chest_memory.json") diff --git a/metagpt/exp_pool/decorator.py b/metagpt/exp_pool/decorator.py index 777d55ca9..888e61743 100644 --- a/metagpt/exp_pool/decorator.py +++ b/metagpt/exp_pool/decorator.py @@ -6,7 +6,7 @@ from typing import Any, Callable, Optional, TypeVar from pydantic import BaseModel, ConfigDict, model_validator -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.exp_pool.context_builders import BaseContextBuilder, SimpleContextBuilder from metagpt.exp_pool.manager import ExperienceManager, exp_manager from metagpt.exp_pool.perfect_judges import BasePerfectJudge, SimplePerfectJudge @@ -50,11 +50,14 @@ def exp_cache( """ def decorator(func: Callable[..., ReturnType]) -> Callable[..., ReturnType]: - if not config.exp_pool.enabled: - return func - @functools.wraps(func) async def get_or_create(args: Any, kwargs: Any) -> ReturnType: + config = Config.default() + + if not config.exp_pool.enabled: + rsp = func(*args, **kwargs) + return await rsp if asyncio.iscoroutine(rsp) else rsp + handler = ExpCacheHandler( func=func, args=args, diff --git a/metagpt/exp_pool/manager.py b/metagpt/exp_pool/manager.py index b6ae9c0a3..253d45508 100644 --- a/metagpt/exp_pool/manager.py +++ b/metagpt/exp_pool/manager.py @@ -2,9 +2,9 @@ from typing import TYPE_CHECKING, Any -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field -from metagpt.config2 import Config, config +from metagpt.config2 import Config from metagpt.exp_pool.schema import ( DEFAULT_COLLECTION_NAME, DEFAULT_SIMILARITY_TOP_K, @@ -29,7 +29,7 @@ class ExperienceManager(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) - config: Config = config + config: Config = Field(default_factory=Config.default) _storage: Any = None _vector_store: Any = None diff --git a/metagpt/ext/stanford_town/actions/st_action.py b/metagpt/ext/stanford_town/actions/st_action.py index 
321676374..48cda353c 100644 --- a/metagpt/ext/stanford_town/actions/st_action.py +++ b/metagpt/ext/stanford_town/actions/st_action.py @@ -8,7 +8,6 @@ from pathlib import Path from typing import Any, Optional, Union from metagpt.actions.action import Action -from metagpt.config2 import config from metagpt.ext.stanford_town.utils.const import PROMPTS_DIR from metagpt.logs import logger @@ -62,13 +61,13 @@ class STAction(Action): async def _run_gpt35_max_tokens(self, prompt: str, max_tokens: int = 50, retry: int = 3): for idx in range(retry): try: - tmp_max_tokens_rsp = getattr(config.llm, "max_token", 1500) - setattr(config.llm, "max_token", max_tokens) + tmp_max_tokens_rsp = getattr(self.config.llm, "max_token", 1500) + setattr(self.config.llm, "max_token", max_tokens) self.llm.use_system_prompt = False # to make it behave like a non-chat completions llm_resp = await self._aask(prompt) - setattr(config.llm, "max_token", tmp_max_tokens_rsp) + setattr(self.config.llm, "max_token", tmp_max_tokens_rsp) logger.info(f"Action: {self.cls_name} llm _run_gpt35_max_tokens raw resp: {llm_resp}") if self._func_validate(llm_resp, prompt): return self._func_cleanup(llm_resp, prompt) diff --git a/metagpt/ext/stanford_town/utils/utils.py b/metagpt/ext/stanford_town/utils/utils.py index 3aa0e80e8..e09cce8fe 100644 --- a/metagpt/ext/stanford_town/utils/utils.py +++ b/metagpt/ext/stanford_town/utils/utils.py @@ -13,7 +13,7 @@ from typing import Union from openai import OpenAI -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.logs import logger @@ -48,6 +48,7 @@ def read_csv_to_list(curr_file: str, header=False, strip_trail=True): def get_embedding(text, model: str = "text-embedding-ada-002"): + config = Config.default() text = text.replace("\n", " ") if not text: text = "this is blank" diff --git a/metagpt/learn/text_to_embedding.py b/metagpt/learn/text_to_embedding.py index f859ab638..2b4adda80 100644 --- a/metagpt/learn/text_to_embedding.py +++ 
b/metagpt/learn/text_to_embedding.py @@ -6,12 +6,13 @@ @File : text_to_embedding.py @Desc : Text-to-Embedding skill, which provides text-to-embedding functionality. """ -import metagpt.config2 +from typing import Optional + from metagpt.config2 import Config from metagpt.tools.openai_text_to_embedding import oas3_openai_text_to_embedding -async def text_to_embedding(text, model="text-embedding-ada-002", config: Config = metagpt.config2.config): +async def text_to_embedding(text, model="text-embedding-ada-002", config: Optional[Config] = None): """Text to embedding :param text: The text used for embedding. @@ -19,6 +20,7 @@ async def text_to_embedding(text, model="text-embedding-ada-002", config: Config :param config: OpenAI config with API key, For more details, checkout: `https://platform.openai.com/account/api-keys` :return: A json object of :class:`ResultEmbedding` class if successful, otherwise `{}`. """ + config = config if config else Config.default() openai_api_key = config.get_openai_llm().api_key proxy = config.get_openai_llm().proxy return await oas3_openai_text_to_embedding(text, model=model, openai_api_key=openai_api_key, proxy=proxy) diff --git a/metagpt/learn/text_to_image.py b/metagpt/learn/text_to_image.py index 163859fc0..9bfed532b 100644 --- a/metagpt/learn/text_to_image.py +++ b/metagpt/learn/text_to_image.py @@ -7,8 +7,8 @@ @Desc : Text-to-Image skill, which provides text-to-image functionality. """ import base64 +from typing import Optional -import metagpt.config2 from metagpt.config2 import Config from metagpt.const import BASE64_FORMAT from metagpt.llm import LLM @@ -17,7 +17,7 @@ from metagpt.tools.openai_text_to_image import oas3_openai_text_to_image from metagpt.utils.s3 import S3 -async def text_to_image(text, size_type: str = "512x512", config: Config = metagpt.config2.config): +async def text_to_image(text, size_type: str = "512x512", config: Optional[Config] = None): """Text to image :param text: The text used for image conversion. 
@@ -25,6 +25,7 @@ async def text_to_image(text, size_type: str = "512x512", config: Config = metag :param config: Config :return: The image data is returned in Base64 encoding. """ + config = config if config else Config.default() image_declaration = "data:image/png;base64," model_url = config.metagpt_tti_url diff --git a/metagpt/learn/text_to_speech.py b/metagpt/learn/text_to_speech.py index 8dbd6d243..9d3dba685 100644 --- a/metagpt/learn/text_to_speech.py +++ b/metagpt/learn/text_to_speech.py @@ -6,7 +6,8 @@ @File : text_to_speech.py @Desc : Text-to-Speech skill, which provides text-to-speech functionality """ -import metagpt.config2 +from typing import Optional + from metagpt.config2 import Config from metagpt.const import BASE64_FORMAT from metagpt.tools.azure_tts import oas3_azsure_tts @@ -20,7 +21,7 @@ async def text_to_speech( voice="zh-CN-XiaomoNeural", style="affectionate", role="Girl", - config: Config = metagpt.config2.config, + config: Optional[Config] = None, ): """Text to speech For more details, check out:`https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts` @@ -38,7 +39,7 @@ async def text_to_speech( :return: Returns the Base64-encoded .wav/.mp3 file data if successful, otherwise an empty string. 
""" - + config = config if config else Config.default() subscription_key = config.azure_tts_subscription_key region = config.azure_tts_region if subscription_key and region: diff --git a/metagpt/memory/brain_memory.py b/metagpt/memory/brain_memory.py index c58148ead..8c2846d1d 100644 --- a/metagpt/memory/brain_memory.py +++ b/metagpt/memory/brain_memory.py @@ -12,9 +12,9 @@ import json import re from typing import Dict, List, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator -from metagpt.config2 import config +from metagpt.config2 import Config as _Config from metagpt.const import DEFAULT_MAX_TOKENS, DEFAULT_TOKEN_SIZE from metagpt.logs import logger from metagpt.provider import MetaGPTLLM @@ -32,6 +32,12 @@ class BrainMemory(BaseModel): last_talk: Optional[str] = None cacheable: bool = True llm: Optional[BaseLLM] = Field(default=None, exclude=True) + config: Optional[_Config] = None + + @field_validator("config") + @classmethod + def set_default_config(cls, config): + return config if config else _Config.default() class Config: arbitrary_types_allowed = True @@ -54,9 +60,8 @@ class BrainMemory(BaseModel): texts = [m.content for m in self.knowledge] return "\n".join(texts) - @staticmethod - async def loads(redis_key: str) -> "BrainMemory": - redis = Redis(config.redis) + async def loads(self, redis_key: str) -> "BrainMemory": + redis = Redis(self.config.redis) if not redis_key: return BrainMemory() v = await redis.get(key=redis_key) @@ -70,7 +75,7 @@ class BrainMemory(BaseModel): async def dumps(self, redis_key: str, timeout_sec: int = 30 * 60): if not self.is_dirty: return - redis = Redis(config.redis) + redis = Redis(self.config.redis) if not redis_key: return False v = self.model_dump_json() @@ -140,7 +145,7 @@ class BrainMemory(BaseModel): return text summary = await self._summarize(text=text, max_words=max_words, keep_language=keep_language, limit=limit) if summary: - await 
self.set_history_summary(history_summary=summary, redis_key=config.redis_key) + await self.set_history_summary(history_summary=summary, redis_key=self.config.redis_key) return summary raise ValueError(f"text too long:{text_length}") @@ -164,7 +169,7 @@ class BrainMemory(BaseModel): msgs.reverse() self.history = msgs self.is_dirty = True - await self.dumps(redis_key=config.redis.key) + await self.dumps(redis_key=self.config.redis.key) self.is_dirty = False return BrainMemory.to_metagpt_history_format(self.history) @@ -181,7 +186,7 @@ class BrainMemory(BaseModel): summary = await self.summarize(llm=llm, max_words=500) - language = config.language + language = self.config.language command = f"Translate the above summary into a {language} title of less than {max_words} words." summaries = [summary, command] msg = "\n".join(summaries) diff --git a/metagpt/rag/factories/embedding.py b/metagpt/rag/factories/embedding.py index 3613fd228..8a9d4bc95 100644 --- a/metagpt/rag/factories/embedding.py +++ b/metagpt/rag/factories/embedding.py @@ -1,7 +1,7 @@ """RAG Embedding Factory.""" from __future__ import annotations -from typing import Any +from typing import Any, Optional from llama_index.core.embeddings import BaseEmbedding from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding @@ -9,7 +9,7 @@ from llama_index.embeddings.gemini import GeminiEmbedding from llama_index.embeddings.ollama import OllamaEmbedding from llama_index.embeddings.openai import OpenAIEmbedding -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.configs.embedding_config import EmbeddingType from metagpt.configs.llm_config import LLMType from metagpt.rag.factories.base import GenericFactory @@ -18,7 +18,7 @@ from metagpt.rag.factories.base import GenericFactory class RAGEmbeddingFactory(GenericFactory): """Create LlamaIndex Embedding with MetaGPT's embedding config.""" - def __init__(self): + def __init__(self, config: Optional[Config] = None): creators = { 
EmbeddingType.OPENAI: self._create_openai, EmbeddingType.AZURE: self._create_azure, @@ -29,6 +29,7 @@ class RAGEmbeddingFactory(GenericFactory): LLMType.AZURE: self._create_azure, } super().__init__(creators) + self.config = config if self.config else Config.default() def get_rag_embedding(self, key: EmbeddingType = None) -> BaseEmbedding: """Key is EmbeddingType.""" @@ -40,18 +41,18 @@ class RAGEmbeddingFactory(GenericFactory): If the embedding type is not specified, for backward compatibility, it checks if the LLM API type is either OPENAI or AZURE. Raise TypeError if embedding type not found. """ - if config.embedding.api_type: - return config.embedding.api_type + if self.config.embedding.api_type: + return self.config.embedding.api_type - if config.llm.api_type in [LLMType.OPENAI, LLMType.AZURE]: - return config.llm.api_type + if self.config.llm.api_type in [LLMType.OPENAI, LLMType.AZURE]: + return self.config.llm.api_type raise TypeError("To use RAG, please set your embedding in config2.yaml.") def _create_openai(self) -> OpenAIEmbedding: params = dict( - api_key=config.embedding.api_key or config.llm.api_key, - api_base=config.embedding.base_url or config.llm.base_url, + api_key=self.config.embedding.api_key or self.config.llm.api_key, + api_base=self.config.embedding.base_url or self.config.llm.base_url, ) self._try_set_model_and_batch_size(params) @@ -60,9 +61,9 @@ class RAGEmbeddingFactory(GenericFactory): def _create_azure(self) -> AzureOpenAIEmbedding: params = dict( - api_key=config.embedding.api_key or config.llm.api_key, - azure_endpoint=config.embedding.base_url or config.llm.base_url, - api_version=config.embedding.api_version or config.llm.api_version, + api_key=self.config.embedding.api_key or self.config.llm.api_key, + azure_endpoint=self.config.embedding.base_url or self.config.llm.base_url, + api_version=self.config.embedding.api_version or self.config.llm.api_version, ) self._try_set_model_and_batch_size(params) @@ -71,8 +72,8 @@ class 
RAGEmbeddingFactory(GenericFactory): def _create_gemini(self) -> GeminiEmbedding: params = dict( - api_key=config.embedding.api_key, - api_base=config.embedding.base_url, + api_key=self.config.embedding.api_key, + api_base=self.config.embedding.base_url, ) self._try_set_model_and_batch_size(params) @@ -81,7 +82,7 @@ class RAGEmbeddingFactory(GenericFactory): def _create_ollama(self) -> OllamaEmbedding: params = dict( - base_url=config.embedding.base_url, + base_url=self.config.embedding.base_url, ) self._try_set_model_and_batch_size(params) @@ -90,14 +91,15 @@ class RAGEmbeddingFactory(GenericFactory): def _try_set_model_and_batch_size(self, params: dict): """Set the model_name and embed_batch_size only when they are specified.""" - if config.embedding.model: - params["model_name"] = config.embedding.model + if self.config.embedding.model: + params["model_name"] = self.config.embedding.model - if config.embedding.embed_batch_size: - params["embed_batch_size"] = config.embedding.embed_batch_size + if self.config.embedding.embed_batch_size: + params["embed_batch_size"] = self.config.embedding.embed_batch_size def _raise_for_key(self, key: Any): raise ValueError(f"The embedding type is currently not supported: `{type(key)}`, {key}") -get_rag_embedding = RAGEmbeddingFactory().get_rag_embedding +def get_rag_embedding(key: EmbeddingType = None, config: Optional[Config] = None): + return RAGEmbeddingFactory(config=config).get_rag_embedding(key) diff --git a/metagpt/rag/factories/llm.py b/metagpt/rag/factories/llm.py index 9fd19cab5..5d27cde3a 100644 --- a/metagpt/rag/factories/llm.py +++ b/metagpt/rag/factories/llm.py @@ -10,9 +10,9 @@ from llama_index.core.llms import ( LLMMetadata, ) from llama_index.core.llms.callbacks import llm_completion_callback -from pydantic import Field +from pydantic import Field, model_validator -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.llm import LLM from metagpt.provider.base_llm import BaseLLM from 
metagpt.utils.async_helper import NestAsyncio @@ -26,9 +26,23 @@ class RAGLLM(CustomLLM): """ model_infer: BaseLLM = Field(..., description="The MetaGPT's LLM.") - context_window: int = TOKEN_MAX.get(config.llm.model, DEFAULT_CONTEXT_WINDOW) - num_output: int = config.llm.max_token - model_name: str = config.llm.model + context_window: int = -1 + num_output: int = -1 + model_name: str = "" + + @model_validator(mode="after") + def update_from_config(self): + config = Config.default() + if self.context_window < 0: + self.context_window = TOKEN_MAX.get(config.llm.model, DEFAULT_CONTEXT_WINDOW) + + if self.num_output < 0: + self.num_output = config.llm.max_token + + if not self.model_name: + self.model_name = config.llm.model + + return self @property def metadata(self) -> LLMMetadata: diff --git a/metagpt/rag/schema.py b/metagpt/rag/schema.py index a8a10f90e..5e97e60c3 100644 --- a/metagpt/rag/schema.py +++ b/metagpt/rag/schema.py @@ -10,7 +10,7 @@ from llama_index.core.schema import TextNode from llama_index.core.vector_stores.types import VectorStoreQueryMode from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, model_validator -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.configs.embedding_config import EmbeddingType from metagpt.logs import logger from metagpt.rag.interface import RAGObject @@ -45,6 +45,7 @@ class FAISSRetrieverConfig(IndexRetrieverConfig): @model_validator(mode="after") def check_dimensions(self): if self.dimensions == 0: + config = Config.default() self.dimensions = config.embedding.dimensions or self._embedding_type_to_dimensions.get( config.embedding.api_type, 1536 ) diff --git a/metagpt/software_company.py b/metagpt/software_company.py index 2ea16f55f..f74b61191 100644 --- a/metagpt/software_company.py +++ b/metagpt/software_company.py @@ -27,7 +27,7 @@ def generate_repo( recover_path=None, ): """Run the startup logic. 
Can be called from CLI or other Python scripts.""" - from metagpt.config2 import config + from metagpt.config2 import Config from metagpt.context import Context from metagpt.roles import ( Architect, @@ -38,6 +38,8 @@ def generate_repo( ) from metagpt.team import Team + config = Config.default() + config.update_via_cli(project_path, project_name, inc, reqa_file, max_auto_summarize_code) ctx = Context(config=config) diff --git a/metagpt/tools/__init__.py b/metagpt/tools/__init__.py index 35fa04658..2027dbb1d 100644 --- a/metagpt/tools/__init__.py +++ b/metagpt/tools/__init__.py @@ -6,33 +6,18 @@ @File : __init__.py """ -from enum import Enum from metagpt.tools import libs # this registers all tools from metagpt.tools.tool_registry import TOOL_REGISTRY +from metagpt.configs.search_config import SearchEngineType +from metagpt.configs.browser_config import WebBrowserEngineType + _ = libs, TOOL_REGISTRY # Avoid pre-commit error -class SearchEngineType(Enum): - SERPAPI_GOOGLE = "serpapi" - SERPER_GOOGLE = "serper" - DIRECT_GOOGLE = "google" - DUCK_DUCK_GO = "ddg" - CUSTOM_ENGINE = "custom" - BING = "bing" - - -class WebBrowserEngineType(Enum): - PLAYWRIGHT = "playwright" - SELENIUM = "selenium" - CUSTOM = "custom" - - @classmethod - def __missing__(cls, key): - """Default type conversion""" - return cls.CUSTOM - - class SearchInterface: async def asearch(self, *args, **kwargs): ... 
+ + +__all__ = ["SearchEngineType", "WebBrowserEngineType", "TOOL_REGISTRY"] diff --git a/metagpt/tools/libs/gpt_v_generator.py b/metagpt/tools/libs/gpt_v_generator.py index baedc3d61..66c023766 100644 --- a/metagpt/tools/libs/gpt_v_generator.py +++ b/metagpt/tools/libs/gpt_v_generator.py @@ -7,7 +7,9 @@ """ import re from pathlib import Path +from typing import Optional +from metagpt.config2 import Config from metagpt.const import DEFAULT_WORKSPACE_ROOT from metagpt.logs import logger from metagpt.tools.tool_registry import register_tool @@ -36,11 +38,11 @@ class GPTvGenerator: It utilizes a vision model to analyze the layout from an image and generate webpage codes accordingly. """ - def __init__(self): + def __init__(self, config: Optional[Config]): """Initialize GPTvGenerator class with default values from the configuration.""" - from metagpt.config2 import config from metagpt.llm import LLM + config = config if config else Config.default() self.llm = LLM(llm_config=config.get_openai_llm()) self.llm.model = "gpt-4-vision-preview" diff --git a/metagpt/tools/ut_writer.py b/metagpt/tools/ut_writer.py index 243871aff..9e67a3585 100644 --- a/metagpt/tools/ut_writer.py +++ b/metagpt/tools/ut_writer.py @@ -4,7 +4,7 @@ import json from pathlib import Path -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.provider.openai_api import OpenAILLM as GPTAPI from metagpt.utils.common import awrite @@ -282,6 +282,7 @@ class UTGenerator: """Choose based on different calling methods""" result = "" if self.chatgpt_method == "API": + config = Config.default() result = await GPTAPI(config.get_openai_llm()).aask_code(messages=messages) return result diff --git a/metagpt/utils/embedding.py b/metagpt/utils/embedding.py index 3d53a314c..3fcf1f25b 100644 --- a/metagpt/utils/embedding.py +++ b/metagpt/utils/embedding.py @@ -7,10 +7,11 @@ """ from llama_index.embeddings.openai import OpenAIEmbedding -from metagpt.config2 import config +from 
metagpt.config2 import Config def get_embedding() -> OpenAIEmbedding: + config = Config.default() llm = config.get_openai_llm() if llm is None: raise ValueError("To use OpenAIEmbedding, please ensure that config.llm.api_type is correctly set to 'openai'.") diff --git a/metagpt/utils/make_sk_kernel.py b/metagpt/utils/make_sk_kernel.py index 283a682d6..f0c55b07c 100644 --- a/metagpt/utils/make_sk_kernel.py +++ b/metagpt/utils/make_sk_kernel.py @@ -13,10 +13,11 @@ from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion impo OpenAIChatCompletion, ) -from metagpt.config2 import config +from metagpt.config2 import Config def make_sk_kernel(): + config = Config.default() kernel = sk.Kernel() if llm := config.get_azure_llm(): kernel.add_chat_service( diff --git a/metagpt/utils/mermaid.py b/metagpt/utils/mermaid.py index ba33b8d61..d87ae4f83 100644 --- a/metagpt/utils/mermaid.py +++ b/metagpt/utils/mermaid.py @@ -9,12 +9,14 @@ import asyncio import os from pathlib import Path -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.logs import logger from metagpt.utils.common import awrite, check_cmd_exists -async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, width=2048, height=2048) -> int: +async def mermaid_to_file( + engine, mermaid_code, output_file_without_suffix, width=2048, height=2048, config=None +) -> int: """suffix: png/svg/pdf :param mermaid_code: mermaid code @@ -24,6 +26,7 @@ async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, widt :return: 0 if succeed, -1 if failed """ # Write the Mermaid code to a temporary file + config = config if config else Config.default() dir_name = os.path.dirname(output_file_without_suffix) if dir_name and not os.path.exists(dir_name): os.makedirs(dir_name) diff --git a/metagpt/utils/mmdc_pyppeteer.py b/metagpt/utils/mmdc_pyppeteer.py index f029325f1..4e30ee538 100644 --- a/metagpt/utils/mmdc_pyppeteer.py +++ 
b/metagpt/utils/mmdc_pyppeteer.py @@ -10,11 +10,11 @@ from urllib.parse import urljoin from pyppeteer import launch -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.logs import logger -async def mermaid_to_file(mermaid_code, output_file_without_suffix, width=2048, height=2048) -> int: +async def mermaid_to_file(mermaid_code, output_file_without_suffix, width=2048, height=2048, config=None) -> int: """ Converts the given Mermaid code to various output formats and saves them to files. @@ -27,6 +27,7 @@ async def mermaid_to_file(mermaid_code, output_file_without_suffix, width=2048, Returns: int: Returns 1 if the conversion and saving were successful, -1 otherwise. """ + config = config if config else Config.default() suffixes = ["png", "svg", "pdf"] __dirname = os.path.dirname(os.path.abspath(__file__)) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 68fa73108..5c57693f7 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -4,12 +4,12 @@ import copy from enum import Enum -from typing import Callable, Union +from typing import Callable, Optional, Union import regex as re from tenacity import RetryCallState, retry, stop_after_attempt, wait_fixed -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.logs import logger from metagpt.utils.custom_decoder import CustomDecoder @@ -154,7 +154,9 @@ def _repair_llm_raw_output(output: str, req_key: str, repair_type: RepairType = return output -def repair_llm_raw_output(output: str, req_keys: list[str], repair_type: RepairType = None) -> str: +def repair_llm_raw_output( + output: str, req_keys: list[str], repair_type: RepairType = None, config: Optional[Config] = None +) -> str: """ in open-source llm model, it usually can't follow the instruction well, the output may be incomplete, so here we try to repair it and use all repair methods by default. 
@@ -169,6 +171,7 @@ def repair_llm_raw_output(output: str, req_keys: list[str], repair_type: RepairT target: { xxx } output: { xxx }] """ + config = config if config else Config.default() if not config.repair_llm_output: return output @@ -256,6 +259,7 @@ def run_after_exp_and_passon_next_retry(logger: "loguru.Logger") -> Callable[["R "next_action":"None" } """ + config = Config.default() if retry_state.outcome.failed: if retry_state.args: # # can't be used as args=retry_state.args @@ -276,8 +280,12 @@ def run_after_exp_and_passon_next_retry(logger: "loguru.Logger") -> Callable[["R return run_and_passon +def repair_stop_after_attempt(retry_state): + return stop_after_attempt(3 if Config.default().repair_llm_output else 0)(retry_state) + + @retry( - stop=stop_after_attempt(3 if config.repair_llm_output else 0), + stop=repair_stop_after_attempt, wait=wait_fixed(1), after=run_after_exp_and_passon_next_retry(logger), ) diff --git a/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py index e2aa3d17f..207521c97 100644 --- a/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py +++ b/tests/metagpt/roles/di/run_swe_agent_for_benchmark.py @@ -2,13 +2,14 @@ import asyncio import json from datetime import datetime -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT from metagpt.logs import logger from metagpt.roles.di.swe_agent import SWEAgent from metagpt.tools.libs.terminal import Terminal from metagpt.tools.swe_agent_commands.swe_agent_utils import load_hf_dataset +config = Config.default() # Specify by yourself TEST_REPO_DIR = METAGPT_ROOT / "data" / "test_repo" DATA_DIR = METAGPT_ROOT / "data/hugging_face" diff --git a/tests/metagpt/test_document.py b/tests/metagpt/test_document.py index 9c076f4e6..29393bb13 100644 --- a/tests/metagpt/test_document.py +++ b/tests/metagpt/test_document.py @@ -5,10 +5,12 @@ @Author : alexanderwu @File 
: test_document.py """ -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.document import Repo from metagpt.logs import logger +config = Config.default() + def set_existing_repo(path): repo1 = Repo.from_path(path) diff --git a/tests/metagpt/tools/test_azure_tts.py b/tests/metagpt/tools/test_azure_tts.py index f72b5663b..ee55616d2 100644 --- a/tests/metagpt/tools/test_azure_tts.py +++ b/tests/metagpt/tools/test_azure_tts.py @@ -12,9 +12,11 @@ from pathlib import Path import pytest from azure.cognitiveservices.speech import ResultReason, SpeechSynthesizer -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.tools.azure_tts import AzureTTS +config = Config.default() + @pytest.mark.asyncio async def test_azure_tts(mocker): diff --git a/tests/metagpt/tools/test_metagpt_text_to_image.py b/tests/metagpt/tools/test_metagpt_text_to_image.py index d3797a460..bd0fcaf8b 100644 --- a/tests/metagpt/tools/test_metagpt_text_to_image.py +++ b/tests/metagpt/tools/test_metagpt_text_to_image.py @@ -10,9 +10,11 @@ from unittest.mock import AsyncMock import pytest -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.tools.metagpt_text_to_image import oas3_metagpt_text_to_image +config = Config.default() + @pytest.mark.asyncio async def test_draw(mocker): diff --git a/tests/metagpt/tools/test_moderation.py b/tests/metagpt/tools/test_moderation.py index 8dc9e9d5e..0f921887f 100644 --- a/tests/metagpt/tools/test_moderation.py +++ b/tests/metagpt/tools/test_moderation.py @@ -8,10 +8,12 @@ import pytest -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.llm import LLM from metagpt.tools.moderation import Moderation +config = Config.default() + @pytest.mark.asyncio @pytest.mark.parametrize( diff --git a/tests/metagpt/tools/test_openai_text_to_image.py b/tests/metagpt/tools/test_openai_text_to_image.py index 3f9169ddd..4856342d1 100644 --- 
a/tests/metagpt/tools/test_openai_text_to_image.py +++ b/tests/metagpt/tools/test_openai_text_to_image.py @@ -11,7 +11,7 @@ import openai import pytest from pydantic import BaseModel -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.llm import LLM from metagpt.tools.openai_text_to_image import ( OpenAIText2Image, @@ -19,6 +19,8 @@ from metagpt.tools.openai_text_to_image import ( ) from metagpt.utils.s3 import S3 +config = Config.default() + @pytest.mark.asyncio async def test_draw(mocker): diff --git a/tests/metagpt/tools/test_ut_writer.py b/tests/metagpt/tools/test_ut_writer.py index 3cc7e86bb..3ebbe6d9d 100644 --- a/tests/metagpt/tools/test_ut_writer.py +++ b/tests/metagpt/tools/test_ut_writer.py @@ -20,10 +20,12 @@ from openai.types.chat.chat_completion_message_tool_call import ( Function, ) -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.const import API_QUESTIONS_PATH, UT_PY_PATH from metagpt.tools.ut_writer import YFT_PROMPT_PREFIX, UTGenerator +config = Config.default() + class TestUTWriter: @pytest.mark.asyncio diff --git a/tests/metagpt/utils/test_repair_llm_raw_output.py b/tests/metagpt/utils/test_repair_llm_raw_output.py index 7a29ea3ee..75bd9f165 100644 --- a/tests/metagpt/utils/test_repair_llm_raw_output.py +++ b/tests/metagpt/utils/test_repair_llm_raw_output.py @@ -2,7 +2,9 @@ # -*- coding: utf-8 -*- # @Desc : unittest of repair_llm_raw_output -from metagpt.config2 import config +from metagpt.config2 import Config + +config = Config.default() """ CONFIG.repair_llm_output should be True before retry_parse_json_text imported. 
diff --git a/tests/mock/mock_llm.py b/tests/mock/mock_llm.py index 168125448..fdbf86825 100644 --- a/tests/mock/mock_llm.py +++ b/tests/mock/mock_llm.py @@ -1,7 +1,7 @@ import json from typing import Optional, Union -from metagpt.config2 import config +from metagpt.config2 import Config from metagpt.configs.llm_config import LLMType from metagpt.const import LLM_API_TIMEOUT from metagpt.logs import logger @@ -10,6 +10,8 @@ from metagpt.provider.constant import GENERAL_FUNCTION_SCHEMA from metagpt.provider.openai_api import OpenAILLM from metagpt.schema import Message +config = Config.default() + OriginalLLM = OpenAILLM if config.llm.api_type == LLMType.OPENAI else AzureOpenAILLM From a801a5cc3295cd963a93a5f185d7535f149619cc Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Tue, 13 Aug 2024 22:46:03 +0800 Subject: [PATCH 10/30] replace exp_manager by get_exp_manager --- examples/exp_pool/decorator.py | 4 ++-- metagpt/exp_pool/__init__.py | 4 ++-- metagpt/exp_pool/decorator.py | 4 ++-- metagpt/exp_pool/manager.py | 9 ++++++++- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/examples/exp_pool/decorator.py b/examples/exp_pool/decorator.py index d25949e8d..8ee00905d 100644 --- a/examples/exp_pool/decorator.py +++ b/examples/exp_pool/decorator.py @@ -5,7 +5,7 @@ This script demonstrates how to automatically store experiences using @exp_cache import asyncio import uuid -from metagpt.exp_pool import exp_cache, exp_manager +from metagpt.exp_pool import exp_cache, get_exp_manager from metagpt.logs import logger @@ -20,7 +20,7 @@ async def main(): resp = await produce(req=req) logger.info(f"The response of `produce({req})` is: {resp}") - exps = await exp_manager.query_exps(req) + exps = await get_exp_manager().query_exps(req) logger.info(f"Find experiences: {exps}") diff --git a/metagpt/exp_pool/__init__.py b/metagpt/exp_pool/__init__.py index aeeb94b38..97d45a278 100644 --- a/metagpt/exp_pool/__init__.py +++ b/metagpt/exp_pool/__init__.py @@ -1,6 +1,6 @@ 
"""Experience pool init.""" -from metagpt.exp_pool.manager import exp_manager +from metagpt.exp_pool.manager import get_exp_manager from metagpt.exp_pool.decorator import exp_cache -__all__ = ["exp_manager", "exp_cache"] +__all__ = ["get_exp_manager", "exp_cache"] diff --git a/metagpt/exp_pool/decorator.py b/metagpt/exp_pool/decorator.py index 888e61743..ed5f5e068 100644 --- a/metagpt/exp_pool/decorator.py +++ b/metagpt/exp_pool/decorator.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, ConfigDict, model_validator from metagpt.config2 import Config from metagpt.exp_pool.context_builders import BaseContextBuilder, SimpleContextBuilder -from metagpt.exp_pool.manager import ExperienceManager, exp_manager +from metagpt.exp_pool.manager import ExperienceManager, get_exp_manager from metagpt.exp_pool.perfect_judges import BasePerfectJudge, SimplePerfectJudge from metagpt.exp_pool.schema import Experience, Metric, QueryType, Score from metagpt.exp_pool.scorers import BaseScorer, SimpleScorer @@ -117,7 +117,7 @@ class ExpCacheHandler(BaseModel): self._validate_params() - self.exp_manager = self.exp_manager or exp_manager + self.exp_manager = self.exp_manager or get_exp_manager() self.exp_scorer = self.exp_scorer or SimpleScorer() self.exp_perfect_judge = self.exp_perfect_judge or SimplePerfectJudge() self.context_builder = self.context_builder or SimpleContextBuilder() diff --git a/metagpt/exp_pool/manager.py b/metagpt/exp_pool/manager.py index 253d45508..5f4d71edc 100644 --- a/metagpt/exp_pool/manager.py +++ b/metagpt/exp_pool/manager.py @@ -113,4 +113,11 @@ class ExperienceManager(BaseModel): return self.vector_store._collection.count() -exp_manager = ExperienceManager() +_exp_manager = None + + +def get_exp_manager(): + global _exp_manager + if _exp_manager is None: + _exp_manager = ExperienceManager() + return _exp_manager From f524bbceb8eade57860464ac33ff3a89bfda7126 Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Tue, 13 Aug 2024 22:49:33 +0800 Subject: [PATCH 
11/30] use default_factory for SearchEnhancedQA.collect_links_action --- metagpt/actions/search_enhanced_qa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/search_enhanced_qa.py b/metagpt/actions/search_enhanced_qa.py index 152e615b6..4d1393ab3 100644 --- a/metagpt/actions/search_enhanced_qa.py +++ b/metagpt/actions/search_enhanced_qa.py @@ -62,7 +62,7 @@ class SearchEnhancedQA(Action): desc: str = "Integrating search engine results to anwser the question." collect_links_action: CollectLinks = Field( - default=CollectLinks(), description="Action to collect relevant links from a search engine." + default_factory=CollectLinks, description="Action to collect relevant links from a search engine." ) web_browse_and_summarize_action: WebBrowseAndSummarize = Field( default=None, From d4d581f3914b10945122e4f1503b209678301536 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 09:58:36 +0800 Subject: [PATCH 12/30] tl remove editor tool --- metagpt/roles/di/team_leader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index f934c09f9..8a2584905 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -26,7 +26,7 @@ class TeamLeader(RoleZero): # TeamLeader only reacts once each time, but may encounter errors or need to ask human, thus allowing 2 more turns max_react_loop: int = 3 - tools: list[str] = ["Plan", "RoleZero", "TeamLeader", "Editor:write,read"] + tools: list[str] = ["Plan", "RoleZero", "TeamLeader"] experience_retriever: Annotated[ExpRetriever, Field(exclude=True)] = SimpleExpRetriever() From f14bbf9fc5f8d6547abfa4b0eb17b48dcde69aad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Wed, 14 Aug 2024 10:14:38 +0800 Subject: [PATCH 13/30] =?UTF-8?q?=E4=BD=BF=E7=94=A8=20comon.aread=20?= =?UTF-8?q?=E8=AF=BB=E5=8F=96=E6=9C=AA=E7=9F=A5=E7=BC=96=E7=A0=81=E6=A0=BC?= 
=?UTF-8?q?=E5=BC=8F=E7=9A=84=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/tools/libs/editor.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index e314cb15d..f1eb9d933 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, ConfigDict from metagpt.logs import logger from metagpt.tools.tool_registry import register_tool from metagpt.utils import read_docx -from metagpt.utils.common import aread_bin, awrite_bin +from metagpt.utils.common import aread, aread_bin, awrite_bin from metagpt.utils.repo_to_markdown import is_text_file from metagpt.utils.report import EditorReporter @@ -48,7 +48,7 @@ class Editor(BaseModel): """Read the whole content of a file. Using absolute paths as the argument for specifying the file location.""" is_text, mime_type = await is_text_file(path) if is_text: - lines = self._read_text(path) + lines = await self._read_text(path) elif mime_type == "application/pdf": lines = await self._read_pdf(path) elif mime_type in { @@ -218,26 +218,10 @@ class Editor(BaseModel): return lint_passed, lint_message @staticmethod - def _read_text(path: Union[str, Path]) -> List[str]: - encoding_format_list = [ - "utf-8", - "ascii", - "gb2312", - "gbk", - "iso-8859-1", - "cp1252", - "utf-16", - "utf-16-le", - "utf-16-be", - ] - for encoding in encoding_format_list: - try: - with open(str(path), "r", encoding=encoding) as f: - lines = f.readlines() - return lines - except: - pass - return [f"Reading failed: `{path}` cannot be decoded by `{encoding_format_list}`."] + async def _read_text(path: Union[str, Path]) -> List[str]: + content = await aread(path) + lines = content.split("\n") + return lines @staticmethod async def _read_pdf(path: Union[str, Path]) -> List[str]: From 
f8c690804c4de2763a0e47ee20053e70060c5a3d Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 10:16:28 +0800 Subject: [PATCH 14/30] =?UTF-8?q?role=20zero=E7=9A=84quick=20think=20syste?= =?UTF-8?q?m=20prompt=E5=A2=9E=E5=8A=A0role=20info?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/role_zero.py | 2 ++ metagpt/roles/di/role_zero.py | 2 +- metagpt/roles/di/team_leader.py | 10 +++++++--- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 1bd2984a5..71625c780 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -151,6 +151,8 @@ For requests that are unclear, lack sufficient detail, or are outside the system 2. If the request is a "how-to" question that asks for a general study plan, approach or strategy, it should be categorized as QUICK. {examples} + +{role_info} """ QUICK_THINK_PROMPT = """ diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 7cdd9bdea..f5278bda7 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -254,7 +254,7 @@ class RoleZero(Role): def format_quick_system_prompt(self) -> str: """Format the system prompt for quick thinking.""" - return QUICK_THINK_SYSTEM_PROMPT.format(examples=QUICK_THINK_EXAMPLES) + return QUICK_THINK_SYSTEM_PROMPT.format(examples=QUICK_THINK_EXAMPLES, role_info=super()._get_prefix()) async def _quick_think(self) -> Tuple[Message, str]: answer = "" diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index 8a2584905..a10b61843 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -49,13 +49,17 @@ class TeamLeader(RoleZero): return team_info def format_quick_system_prompt(self) -> str: - qt_system_prompt = super().format_quick_system_prompt() - return qt_system_prompt + QUICK_THINK_SYSTEM_PROMPT.format( - 
role_info=super()._get_prefix(), + quick_system_prompt = super().format_quick_system_prompt() + return quick_system_prompt + QUICK_THINK_SYSTEM_PROMPT.format( + role_info="", # rolezero's quick think system prompt will include role_info team_info=self._get_team_info(), ) async def _quick_think(self) -> Message: + self.llm.system_prompt = QUICK_THINK_SYSTEM_PROMPT.format( + role_info=self._get_role_info(), + team_info=self._get_team_info(), + ) return await super()._quick_think() async def _think(self) -> bool: From 27a5288596e5de5ccd6444bd93674d55ebe173b7 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 10:22:17 +0800 Subject: [PATCH 15/30] use get_prefix --- metagpt/roles/di/team_leader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index a10b61843..0a8fd7acb 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -57,7 +57,7 @@ class TeamLeader(RoleZero): async def _quick_think(self) -> Message: self.llm.system_prompt = QUICK_THINK_SYSTEM_PROMPT.format( - role_info=self._get_role_info(), + role_info=super()._get_prefix(), team_info=self._get_team_info(), ) return await super()._quick_think() From 1dcb7984c555a700a737c1b7f3c65e65ad81a0b0 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 10:54:37 +0800 Subject: [PATCH 16/30] make prompt more generalized --- metagpt/prompts/di/role_zero.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 0af2b92d7..b1165473f 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -148,7 +148,7 @@ For requests that are unclear, lack sufficient detail, or are outside the system **Note:** Before categorizing a request as TASK: 1. Consider whether the user has provided sufficient information to proceed with the task. 
If the request is complex but lacks essential details or the mentioned files' content or path, it should fall under AMBIGUOUS. -2. If the request is a "how-to" question that asks for a general study plan, approach or strategy, it should be categorized as QUICK. +2. If the request is a "how-to" question that asks for a general plan, approach or strategy, it should be categorized as QUICK. {examples} From b6cf6048ec944b418698e2155c4b16bf0d53d4ba Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 11:06:31 +0800 Subject: [PATCH 17/30] =?UTF-8?q?=E4=B8=8D=E4=BD=BF=E7=94=A8SEARCH?= =?UTF-8?q?=EF=BC=8C=E5=A6=82=E6=9E=9C=E6=96=87=E4=BB=B6=E5=B7=B2=E6=8F=90?= =?UTF-8?q?=E4=BE=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/role_zero.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index b1165473f..0060ac7ec 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -134,6 +134,7 @@ For straightforward questions or requests that can be answered directly. This in ## SEARCH For queries that require retrieving up-to-date or detailed information. This includes time-sensitive or location-specific questions like current events or weather. Use this only if the information isn't readily available. +If a file or link is provided, you don't need to search for additional information. ## TASK For requests that involve tool utilizations, computer operations, multiple steps or detailed instructions. Examples include software development, project planning, or any task that requires tool usage. 
From 0315a218bec040b7f9a1fdd4ceaaff1b4aa92a02 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 11:50:20 +0800 Subject: [PATCH 18/30] =?UTF-8?q?=E5=A2=9E=E5=8A=A0TL=5FINFO=EF=BC=8C?= =?UTF-8?q?=E6=9B=B4=E6=94=B9TL=E7=9A=84=5Fget=5Fprefix,=E4=B8=8D=E7=94=A8?= =?UTF-8?q?=E9=A2=9D=E5=A4=96=E7=BB=B4=E6=8A=A4TL=E7=9A=84SYSTEM=5FPROMPT?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/role_zero.py | 3 +-- metagpt/prompts/di/team_leader.py | 6 ++++++ metagpt/roles/di/role_zero.py | 2 +- metagpt/roles/di/team_leader.py | 15 +++++++-------- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 0060ac7ec..b7b32b7f3 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -126,6 +126,7 @@ Output the JSON data in a format that can be loaded by the json.loads() function """ QUICK_THINK_SYSTEM_PROMPT = """ +{role_info} Your role is to determine the appropriate response category for the given request. # Response Categories @@ -152,8 +153,6 @@ For requests that are unclear, lack sufficient detail, or are outside the system 2. If the request is a "how-to" question that asks for a general plan, approach or strategy, it should be categorized as QUICK. {examples} - -{role_info} """ QUICK_THINK_PROMPT = """ diff --git a/metagpt/prompts/di/team_leader.py b/metagpt/prompts/di/team_leader.py index ad473dfff..7c536875c 100644 --- a/metagpt/prompts/di/team_leader.py +++ b/metagpt/prompts/di/team_leader.py @@ -42,6 +42,12 @@ Your team member: However, you MUST respond to the user message by yourself directly, DON'T ask your team members. 
""" +TL_INFO = """ +{role_info} +Your team member: +{team_info} +""" + FINISH_CURRENT_TASK_CMD = """ ```json [ diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 681a82402..e287471f6 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -258,7 +258,7 @@ class RoleZero(Role): def format_quick_system_prompt(self) -> str: """Format the system prompt for quick thinking.""" - return QUICK_THINK_SYSTEM_PROMPT.format(examples=QUICK_THINK_EXAMPLES, role_info=super()._get_prefix()) + return QUICK_THINK_SYSTEM_PROMPT.format(examples=QUICK_THINK_EXAMPLES, role_info=self._get_prefix()) async def _quick_think(self) -> Tuple[Message, str]: answer = "" diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index 0a8fd7acb..305d546d8 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -8,6 +8,7 @@ from metagpt.actions.di.run_command import RunCommand from metagpt.prompts.di.team_leader import ( FINISH_CURRENT_TASK_CMD, QUICK_THINK_SYSTEM_PROMPT, + TL_INFO, TL_INSTRUCTION, TL_THOUGHT_GUIDANCE, ) @@ -47,14 +48,12 @@ class TeamLeader(RoleZero): # continue team_info += f"{role.name}: {role.profile}, {role.goal}\n" return team_info - - def format_quick_system_prompt(self) -> str: - quick_system_prompt = super().format_quick_system_prompt() - return quick_system_prompt + QUICK_THINK_SYSTEM_PROMPT.format( - role_info="", # rolezero's quick think system prompt will include role_info - team_info=self._get_team_info(), - ) - + + def _get_prefix(self) -> str: + role_info = super()._get_prefix() + team_info = self._get_team_info() + return TL_INFO.format(role_info=role_info, team_info=team_info) + async def _quick_think(self) -> Message: self.llm.system_prompt = QUICK_THINK_SYSTEM_PROMPT.format( role_info=super()._get_prefix(), From 3b7dbf5e4ee1826c2240698c948979dd4c820c38 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 14:46:38 +0800 Subject: [PATCH 19/30] 
Remove TL's quick think system prompt --- metagpt/prompts/di/team_leader.py | 6 ------ metagpt/roles/di/team_leader.py | 5 ----- 2 files changed, 11 deletions(-) diff --git a/metagpt/prompts/di/team_leader.py b/metagpt/prompts/di/team_leader.py index 7c536875c..d7eb33442 100644 --- a/metagpt/prompts/di/team_leader.py +++ b/metagpt/prompts/di/team_leader.py @@ -35,12 +35,6 @@ Sixth, describe the requirements as they pertain to software development, data a Seventh, describe the technologies you must use. """ ) -QUICK_THINK_SYSTEM_PROMPT = """ -{role_info} -Your team member: -{team_info} -However, you MUST respond to the user message by yourself directly, DON'T ask your team members. -""" TL_INFO = """ {role_info} diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index 305d546d8..82288a7ed 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -7,7 +7,6 @@ from pydantic import Field from metagpt.actions.di.run_command import RunCommand from metagpt.prompts.di.team_leader import ( FINISH_CURRENT_TASK_CMD, - QUICK_THINK_SYSTEM_PROMPT, TL_INFO, TL_INSTRUCTION, TL_THOUGHT_GUIDANCE, @@ -55,10 +54,6 @@ class TeamLeader(RoleZero): return TL_INFO.format(role_info=role_info, team_info=team_info) async def _quick_think(self) -> Message: - self.llm.system_prompt = QUICK_THINK_SYSTEM_PROMPT.format( - role_info=super()._get_prefix(), - team_info=self._get_team_info(), - ) return await super()._quick_think() async def _think(self) -> bool: From 3490a4e2344cc7082499f2ef94e19a41a78f5728 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 14:52:14 +0800 Subject: [PATCH 20/30] remove tl's qt --- metagpt/roles/di/team_leader.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index 82288a7ed..4a39193a2 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -53,9 +53,6 @@ class TeamLeader(RoleZero): team_info = 
self._get_team_info() return TL_INFO.format(role_info=role_info, team_info=team_info) - async def _quick_think(self) -> Message: - return await super()._quick_think() - async def _think(self) -> bool: self.instruction = TL_INSTRUCTION.format(team_info=self._get_team_info()) return await super()._think() From 0469d1ed7d5b07f222aa594b97459ab38e471f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Wed, 14 Aug 2024 15:51:05 +0800 Subject: [PATCH 21/30] =?UTF-8?q?fixbug:u=E4=BC=98=E5=8C=96=E4=BA=86?= =?UTF-8?q?=E9=87=8D=E5=A4=8D=E6=89=A7=E8=A1=8C=E7=9A=84=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E5=92=8Cengineer=E6=8F=90=E6=97=A9=E7=BB=93=E6=9D=9F=E4=BB=BB?= =?UTF-8?q?=E5=8A=A1=E7=9A=84=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/roles/di/role_zero.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 815768fb1..1fa1930e8 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -155,6 +155,8 @@ class RoleZero(Role): ### 2. Plan Status ### plan_status, current_task = self._get_plan_status() + plan_status_formated = self._format_plan_status(plan_status) + ### 3. 
Tool/Command Info ### tools = await self.tool_recommender.recommend_tools() tool_info = json.dumps({tool.name: tool.schemas for tool in tools}) @@ -168,7 +170,7 @@ class RoleZero(Role): ### Make Decision Dynamically ### prompt = self.cmd_prompt.format( current_state=self.cmd_prompt_current_state, - plan_status=plan_status, + plan_status=plan_status_formated, current_task=current_task, requirements_constraints=self.requirements_constraints, ) @@ -384,11 +386,10 @@ class RoleZero(Role): """command requiring special check or parsing""" command_output = "" - if cmd["command_name"] == "Plan.finish_current_task" and not self.planner.plan.is_plan_finished(): - # task_result = TaskResult(code=str(commands), result=outputs, is_success=is_success) - # self.planner.plan.current_task.update_task_result(task_result=task_result) - self.planner.plan.finish_current_task() - command_output = "Current task is finished. " + if cmd["command_name"] == "Plan.finish_current_task": + if not self.planner.plan.is_plan_finished(): + self.planner.plan.finish_current_task() + command_output = "Current task is finished. If all tasks are finished, use 'end' to stop." elif cmd["command_name"] == "end": self._set_state(-1) @@ -420,6 +421,21 @@ class RoleZero(Role): ) return plan_status, current_task + def _format_plan_status(self, plan_status): + """format plan status""" + # Example: + # [GOAL] create a 2048 game + # [TASK_ID 1] (finished) Create a Product Requirement Document (PRD) for the 2048 game. This task depends on tasks[]. [Assign to Alice] + # [TASK_ID 2] ( ) Design the system architecture for the 2048 game. This task depends on tasks[1]. 
[Assign to Bob] + plan_status_formated = f"[GOAL] {plan_status['goal']}\n" + if len(plan_status["tasks"]) > 0: + plan_status_formated += "[Plan]\n" + for task in plan_status["tasks"]: + plan_status_formated += f"[TASK_ID {task['task_id']}] ({'finished' if task['is_finished'] else ' '}){task['instruction']} This task depends on tasks{task['dependent_task_ids']}. [Assign to {task['assignee']}]\n" + else: + plan_status_formated += "No Plan \n" + return plan_status_formated + def _retrieve_experience(self) -> str: """Default implementation of experience retrieval. Can be overwritten in subclasses.""" context = [str(msg) for msg in self.rc.memory.get(self.memory_k)] From 22841804a575c1812f211e07398f0461ca53c95c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Wed, 14 Aug 2024 16:02:35 +0800 Subject: [PATCH 22/30] fix format issues --- metagpt/prompts/di/role_zero.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index b6faa8cb3..22222478d 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -38,7 +38,6 @@ class Task(BaseModel): {available_commands} Special Command: Use {{"command_name": "end"}} to do nothing or indicate completion of all requirements and the end of actions. 
- # Example {example} @@ -46,7 +45,8 @@ Special Command: Use {{"command_name": "end"}} to do nothing or indicate complet # Instruction {instruction} """ - +EXAMPLE = "" +# Example CMD_PROMPT = """ {current_state} From 91a0637174ef9f170605fbb2582f0d89ede18210 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Wed, 14 Aug 2024 16:07:11 +0800 Subject: [PATCH 23/30] fix format issues --- metagpt/prompts/di/role_zero.py | 5 ++--- metagpt/roles/di/role_zero.py | 14 +++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 22222478d..3da073dfd 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -38,15 +38,14 @@ class Task(BaseModel): {available_commands} Special Command: Use {{"command_name": "end"}} to do nothing or indicate completion of all requirements and the end of actions. + # Example {example} - # Instruction {instruction} """ -EXAMPLE = "" -# Example + CMD_PROMPT = """ {current_state} diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 1fa1930e8..39f0cbd1c 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -155,7 +155,7 @@ class RoleZero(Role): ### 2. Plan Status ### plan_status, current_task = self._get_plan_status() - plan_status_formated = self._format_plan_status(plan_status) + formatted_plan_status = self._format_plan_status(plan_status) ### 3. 
Tool/Command Info ### tools = await self.tool_recommender.recommend_tools() @@ -170,7 +170,7 @@ class RoleZero(Role): ### Make Decision Dynamically ### prompt = self.cmd_prompt.format( current_state=self.cmd_prompt_current_state, - plan_status=plan_status_formated, + plan_status=formatted_plan_status, current_task=current_task, requirements_constraints=self.requirements_constraints, ) @@ -427,14 +427,14 @@ class RoleZero(Role): # [GOAL] create a 2048 game # [TASK_ID 1] (finished) Create a Product Requirement Document (PRD) for the 2048 game. This task depends on tasks[]. [Assign to Alice] # [TASK_ID 2] ( ) Design the system architecture for the 2048 game. This task depends on tasks[1]. [Assign to Bob] - plan_status_formated = f"[GOAL] {plan_status['goal']}\n" + formatted_plan_status = f"[GOAL] {plan_status['goal']}\n" if len(plan_status["tasks"]) > 0: - plan_status_formated += "[Plan]\n" + formatted_plan_status += "[Plan]\n" for task in plan_status["tasks"]: - plan_status_formated += f"[TASK_ID {task['task_id']}] ({'finished' if task['is_finished'] else ' '}){task['instruction']} This task depends on tasks{task['dependent_task_ids']}. [Assign to {task['assignee']}]\n" + formatted_plan_status += f"[TASK_ID {task['task_id']}] ({'finished' if task['is_finished'] else ' '}){task['instruction']} This task depends on tasks{task['dependent_task_ids']}. [Assign to {task['assignee']}]\n" else: - plan_status_formated += "No Plan \n" - return plan_status_formated + formatted_plan_status += "No Plan \n" + return formatted_plan_status def _retrieve_experience(self) -> str: """Default implementation of experience retrieval. 
Can be overwritten in subclasses.""" From 6be9e10175197e7f73f93e27645532eac138ed89 Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Wed, 14 Aug 2024 16:07:24 +0800 Subject: [PATCH 24/30] update comment --- metagpt/schema.py | 2 +- metagpt/tools/libs/browser.py | 2 +- metagpt/utils/common.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/metagpt/schema.py b/metagpt/schema.py index ad8c8f1d7..8ef7dd0bb 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -121,7 +121,7 @@ class SerializationMixin(BaseModel, extra="forbid"): if class_type is None: # TODO could try dynamic import - raise TypeError("Trying to instantiate {class_full_name}, which has not yet been defined!") + raise TypeError(f"Trying to instantiate {class_full_name}, which has not yet been defined!") return class_type(**value) diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index 3efddd2e8..bba7fa5a8 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -75,7 +75,7 @@ class Browser(BaseModel): page: Optional[Page] = None accessibility_tree: list = Field(default_factory=list) headless: bool = True - proxy: Optional[str] = Field(default_factory=get_proxy_from_env) + proxy: Optional[dict] = Field(default_factory=get_proxy_from_env) is_empty_page: bool = True reporter: BrowserReporter = Field(default_factory=BrowserReporter) diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index 65bfa480d..1ba6f8239 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -566,7 +566,7 @@ def general_after_log(i: "loguru.Logger", sec_format: str = "%0.3f") -> Callable return log_it -def read_json_file(json_file: str, encoding="utf-8") -> list[Any]: +def read_json_file(json_file: str, encoding: str = "utf-8") -> list[Any]: if not Path(json_file).exists(): raise FileNotFoundError(f"json_file: {json_file} not exist, return []") @@ -595,7 +595,7 @@ def handle_unknown_serialization(x: Any) -> str: return f"" -def 
write_json_file(json_file: str, data: Any, encoding: str = None, indent: int = 4, use_fallback: bool = False): +def write_json_file(json_file: str, data: Any, encoding: str = "utf-8", indent: int = 4, use_fallback: bool = False): folder_path = Path(json_file).parent if not folder_path.exists(): folder_path.mkdir(parents=True, exist_ok=True) From c757d3985211e2865840acaaa1ecd2e253099e63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Wed, 14 Aug 2024 16:09:37 +0800 Subject: [PATCH 25/30] fix format issues --- metagpt/prompts/di/role_zero.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 3da073dfd..c26bad164 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -38,7 +38,6 @@ class Task(BaseModel): {available_commands} Special Command: Use {{"command_name": "end"}} to do nothing or indicate completion of all requirements and the end of actions. - # Example {example} From e5587500135c4770f09bea9f305c98e6e3fcf69b Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 19:00:35 +0800 Subject: [PATCH 26/30] add quick response system prompt --- metagpt/prompts/di/role_zero.py | 5 +++++ metagpt/roles/di/role_zero.py | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index b7b32b7f3..9dcc97b96 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -204,3 +204,8 @@ Thought: The request is vague and lacks specifics, requiring clarification on th Response Category: AMBIGUOUS. """ + +QUICK_RESPONSE_SYSTEM_PROMPT = """ +{role_info} +However, you MUST respond to the user message by yourself directly, DON'T ask your team members. 
+""" \ No newline at end of file diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index e287471f6..fff14a883 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -23,6 +23,7 @@ from metagpt.prompts.di.role_zero import ( QUICK_THINK_PROMPT, QUICK_THINK_EXAMPLES, QUICK_THINK_SYSTEM_PROMPT, + QUICK_RESPONSE_SYSTEM_PROMPT, REGENERATE_PROMPT, ROLE_INSTRUCTION, SYSTEM_PROMPT, @@ -275,7 +276,7 @@ class RoleZero(Role): if "QUICK" in intent_result or "AMBIGUOUS " in intent_result: # llm call with the original context async with ThoughtReporter(enable_llm_stream=True) as reporter: await reporter.async_report({"type": "quick"}) - answer = await self.llm.aask(self.llm.format_msg(memory)) + answer = await self.llm.aask(self.llm.format_msg(memory), system_msgs=QUICK_RESPONSE_SYSTEM_PROMPT.format(role_info=self._get_prefix())) elif "SEARCH" in intent_result: query = "\n".join(str(msg) for msg in memory) answer = await SearchEnhancedQA().run(query) From 7a96978e149dec6e0dcf2636299fd77665c5934e Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 19:13:45 +0800 Subject: [PATCH 27/30] remove space --- metagpt/roles/di/role_zero.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index fff14a883..3e79b6a2c 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -273,7 +273,7 @@ class RoleZero(Role): context = self.llm.format_msg(memory + [UserMessage(content=QUICK_THINK_PROMPT)]) intent_result = await self.llm.aask(context, system_msgs=self.format_quick_system_prompt()) - if "QUICK" in intent_result or "AMBIGUOUS " in intent_result: # llm call with the original context + if "QUICK" in intent_result or "AMBIGUOUS" in intent_result: # llm call with the original context async with ThoughtReporter(enable_llm_stream=True) as reporter: await reporter.async_report({"type": "quick"}) answer = await 
self.llm.aask(self.llm.format_msg(memory), system_msgs=QUICK_RESPONSE_SYSTEM_PROMPT.format(role_info=self._get_prefix())) From a26199afd9ac4aabcb5196f75d5bdee95edfc376 Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Wed, 14 Aug 2024 19:29:04 +0800 Subject: [PATCH 28/30] make system message a list --- metagpt/roles/di/role_zero.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 3e79b6a2c..1fa128ee6 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -271,12 +271,12 @@ class RoleZero(Role): # routing memory = self.get_memories(k=4) # FIXME: A magic number for two rounds of Q&A context = self.llm.format_msg(memory + [UserMessage(content=QUICK_THINK_PROMPT)]) - intent_result = await self.llm.aask(context, system_msgs=self.format_quick_system_prompt()) + intent_result = await self.llm.aask(context, system_msgs=[self.format_quick_system_prompt()]) if "QUICK" in intent_result or "AMBIGUOUS" in intent_result: # llm call with the original context async with ThoughtReporter(enable_llm_stream=True) as reporter: await reporter.async_report({"type": "quick"}) - answer = await self.llm.aask(self.llm.format_msg(memory), system_msgs=QUICK_RESPONSE_SYSTEM_PROMPT.format(role_info=self._get_prefix())) + answer = await self.llm.aask(self.llm.format_msg(memory), system_msgs=[QUICK_RESPONSE_SYSTEM_PROMPT.format(role_info=self._get_prefix())]) elif "SEARCH" in intent_result: query = "\n".join(str(msg) for msg in memory) answer = await SearchEnhancedQA().run(query) From e9984f2bf82d9cd86d0056d142b44471da9078e6 Mon Sep 17 00:00:00 2001 From: garylin2099 Date: Wed, 14 Aug 2024 20:12:17 +0800 Subject: [PATCH 29/30] attach images to message --- metagpt/environment/mgx/mgx_env.py | 17 ++++++++-- metagpt/provider/base_llm.py | 12 ++++++-- metagpt/roles/di/role_zero.py | 25 ++++++--------- metagpt/utils/common.py | 14 +++++---- tests/metagpt/provider/test_base_llm.py | 41 
++++++++++++++++++++++++- tests/metagpt/utils/test_common.py | 7 ++--- 6 files changed, 83 insertions(+), 33 deletions(-) diff --git a/metagpt/environment/mgx/mgx_env.py b/metagpt/environment/mgx/mgx_env.py index fae386952..8bb3fc823 100644 --- a/metagpt/environment/mgx/mgx_env.py +++ b/metagpt/environment/mgx/mgx_env.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from metagpt.actions import ( UserRequirement, WriteDesign, @@ -6,12 +8,12 @@ from metagpt.actions import ( WriteTest, ) from metagpt.actions.summarize_code import SummarizeCode -from metagpt.const import AGENT +from metagpt.const import AGENT, IMAGES from metagpt.environment.base_env import Environment from metagpt.logs import get_human_input from metagpt.roles import Architect, ProductManager, ProjectManager, Role from metagpt.schema import Message, SerializationMixin -from metagpt.utils.common import any_to_str, any_to_str_set +from metagpt.utils.common import any_to_str, any_to_str_set, extract_and_encode_images class MGXEnv(Environment, SerializationMixin): @@ -27,6 +29,8 @@ class MGXEnv(Environment, SerializationMixin): def publish_message(self, message: Message, user_defined_recipient: str = "", publicer: str = "") -> bool: """let the team leader take over message publishing""" + message = self.attach_images(message) # for multi-modal message + tl = self.get_role("Mike") # TeamLeader's name is Mike if user_defined_recipient: @@ -119,9 +123,16 @@ class MGXEnv(Environment, SerializationMixin): converted_msg.role = "assistant" sent_from = converted_msg.metadata[AGENT] if AGENT in converted_msg.metadata else converted_msg.sent_from converted_msg.content = ( - f"[Message] from {sent_from if sent_from else 'User'} to {converted_msg.send_to}: {converted_msg.content}" + f"[Message] from {sent_from or 'User'} to {converted_msg.send_to}: {converted_msg.content}" ) return converted_msg + def attach_images(self, message: Message) -> Message: + if message.role == "user": + images = 
extract_and_encode_images(message.content) + if images: + message.add_metadata(IMAGES, images) + return message + def __repr__(self): return "MGXEnv()" diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py index ac09c19f7..813e77d95 100644 --- a/metagpt/provider/base_llm.py +++ b/metagpt/provider/base_llm.py @@ -24,8 +24,9 @@ from tenacity import ( from metagpt.configs.compress_msg_config import CompressType from metagpt.configs.llm_config import LLMConfig -from metagpt.const import LLM_API_TIMEOUT, USE_CONFIG_TIMEOUT +from metagpt.const import IMAGES, LLM_API_TIMEOUT, USE_CONFIG_TIMEOUT from metagpt.logs import logger +from metagpt.provider.constant import MULTI_MODAL_MODELS from metagpt.schema import Message from metagpt.utils.common import log_and_reraise from metagpt.utils.cost_manager import CostManager, Costs @@ -50,7 +51,7 @@ class BaseLLM(ABC): pass def _user_msg(self, msg: str, images: Optional[Union[str, list[str]]] = None) -> dict[str, Union[str, dict]]: - if images: + if images and self.support_image_input(): # as gpt-4v, chat with image return self._user_msg_with_imgs(msg, images) else: @@ -76,6 +77,9 @@ class BaseLLM(ABC): def _system_msg(self, msg: str) -> dict[str, str]: return {"role": "system", "content": msg} + def support_image_input(self) -> bool: + return any([m in self.config.model for m in MULTI_MODAL_MODELS]) + def format_msg(self, messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]: """convert messages to list[dict].""" from metagpt.schema import Message @@ -91,7 +95,9 @@ class BaseLLM(ABC): assert set(msg.keys()) == set(["role", "content"]) processed_messages.append(msg) elif isinstance(msg, Message): - processed_messages.append(msg.to_dict()) + images = msg.metadata.get(IMAGES) + processed_msg = self._user_msg(msg=msg.content, images=images) if images else msg.to_dict() + processed_messages.append(processed_msg) else: raise ValueError( f"Only support message type are: str, Message, dict, 
but got {type(messages).__name__}!" diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index f1339ef32..cc9d1d1aa 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -2,7 +2,6 @@ from __future__ import annotations import inspect import json -import os import re import traceback from typing import Annotated, Callable, Dict, List, Literal, Optional, Tuple @@ -13,6 +12,7 @@ from metagpt.actions import Action, UserRequirement from metagpt.actions.analyze_requirements import AnalyzeRequirementsRestrictions from metagpt.actions.di.run_command import RunCommand from metagpt.actions.search_enhanced_qa import SearchEnhancedQA +from metagpt.const import IMAGES from metagpt.exp_pool import exp_cache from metagpt.exp_pool.context_builders import RoleZeroContextBuilder from metagpt.exp_pool.serializers import RoleZeroSerializer @@ -35,13 +35,7 @@ from metagpt.tools.libs.browser import Browser from metagpt.tools.libs.editor import Editor from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender from metagpt.tools.tool_registry import register_tool -from metagpt.utils.common import ( - CodeParser, - any_to_str, - encode_image, - extract_image_paths, - is_support_image_input, -) +from metagpt.utils.common import CodeParser, any_to_str, extract_and_encode_images from metagpt.utils.repair_llm_raw_output import ( RepairType, repair_escape_error, @@ -219,15 +213,14 @@ class RoleZero(Role): return memory def parse_images(self, memory: list[Message]) -> list[Message]: - if not is_support_image_input(self.llm.model): + if not self.llm.support_image_input(): return memory - for i, msg in enumerate(memory): - if msg.role == "user" and isinstance(msg.content, str) and extract_image_paths(msg.content): - images = [] - for path in extract_image_paths(msg.content): - if os.path.exists(path): - images.append(encode_image(path)) - memory[i] = self.llm._user_msg_with_imgs(msg.content, images=images) + for msg in memory: + 
if IMAGES in msg.metadata or msg.role != "user": + continue + images = extract_and_encode_images(msg.content) + if images: + msg.add_metadata(IMAGES, images) return memory async def _act(self) -> Message: diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index 8f55df8ba..0d8c03a02 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -840,12 +840,6 @@ def decode_image(img_url_or_b64: str) -> Image: return img -def is_support_image_input(model_name: str) -> bool: - # model name can be gpt-4o-2024-08-06 - support_models = ["gpt-4o", "gpt-4o-mini"] # FIXME: hard code for now - return any([m in model_name for m in support_models]) - - def extract_image_paths(content: str) -> bool: # We require that the path must have a space preceding it, like "xxx /an/absolute/path.jpg xxx" pattern = r"[^\s]+\.(?:png|jpe?g|gif|bmp|tiff)" @@ -853,6 +847,14 @@ def extract_image_paths(content: str) -> bool: return image_paths +def extract_and_encode_images(content: str) -> list[str]: + images = [] + for path in extract_image_paths(content): + if os.path.exists(path): + images.append(encode_image(path)) + return images + + def log_and_reraise(retry_state: RetryCallState): logger.error(f"Retry attempts exhausted. 
Last exception: {retry_state.outcome.exception()}") logger.warning( diff --git a/tests/metagpt/provider/test_base_llm.py b/tests/metagpt/provider/test_base_llm.py index d34ed62f1..62083a769 100644 --- a/tests/metagpt/provider/test_base_llm.py +++ b/tests/metagpt/provider/test_base_llm.py @@ -10,8 +10,9 @@ import pytest from metagpt.configs.compress_msg_config import CompressType from metagpt.configs.llm_config import LLMConfig +from metagpt.const import IMAGES from metagpt.provider.base_llm import BaseLLM -from metagpt.schema import Message +from metagpt.schema import AIMessage, Message, UserMessage from tests.metagpt.provider.mock_llm_config import mock_llm_config from tests.metagpt.provider.req_resp_const import ( default_resp_cont, @@ -163,3 +164,41 @@ def test_compress_messages_long_no_sys_msg(compress_type): print(compressed) assert compressed assert len(compressed[0]["content"]) < len(messages[0]["content"]) + + +def test_format_msg(mocker): + base_llm = MockBaseLLM() + messages = [UserMessage(content="req"), AIMessage(content="rsp")] + formatted_msgs = base_llm.format_msg(messages) + assert formatted_msgs == [{"role": "user", "content": "req"}, {"role": "assistant", "content": "rsp"}] + + +def test_format_msg_w_images(mocker): + base_llm = MockBaseLLM() + base_llm.config.model = "gpt-4o" + msg_w_images = UserMessage(content="req1") + msg_w_images.add_metadata(IMAGES, ["base64 string 1", "base64 string 2"]) + msg_w_empty_images = UserMessage(content="req2") + msg_w_empty_images.add_metadata(IMAGES, []) + messages = [ + msg_w_images, # should be converted + AIMessage(content="rsp"), + msg_w_empty_images, # should not be converted + ] + formatted_msgs = base_llm.format_msg(messages) + assert formatted_msgs == [ + { + "role": "user", + "content": [ + {"type": "text", "text": "req1"}, + {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,base64 string 1"}}, + {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,base64 string 2"}}, + 
], + }, + {"role": "assistant", "content": "rsp"}, + {"role": "user", "content": "req2"}, + ] + + +if __name__ == "__main__": + pytest.main([__file__, "-s"]) diff --git a/tests/metagpt/utils/test_common.py b/tests/metagpt/utils/test_common.py index 06838b7c7..b85fe229b 100644 --- a/tests/metagpt/utils/test_common.py +++ b/tests/metagpt/utils/test_common.py @@ -29,9 +29,9 @@ from metagpt.utils.common import ( awrite, check_cmd_exists, concat_namespace, + extract_and_encode_images, extract_image_paths, import_class_inst, - is_support_image_input, parse_recipient, print_members, read_file_block, @@ -231,9 +231,8 @@ def test_extract_image_paths(): assert not extract_image_paths(content) -def test_is_support_image_input(): - assert is_support_image_input("gpt-4o-2024-08-06") - assert not is_support_image_input("deepseek-coder") +def test_extract_and_encode_images(): + assert not extract_and_encode_images("a non-existing.jpg") if __name__ == "__main__": From 31082399c99553f225237db720b0858ce67cd21c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Wed, 14 Aug 2024 20:39:53 +0800 Subject: [PATCH 30/30] combine _format_plan_status and _get_plan_status --- metagpt/roles/di/role_zero.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 39f0cbd1c..e12bd97c9 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -155,8 +155,6 @@ class RoleZero(Role): ### 2. Plan Status ### plan_status, current_task = self._get_plan_status() - formatted_plan_status = self._format_plan_status(plan_status) - ### 3. 
Tool/Command Info ### tools = await self.tool_recommender.recommend_tools() tool_info = json.dumps({tool.name: tool.schemas for tool in tools}) @@ -170,7 +168,7 @@ class RoleZero(Role): ### Make Decision Dynamically ### prompt = self.cmd_prompt.format( current_state=self.cmd_prompt_current_state, - plan_status=formatted_plan_status, + plan_status=plan_status, current_task=current_task, requirements_constraints=self.requirements_constraints, ) @@ -419,10 +417,7 @@ class RoleZero(Role): if self.planner.plan.current_task else "" ) - return plan_status, current_task - - def _format_plan_status(self, plan_status): - """format plan status""" + # format plan status # Example: # [GOAL] create a 2048 game # [TASK_ID 1] (finished) Create a Product Requirement Document (PRD) for the 2048 game. This task depends on tasks[]. [Assign to Alice] @@ -434,7 +429,7 @@ class RoleZero(Role): formatted_plan_status += f"[TASK_ID {task['task_id']}] ({'finished' if task['is_finished'] else ' '}){task['instruction']} This task depends on tasks{task['dependent_task_ids']}. [Assign to {task['assignee']}]\n" else: formatted_plan_status += "No Plan \n" - return formatted_plan_status + return formatted_plan_status, current_task def _retrieve_experience(self) -> str: """Default implementation of experience retrieval. Can be overwritten in subclasses."""