Merge branch 'opt_speed_quality' into 'mgx_ops'

Improve speed by estimating requirement complexity

See merge request pub/MetaGPT!363
2026-07-17 16:41:05 +02:00 · 2024-09-04 06:43:35 +00:00 · 2024-09-04 06:43:35 +00:00 · efd253ab11
commit efd253ab11
parent 59fc7b32c7 9c4788ac3f
7 changed files with 23 additions and 15 deletions
--- a/metagpt/actions/search_enhanced_qa.py
+++ b/metagpt/actions/search_enhanced_qa.py
@ -9,6 +9,7 @@ from pydantic import Field, PrivateAttr, model_validator
 from metagpt.actions import Action
 from metagpt.actions.research import CollectLinks, WebBrowseAndSummarize
 from metagpt.logs import logger
+from metagpt.tools.tool_registry import register_tool
 from metagpt.tools.web_browser_engine import WebBrowserEngine
 from metagpt.utils.common import CodeParser
 from metagpt.utils.parse_html import WebPage
@ -57,8 +58,9 @@ Remember, don't blindly repeat the contexts verbatim. And here is the user quest
 """


+@register_tool(include_functions=["run"])
 class SearchEnhancedQA(Action):
-    """Enhancing question-answering capabilities through search engine augmentation."""
+    """Question answering and info searching through search engine."""

    name: str = "SearchEnhancedQA"
    desc: str = "Integrating search engine results to anwser the question."
--- a/metagpt/prompts/di/data_analyst.py
+++ b/metagpt/prompts/di/data_analyst.py
@ -1,12 +1,12 @@
 from metagpt.strategy.task_type import TaskType

 EXTRA_INSTRUCTION = """
-6. Carefully choose to use or not use the browser tool to assist you in web tasks. 
-    - When no click action is required, no need to use the Browser tool to navigate to the webpage before scraping.
-    - Write code to view the HTML content rather than using the Browser tool.
-    - Make sure the command_name are certainly in Available Commands when you use the Browser tool.
-    - For information searching requirement, you should use the Browser tool instead of web scraping.
-    - When no link is provided, you should use the Browser tool to search for the information.
+6. Carefully consider how you handle web tasks:
+ - Use SearchEnhancedQA for general information searching, i.e. querying search engines, such as googling news, weather, wiki, etc. Usually, no link is provided.
+ - Use Browser for reading, navigating, or in-domain searching within a specific web, such as reading a blog, searching products from a given e-commerce web link, or interacting with a web app.
+ - Use DataAnalyst.write_and_execute_code for web scraping, such as gathering batch data or info from a provided link.
+ - Write code to view the HTML content rather than using the Browser tool.
+ - Make sure the command_name are certainly in Available Commands when you use the Browser tool.
 7. When you are making plan. It is highly recommend to plan and append all the tasks in first response once time, except for 7.1.
 7.1. When the requirement is inquiring about a pdf, docx, md, or txt document, read the document first through either Editor.read WITHOUT a plan. After reading the document, use RoleZero.reply_to_human if the requirement can be answered straightaway, otherwise, make a plan if further calculation is needed.
 8. Don't finish_current_task multiple times for the same task.
--- a/metagpt/prompts/di/role_zero.py
+++ b/metagpt/prompts/di/role_zero.py
@ -79,7 +79,7 @@ Output should adhere to the following format.
 ```json
 [
    {{
-        "command_name": str,
+        "command_name": "ClassName.method_name" or "function_name",
        "args": {{"arg_name": arg_value, ...}}
    }},
    ...
--- a/metagpt/prompts/di/team_leader.py
+++ b/metagpt/prompts/di/team_leader.py
@ -14,8 +14,15 @@ Pay close attention to new user message, review the conversation history, use Ro
 Pay close attention to messages from team members. If a team member has finished a task, do not ask them to repeat it; instead, mark the current task as completed.
 Note:
 1. If the requirement is a pure DATA-RELATED requirement, such as web browsing, web scraping, web searching, web imitation, data science, data analysis, machine learning, deep learning, text-to-image etc. DON'T decompose it, assign a single task with the original user requirement as instruction directly to Data Analyst.
-2. If the requirement is developing a software, game, app, or website, excluding the above data-related tasks, you should decompose the requirement into multiple tasks and assign them to different team members based on their expertise. The software default development process has four steps: creating a Product Requirement Document (PRD) by the Product Manager -> writing a System Design by the Architect -> creating tasks by the Project Manager -> and coding by the Engineer. You may choose to execute any of these steps. When publishing message to Product Manager, you should directly copy the full original user requirement.
+2. If the requirement is developing a software, game, app, or website, excluding the above data-related tasks, you should decompose the requirement into multiple tasks and assign them to different team members based on their expertise. The standard software development process has four steps: creating a Product Requirement Document (PRD) by the Product Manager -> writing a System Design by the Architect -> creating tasks by the Project Manager -> and coding by the Engineer. You may choose to execute any of these steps. When publishing message to Product Manager, you should directly copy the full original user requirement.
 2.1. If the requirement contains both DATA-RELATED part mentioned in 1 and software development part mentioned in 2, you should decompose the software development part and assign them to different team members based on their expertise, and assign the DATA-RELATED part to Data Analyst David directly.
+2.2. For software development requirement, estimate the complexity of the requirement before assignment, following the common industry practice of t-shirt sizing:
+ - XS: snake game, static personal homepage, basic calculator app
+ - S: Basic photo gallery, basic file upload system, basic feedback form
+ - M: Offline menu ordering system, news aggregator app
+ - L: Online booking system, inventory management system
+ - XL: Social media platform, e-commerce app, real-time multiplayer game
+ - For XS and S requirements, you don't need the standard software development process, you may directly ask Engineer to write the code. Otherwise, estimate if any part of the standard software development process may contribute to a better final code. If so, assign team members accordingly.
 3.1 If the task involves code review (CR) or code checking, you should assign it to Engineer.
 3.2. If the requirement is to fix a bug or issue, you should assign it to Issue Solver. However, if the code is written by Engineer, Engineer must maintain the code.
 4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members.
--- a/metagpt/roles/di/data_analyst.py
+++ b/metagpt/roles/di/data_analyst.py
@ -30,8 +30,8 @@ class DataAnalyst(RoleZero):
    instruction: str = ROLE_INSTRUCTION + EXTRA_INSTRUCTION
    task_type_desc: str = TASK_TYPE_DESC

-    tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser", "Editor:write,read"]
-    custom_tools: list[str] = ["web scraping", "Terminal"]
+    tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser", "Editor:write,read", "SearchEnhancedQA"]
+    custom_tools: list[str] = ["web scraping", "Terminal", "Editor:write,read"]
    custom_tool_recommender: ToolRecommender = None
    experience_retriever: Annotated[ExpRetriever, Field(exclude=True)] = KeywordExpRetriever()

--- a/metagpt/roles/di/engineer2.py
+++ b/metagpt/roles/di/engineer2.py
@ -28,7 +28,7 @@ class Engineer2(RoleZero):

    terminal: Terminal = Field(default_factory=Terminal, exclude=True)

-    tools: list[str] = ["Plan", "Editor:read", "RoleZero", "Terminal:run_command", "Engineer2"]
+    tools: list[str] = ["Plan", "Editor:read", "RoleZero", "Terminal:run_command", "SearchEnhancedQA", "Engineer2"]

    def _update_tool_execution(self):
        # validate = ValidateAndRewriteCode()
--- a/metagpt/roles/di/role_zero.py
+++ b/metagpt/roles/di/role_zero.py
@ -31,7 +31,6 @@ from metagpt.prompts.di.role_zero import (
    ROLE_INSTRUCTION,
    SUMMARY_PROMPT,
    SYSTEM_PROMPT,
-    THOUGHT_GUIDANCE,
 )
 from metagpt.roles import Role
 from metagpt.schema import AIMessage, Message, UserMessage
@ -62,7 +61,6 @@ class RoleZero(Role):
    system_prompt: str = SYSTEM_PROMPT  # Use None to conform to the default value at llm.aask
    cmd_prompt: str = CMD_PROMPT
    cmd_prompt_current_state: str = ""
-    thought_guidance: str = THOUGHT_GUIDANCE
    instruction: str = ROLE_INSTRUCTION
    task_type_desc: Optional[str] = None

@ -85,7 +83,7 @@ class RoleZero(Role):
    # Others
    command_rsp: str = ""  # the raw string containing the commands
    commands: list[dict] = []  # commands to be executed
-    memory_k: int = 20  # number of memories (messages) to use as historical context
+    memory_k: int = 100  # number of memories (messages) to use as historical context
    use_fixed_sop: bool = False
    requirements_constraints: str = ""  # the constraints in user requirements
    use_summary: bool = True  # whether to summarize at the end
@ -115,6 +113,7 @@ class RoleZero(Role):
            "Plan.replace_task": self.planner.plan.replace_task,
            "RoleZero.ask_human": self.ask_human,
            "RoleZero.reply_to_human": self.reply_to_human,
+            "SearchEnhancedQA.run": SearchEnhancedQA().run,
        }
        self.tool_execution_map.update(
            {