move task_type_desc and parse_browser_actions to DataAnalyst

2026-05-02 12:22:39 +02:00 · 2024-07-10 11:29:27 +08:00 · 2024-07-10 11:29:27 +08:00 · b03ce58fde
commit b03ce58fde
parent be607ba3e3
5 changed files with 46 additions and 67 deletions
--- a/metagpt/actions/di/write_analysis_code.py
+++ b/metagpt/actions/di/write_analysis_code.py
@ -42,7 +42,7 @@ class WriteAnalysisCode(Action):
        tool_info: str = "",
        working_memory: list[Message] = None,
        use_reflection: bool = False,
-        browser_memory: list[dict] = None,
+        browser_actions: list[dict] = None,
        **kwargs,
    ) -> str:
        structual_prompt = STRUCTUAL_PROMPT.format(
@ -51,8 +51,8 @@ class WriteAnalysisCode(Action):
            tool_info=tool_info,
        )
        message = [Message(content=structual_prompt, role="user")]
-        if browser_memory:
-            browser_prompt = BROWSER_INFO.format(browser_memory=browser_memory)
+        if browser_actions:
+            browser_prompt = BROWSER_INFO.format(browser_actions=browser_actions)
            message = [Message(content=browser_prompt, role="user")] + message

        working_memory = working_memory or []
--- a/metagpt/prompts/di/data_analyst.py
+++ b/metagpt/prompts/di/data_analyst.py
@ -1,47 +1,4 @@
-CMD_PROMPT = """
-# Data Structure
-class Task(BaseModel):
-    task_id: str = ""
-    dependent_task_ids: list[str] = []
-    instruction: str = ""
-    task_type: str = ""
-    assignee: str = "David"
-
-# Available Commands
-{available_commands}
-
-# Current Plan
-{plan_status}
-
-# Example
-{example}
-
-# Instructions
-Based on the context, write a plan or modify an existing plan to achieve the goal. A plan consists of one to 3 tasks.
-If plan is created, you should track the progress and update the plan accordingly, such as finish_current_task, append_task, reset_task, replace_task, etc.
-Pay close attention to new user message, review the conversation history, use reply_to_human to respond to new user requirement.
-Note:
-1. If you keeping encountering errors, unexpected situation, or you are not sure of proceeding, use ask_human to ask for help.
-2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task.
-3. Each time you finish a task, use reply_to_human to report your progress.
-Pay close attention to the Example provided, you can reuse the example for your current situation if it fits.
-
-You may use any of the available commands to create a plan or update the plan. You may output mutiple commands, they will be executed sequentially.
-If you finish current task, you will automatically take the next task in the existing plan, use finish_task, DON'T append a new task.
-
-# Your commands in a json array, in the following output format, always output a json array, if there is nothing to do, use the pass command:
-Some text indicating your thoughts, such as how you should update the plan status, respond to inquiry, or seek for help. Then a json array of commands.
-```json
-[
-    {{
-        "command_name": str,
-        "args": {{"arg_name": arg_value, ...}}
-    }},
-    ...
-]
-```
-Notice: your output JSON data section must start with **```json [**
-"""
+from metagpt.strategy.task_type import TaskType

 BROWSER_INSTRUCTION = """
 4. Carefully choose to use or not use the browser tool to assist you in web tasks. 
@ -49,3 +6,14 @@ BROWSER_INSTRUCTION = """
    - If you need detail HTML content, write code to get it but not to use the browser tool.
    - Make sure the command_name are certainly in Available Commands when you use the browser tool.
 """
+
+TASK_TYPE_DESC = "\n".join([f"- **{tt.type_name}**: {tt.value.desc}" for tt in TaskType])
+
+
+CODE_STATUS = """
+**Code written**:
+{code}
+
+**Execution status**: {status}
+**Execution result**: {result}
+"""
--- a/metagpt/prompts/di/write_analysis_code.py
+++ b/metagpt/prompts/di/write_analysis_code.py
@ -122,6 +122,6 @@ Latest data info after previous tasks:

 BROWSER_INFO = """
 Here are ordered web actions in the browser environment, note that you can not use the browser tool in the current environment.
-{browser_memory}
+{browser_actions}
 The latest url is the one you should use to view the page. If view page has been done, directly use the variable and html content in executing result.
 """
--- a/metagpt/roles/di/data_analyst.py
+++ b/metagpt/roles/di/data_analyst.py
@ -1,15 +1,18 @@
 from __future__ import annotations

+import re
+from typing import List
+
 from pydantic import Field, model_validator

 from metagpt.actions.di.execute_nb_code import ExecuteNbCode
 from metagpt.actions.di.write_analysis_code import WriteAnalysisCode
 from metagpt.logs import logger
-from metagpt.prompts.di.data_analyst import BROWSER_INSTRUCTION
+from metagpt.prompts.di.data_analyst import BROWSER_INSTRUCTION, TASK_TYPE_DESC, CODE_STATUS
 from metagpt.prompts.di.role_zero import ROLE_INSTRUCTION
 from metagpt.roles.di.role_zero import RoleZero
 from metagpt.schema import TaskResult, Message
-from metagpt.strategy.experience_retriever import ExpRetriever, WebExpRetriever
+from metagpt.strategy.experience_retriever import ExpRetriever, KeywordExpRetriever
 from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
 from metagpt.tools.tool_registry import register_tool

@ -20,11 +23,12 @@ class DataAnalyst(RoleZero):
    profile: str = "DataAnalyst"
    goal: str = "Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc."
    instruction: str = ROLE_INSTRUCTION + BROWSER_INSTRUCTION
+    task_type_desc: str = TASK_TYPE_DESC

    tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser"]
    custom_tools: list[str] = ["machine learning", "web scraping", "Terminal"]
    custom_tool_recommender: ToolRecommender = None
-    experience_retriever: ExpRetriever = WebExpRetriever()
+    experience_retriever: ExpRetriever = KeywordExpRetriever()

    use_reflection: bool = True
    write_code: WriteAnalysisCode = Field(default_factory=WriteAnalysisCode, exclude=True)
@ -40,6 +44,17 @@ class DataAnalyst(RoleZero):
            "DataAnalyst.write_and_exec_code": self.write_and_exec_code,
        })

+    def parse_browser_actions(self, memory: List[Message]):
+        for index, msg in enumerate(memory):
+            if msg.cause_by == "browser":
+                browser_url = re.search('URL: (.*?)\\n', msg.content).group(1)
+                pattern = re.compile(r"Command Browser\.(\w+) executed")
+                browser_action = {
+                    'command': pattern.match(memory[index - 1].content).group(1),
+                    'current url': browser_url
+                }
+                self.browser_actions.append(browser_action)
+
    async def write_and_exec_code(self):
        """Write a code block for current task and execute it in an interactive notebook environment."""
        counter = 0
@ -68,7 +83,7 @@ class DataAnalyst(RoleZero):
                tool_info=tool_info,
                working_memory=self.rc.working_memory.get() if use_reflection else None,
                use_reflection=use_reflection,
-                browser_memory=self.browser_memory
+                browser_actions=self.browser_actions
            )
            self.rc.working_memory.add(Message(content=code, role="assistant", cause_by=WriteAnalysisCode))

@ -83,11 +98,8 @@ class DataAnalyst(RoleZero):
            if success:
                task_result = TaskResult(code=code, result=result, is_success=success)
                self.planner.current_task.update_task_result(task_result)
-        output = f"""
-        **Code written**:
-        {code}
-        **Execution status**:{'Success' if success else 'Failed'}
-        **Execution result**: {result}
-        """
+
+        status = 'Success' if success else 'Failed'
+        output = CODE_STATUS.format(code=code, status=status, result=result)
        self.rc.working_memory.clear()
        return output
--- a/metagpt/roles/di/role_zero.py
+++ b/metagpt/roles/di/role_zero.py
@ -6,7 +6,6 @@ import re
 import traceback
 from typing import Callable, Dict, List, Literal, Tuple

-from metagpt.strategy.task_type import TaskType
 from pydantic import model_validator

 from metagpt.actions import Action
@ -41,6 +40,7 @@ class RoleZero(Role):
    system_msg: list[str] = None  # Use None to conform to the default value at llm.aask
    cmd_prompt: str = CMD_PROMPT
    instruction: str = ROLE_INSTRUCTION
+    task_type_desc: str = None

    # React Mode
    react_mode: Literal["react"] = "react"
@ -54,7 +54,7 @@ class RoleZero(Role):
    # Equipped with three basic tools by default for optional use
    editor: Editor = Editor()
    browser: Browser = Browser()
-    browser_memory: list[dict] = []  # store the memory of browser
+    browser_actions: list[dict] = []  # store the browser history actions
    # terminal: Terminal = Terminal()  # FIXME: TypeError: cannot pickle '_thread.lock' object

    # Experience
@ -137,7 +137,6 @@ class RoleZero(Role):

        ### 2. Plan Status ###
        plan_status, current_task = self._get_plan_status()
-        task_type_desc = "\n".join([f"- **{tt.type_name}**: {tt.value.desc}" for tt in TaskType])

        ### 3. Tool/Command Info ###
        tools = await self.tool_recommender.recommend_tools()
@ -150,19 +149,16 @@ class RoleZero(Role):
            example=example,
            available_commands=tool_info,
            instruction=self.instruction.strip(),
-            task_type_desc=task_type_desc,
+            task_type_desc=self.task_type_desc,
        )
        memory = self.rc.memory.get(self.memory_k)
        if not self.browser.is_empty_page:
            pattern = re.compile(r"Command Browser\.(\w+) executed")
            for index, msg in zip(range(len(memory), 0, -1), memory[::-1]):
                if pattern.match(msg.content):
-                    content = await self.browser.view()
-                    memory.insert(index, UserMessage(cause_by="browser", content=content))
-                    browser_url = re.search('URL: (.*?)\\n', content).group(1)
-                    browser_action = {'command': pattern.match(msg.content).group(1), 'current url': browser_url}
-                    self.browser_memory.append(browser_action)
+                    memory.insert(index, UserMessage(cause_by="browser", content=await self.browser.view()))
                    break
+        self.parse_browser_actions(memory=memory)
        context = self.llm.format_msg(memory + [UserMessage(content=prompt)])
        # print(*context, sep="\n" + "*" * 5 + "\n")
        async with ThoughtReporter(enable_llm_stream=True):
@ -171,6 +167,9 @@ class RoleZero(Role):

        return True

+    def parse_browser_actions(self, memory: List[Message]):
+        pass
+
    async def _act(self) -> Message:
        if self.use_fixed_sop:
            return await super()._act()