Merge branch 'dev_tool_selection' of https://gitlab.deepwisdomai.com/agents/data_agents_opt into dev_tool_selection

stellahsr 2023-12-14 15:43:24 +08:00
commit 9d39a058aa
36 changed files with 3953 additions and 916 deletions

View file

@@ -1,57 +1,56 @@
from typing import Dict, List, Union, Tuple, Optional, Any
from metagpt.actions import Action
from metagpt.logs import logger
from metagpt.schema import Message, Plan
from metagpt.utils.common import CodeParser, create_func_config
from metagpt.actions.write_analysis_code import BaseWriteAnalysisCode
DEBUG_REFLECTION_EXAMPLE = '''
Example 1:
[previous impl]:
```python
def add(a: int, b: int) -> int:
    """
    Given integers a and b, return the total value of a and b.
    """
    return a - b
```

[runtime Error]:
Tests passed:
Tests failed:
assert add(1, 2) == 3 # output: -1
assert add(1, 2) == 4 # output: -1

[reflection on previous impl]:
The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.

[improved impl]:
```python
def add(a: int, b: int) -> int:
    """
    Given integers a and b, return the total value of a and b.
    """
    return a + b
```
'''
REFLECTION_PROMPT = """
Here is an example for you.
{debug_example}
[context]
{context}
[previous impl]
{code}
[runtime Error]
{runtime_result}
Here is an example for you.
{debug_example}
[context]
{context}
Analysis the error step by step, provide me improve method and code. Remember to follow [context] requirement.
[reflection on previous impl]:
xxx
[previous impl]
{code}
[runtime Error]
{runtime_result}
"""
Analysis the error step by step, provide me improve method and code. Remember to follow [context] rerquirement. Don't forget write code for steps behind the error step.
[reflection on previous impl]:
xxx
"""
CODE_REFLECTION = {
"name": "execute_reflection_code",
@@ -85,10 +84,10 @@ class DebugCode(BaseWriteAnalysisCode):
name: str = "debugcode"
context: Optional[str] = None
llm: None
def __init__(self, **kwargs: Any):
super().__init__(**kwargs)
async def run_reflection(
self,
# goal,
@@ -100,23 +99,26 @@ class DebugCode(BaseWriteAnalysisCode):
) -> dict:
info = []
# finished_code_and_result = finished_code + "\n [finished results]\n\n" + finished_code_result
reflection_prompt = REFLECTION_PROMPT.format(
debug_example=DEBUG_REFLECTION_EXAMPLE,
context=context,
# goal=goal,
# finished_code=finished_code_and_result,
code=code,
runtime_result=runtime_result,
)
system_prompt = "You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation."
info.append(Message(role="system", content=system_prompt))
info.append(Message(role="user", content=reflection_prompt))
# msg = messages_to_str(info)
# resp = await self.llm.aask(msg=msg)
resp = await self.llm.aask_code(
messages=info, **create_func_config(CODE_REFLECTION)
)
logger.info(f"reflection is {resp}")
return resp
# async def rewrite_code(self, reflection: str = "", context: List[Message] = None) -> str:
# """
# Rewrite the code based on the reflection
@@ -131,14 +133,16 @@ class DebugCode(BaseWriteAnalysisCode):
# resp = await self.llm.aask(msg=msg)
# improv_code = CodeParser.parse_code(block=None, text=resp)
# return improv_code
async def run(
self,
context: List[Message] = None,
plan: str = "",
# finished_code: str = "",
# finished_code_result: str = "",
code: str = "",
runtime_result: str = "",
) -> str:
"""
Reflect on the currently running code and the runtime error message, then fix the code accordingly.
"""
@@ -152,5 +156,5 @@ class DebugCode(BaseWriteAnalysisCode):
)
# Rewrite the code based on the reflection result
# improv_code = await self.rewrite_code(reflection, context=context)
improv_code = reflection["improved_impl"]
return improv_code
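
Read end to end, the flow is: build a reflection prompt from the failing code and its runtime error, ask the LLM for a structured reflection via aask_code, and return its improved_impl field. A rough caller sketch (hypothetical; only run's signature comes from the diff):

```python
# Hypothetical driver for DebugCode (illustration only; not part of this diff).
# Assumes the action is constructed with a working LLM behind it.
import asyncio

async def main():
    debugger = DebugCode()
    improved = await debugger.run(
        context=[],  # prior Message objects, if any
        code="def add(a, b):\n    return a - b",  # the failing implementation
        runtime_result="Tests failed:\nassert add(1, 2) == 3 # output: -1",
    )
    print(improved)  # the "improved_impl" string extracted from the reflection

asyncio.run(main())
```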

View file

@@ -8,6 +8,7 @@ from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Tuple, Union
import traceback
import re
import nbformat
from nbclient import NotebookClient
@@ -180,11 +181,31 @@ class ExecutePyCode(ExecuteCode, Action):
# TODO: add max_tries for run code.
cell_index = len(self.nb.cells) - 1
await self.nb_client.async_execute_cell(self.nb.cells[-1], cell_index)
outputs = self.parse_outputs(self.nb.cells[-1].outputs)
success = True
except Exception as e:
# FIXME: CellExecutionError is hard to read. For example, `1/0` raises ZeroDivisionError:
# CellExecutionError('An error occurred while executing the following cell:\n------------------\nz=1/0\n------------------\n\n\n\x1b[0;31m---------------------------------------------------------------------------\x1b[0m\n\x1b[0;31mZeroDivisionError\x1b[0m Traceback (most recent call last)\nCell \x1b[0;32mIn[1], line 1\x1b[0m\n\x1b[0;32m----> 1\x1b[0m z\x1b[38;5;241m=\x1b[39m\x1b[38;5;241;43m1\x1b[39;49m\x1b[38;5;241;43m/\x1b[39;49m\x1b[38;5;241;43m0\x1b[39;49m\n\n\x1b[0;31mZeroDivisionError\x1b[0m: division by zero\n')
outputs = traceback.format_exc()
success = False
return truncate(remove_escape_and_color_codes(outputs)), success
else:
# TODO: markdown
raise NotImplementedError(f"Unsupported code type: {language}. Only 'code' is supported.")
def truncate(result: str, keep_len: int = 2000) -> str:
desc = f"Truncated to show only the last {keep_len} characters\n"
if result.startswith(desc):
result = result[len(desc) :]
if len(result) > keep_len:
result = result[-keep_len:]
return desc + result
return result
def remove_escape_and_color_codes(input_str):
# Use a regular expression to remove escape characters and ANSI color codes
pattern = re.compile(r'\x1b\[[0-9;]*[mK]')
result = pattern.sub('', input_str)
return result
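
The two helpers compose: cell output is first stripped of ANSI escape codes, then truncated to the trailing 2,000 characters with a marker line. A standalone sanity check (the import path is a guess; both functions are defined above):

```python
# Sanity check for the output-cleaning helpers above (illustrative only).
# The module path is an assumption; adjust to wherever this file lives.
from metagpt.actions.execute_code import remove_escape_and_color_codes, truncate

raw = "\x1b[0;31mZeroDivisionError\x1b[0m: division by zero\n" + "x" * 5000
clean = remove_escape_and_color_codes(raw)  # drops the \x1b[...m color codes
short = truncate(clean)                     # keeps only the last 2000 characters
assert short.startswith("Truncated to show only the last 2000 characters")
```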

View file

@@ -0,0 +1,116 @@
import json
from typing import Dict, List, Union
from metagpt.actions import Action
from metagpt.schema import Message, Plan
from metagpt.utils.common import CodeParser
from metagpt.logs import logger
class ReviewConst:
TASK_REVIEW_TRIGGER = "task"
CODE_REVIEW_TRIGGER = "code"
CONTINUE_WORD = ["confirm", "continue", "c", "yes", "y"]
CHANGE_WORD = ["change"]
EXIT_WORD = ["exit"]
TASK_REVIEW_INSTRUCTION = (
f"If you want to change, add, delete a task or merge tasks in the plan, say '{CHANGE_WORD[0]} task task_id or current task, ... (things to change)' "
f"If you confirm the output from the current task and wish to continue, type: {CONTINUE_WORD[0]}"
)
CODE_REVIEW_INSTRUCTION = (
f"If you want the codes to be rewritten, say '{CHANGE_WORD[0]} ... (your change advice)' "
f"If you want to leave it as is, type: {CONTINUE_WORD[0]} or {CONTINUE_WORD[1]}"
)
EXIT_INSTRUCTION = f"If you want to terminate the process, type: {EXIT_WORD[0]}"
class AskReview(Action):
async def run(
self, context: List[Message], plan: Plan = None, trigger: str = "task"
):
logger.info("Current overall plan:")
logger.info(
"\n".join(
[
f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}"
for task in plan.tasks
]
)
)
logger.info("most recent context:")
latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
review_instruction = (
ReviewConst.TASK_REVIEW_INSTRUCTION
if trigger == ReviewConst.TASK_REVIEW_TRIGGER
else ReviewConst.CODE_REVIEW_INSTRUCTION
)
prompt = (
f"This is a <{trigger}> review. Please review output from {latest_action}\n"
f"{review_instruction}\n"
f"{ReviewConst.EXIT_INSTRUCTION}\n"
"Please type your review below:\n"
)
rsp = input(prompt)
if rsp.lower() in ReviewConst.EXIT_WORD:
exit()
# Confirmation can be one of "confirm", "continue", "c", "yes", "y" exactly, or sentences containing "confirm".
# One could say "confirm this task, but change the next task to ..."
confirmed = rsp.lower() in ReviewConst.CONTINUE_WORD or ReviewConst.CONTINUE_WORD[0] in rsp.lower()
return rsp, confirmed
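
The confirmation test above deliberately matches both bare keywords and sentences that embed "confirm", so a reply like "confirm this task, but change the next task" still counts as approval. A standalone illustration of that predicate (hypothetical inputs):

```python
# Standalone illustration of the confirmation predicate used in AskReview.
CONTINUE_WORD = ["confirm", "continue", "c", "yes", "y"]

for rsp in ["y", "Confirm this task, but change the next one", "change task 2"]:
    confirmed = rsp.lower() in CONTINUE_WORD or CONTINUE_WORD[0] in rsp.lower()
    print(f"{rsp!r} -> {confirmed}")  # True, True, False
```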
class SummarizeAnalysis(Action):
PROMPT_TEMPLATE = """
# Context
{context}
# Summary
Output a 30-word summary of the analysis tools and modeling algorithms you have used, and the corresponding results. Make sure to announce the complete path to your test prediction file. Your summary:
"""
def __init__(self, name: str = "", context=None, llm=None):
super().__init__(name, context, llm)
async def run(self, completed_plan: Plan) -> str:
tasks = json.dumps(
[task.dict() for task in completed_plan.tasks],
indent=4,
ensure_ascii=False,
) # all tasks finished, return all task outputs
prompt = self.PROMPT_TEMPLATE.format(context=tasks)
summary = await self._aask(prompt)
return summary
class Reflect(Action):
PROMPT_TEMPLATE = """
# Context
__context__
# Latest User Requirement
__user_requirement__
# Summary
Above are all your attempts to tackle the user requirement. You plan, act, submit your output, and get the result and feedback.
Output a JSON following the format:
```json
{
"summary": str = "summarize each of your previous trial in a triple of (your methods, the corresponding result, potential improvement), list them out",
"takeaways": str = "carefully find key takeaways from your summarization",
"reflection": str = "give specific instruction to improve your next trial in a step-by-step thinking process",
}
```
"""
REWRITE_PLAN_INSTRUCTION = """Use this reflection to rewrite the plan: modify the current plan in place, refer to your specific instructions, and think about which tasks to change, add, or delete. Only make necessary changes, keep reusable tasks unchanged, and output the COMPLETE new plan starting from the first task. Your plan should have no more than 5 tasks."""
async def run(self, context: str, user_requirement: str = "") -> str:
user_requirement = user_requirement or "Score as high as possible in a data modeling competition"
# prompt = self.PROMPT_TEMPLATE.format(context=context, user_requirement=user_requirement)
prompt = self.PROMPT_TEMPLATE.replace("__context__", context).replace("__user_requirement__", user_requirement)
rsp_json = await self._aask(prompt)
rsp = CodeParser.parse_code(block=None, text=rsp_json)
reflection = json.loads(rsp)["reflection"]
return reflection
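
Note that Reflect fills its template with str.replace on __context__ and __user_requirement__ rather than str.format: the literal braces in the embedded JSON example would otherwise be parsed as format fields. A minimal demonstration of the failure mode (toy template, not the one above):

```python
# Why str.replace instead of str.format: literal JSON braces in the
# template are treated as format fields and break str.format.
template = 'Output a json: {"summary": str}. Context: __context__'
try:
    template.format(context="demo")
except KeyError as err:
    print(f"str.format fails on the JSON braces: {err!r}")
print(template.replace("__context__", "demo"))  # safe substitution
```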

View file

@@ -120,6 +120,5 @@ class WriteCodeSteps(Action):
context = STRUCTURAL_CONTEXT.format(
user_requirement=user_requirement, tasks=tasks, codes=codes, current_task=current_task
)
# print(context)
return context

View file

@@ -4,13 +4,16 @@
@Author : orange-crow
@File : plan.py
"""
from typing import List, Dict, Tuple
import json
from copy import deepcopy
import traceback
from metagpt.actions import Action
from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_PROMPT, ASSIGN_TASK_TYPE
from metagpt.schema import Message, Task, Plan
from metagpt.utils.common import CodeParser, create_func_config
from metagpt.logs import logger
class WritePlan(Action):
@@ -19,7 +22,8 @@ class WritePlan(Action):
__context__
# Task:
Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to __max_tasks__ tasks.
If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
If you encounter errors on the current task, revise and output the current single task only.
Output a list of JSON objects following the format:
```json
[
@@ -67,8 +71,36 @@ class WritePlan(Action):
rsp = await self.assign_task_type(json.loads(rsp))
return rsp
def rsp_to_tasks(rsp: str) -> List[Task]:
rsp = json.loads(rsp)
tasks = [Task(**task_config) for task_config in rsp]
return tasks
def update_plan_from_rsp(rsp: str, current_plan: Plan):
tasks = rsp_to_tasks(rsp)
if len(tasks) == 1 or tasks[0].dependent_task_ids:
if tasks[0].dependent_task_ids and len(tasks) > 1:
# tasks[0].dependent_task_ids means the generated tasks are not a complete plan
# for they depend on tasks in the current plan, in this case, we only support updating one task each time
logger.warning(
"Current plan will take only the first generated task if the generated tasks are not a complete plan"
)
# handle a single task
if current_plan.has_task_id(tasks[0].task_id):
# replace an existing task
current_plan.replace_task(tasks[0])
else:
# append one task
current_plan.append_task(tasks[0])
else:
# add tasks in general
current_plan.add_tasks(tasks)
def precheck_update_plan_from_rsp(rsp: str, current_plan: Plan) -> Tuple[bool, str]:
temp_plan = deepcopy(current_plan)
try:
update_plan_from_rsp(rsp, temp_plan)
return True, ""
except Exception as e:
return False, str(e)
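
The precheck dry-runs the update on a deepcopy, so the real plan is mutated only after the response validates. A sketch of the intended call pattern (hypothetical Task fields and plan object; only the two functions come from the diff):

```python
# Hypothetical driver: validate an LLM-generated plan before applying it.
# `current_plan` is assumed to be an existing Plan; the task fields below
# are illustrative, mirroring the Task usage above.
import json
from metagpt.logs import logger

rsp = json.dumps([
    {"task_id": "1", "dependent_task_ids": [], "instruction": "Load and explore the data"},
    {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Train a baseline model"},
])

success, error = precheck_update_plan_from_rsp(rsp, current_plan)  # dry run on a copy
if success:
    update_plan_from_rsp(rsp, current_plan)  # now safe to mutate the real plan
else:
    logger.warning(f"Generated plan failed precheck, retrying: {error}")
```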