Merge branch 'dev_tool_selection' of https://gitlab.deepwisdomai.com/agents/data_agents_opt into dev_tool_selection

This commit is contained in:
stellahsr 2023-12-14 15:43:24 +08:00
commit 9d39a058aa
36 changed files with 3953 additions and 916 deletions

1
.gitignore vendored
View file

@@ -167,3 +167,4 @@ tmp
output.wav
metagpt/roles/idea_agent.py
.aider*
/tests/metagpt/actions/check_data.py

40
kaggle_team.py Normal file
View file

@@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import asyncio
import fire
from metagpt.roles.kaggle_manager import KaggleManager
from metagpt.roles.ml_engineer import MLEngineer
from metagpt.team import Team
async def main(
# competition: str,
# data_desc: str,
# requirement: str,
investment: float = 5.0,
n_round: int = 10,
auto_run: bool = False,
):
competition, data_desc, requirement = (
"titanic",
"Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
# "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
# "generate a random prediction, replace the Survived column of gender_submission.csv, and save the prediction to a new submission file",
"Score as high as possible for the provided dataset, save the test prediction to a csv with two columns PassengerId and Survived"
)
team = Team()
team.hire(
[
KaggleManager(competition=competition, data_desc=data_desc),
MLEngineer(goal=requirement, auto_run=auto_run),
]
)
team.invest(investment)
team.start_project(requirement)
await team.run(n_round=n_round)
if __name__ == '__main__':
fire.Fire(main)

View file

@@ -1,57 +1,56 @@
from typing import Dict, List, Union, Tuple, Optional, Any
from metagpt.actions import Action
from metagpt.logs import logger
from metagpt.schema import Message, Plan
from metagpt.utils.common import CodeParser, create_func_config
from metagpt.actions.write_analysis_code import BaseWriteAnalysisCode
DEBUG_REFLECTION_EXAMPLE = '''Example 1:
[previous impl]:
```python
def add(a: int, b: int) -> int:
"""
Given integers a and b, return the total value of a and b.
"""
return a - b
```
DEBUG_REFLECTION_EXAMPLE = '''
Example 1:
[previous impl]:
```python
def add(a: int, b: int) -> int:
"""
Given integers a and b, return the total value of a and b.
"""
return a - b
```
[runtime Error]:
Tests passed:
[runtime Error]:
Tests passed:
Tests failed:
assert add(1, 2) == 3 # output: -1
assert add(1, 2) == 4 # output: -1
Tests failed:
assert add(1, 2) == 3 # output: -1
assert add(1, 2) == 4 # output: -1
[reflection on previous impl]:
The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
[reflection on previous impl]:
The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
[improved impl]:
```python
def add(a: int, b: int) -> int:
"""
Given integers a and b, return the total value of a and b.
"""
return a + b
```
'''
[improved impl]:
```python
def add(a: int, b: int) -> int:
"""
Given integers a and b, return the total value of a and b.
"""
return a + b
```
'''
REFLECTION_PROMPT = """
Here is an example for you.
{debug_example}
[context]
{context}
[previous impl]
{code}
[runtime Error]
{runtime_result}
Here is an example for you.
{debug_example}
[context]
{context}
Analysis the error step by step, provide me improve method and code. Remember to follow [context] requirement.
[reflection on previous impl]:
xxx
[previous impl]
{code}
[runtime Error]
{runtime_result}
"""
Analyze the error step by step and provide an improved method and code. Remember to follow the [context] requirement. Don't forget to write code for the steps after the error step.
[reflection on previous impl]:
xxx
"""
CODE_REFLECTION = {
"name": "execute_reflection_code",
@@ -85,10 +84,10 @@ class DebugCode(BaseWriteAnalysisCode):
name: str = "debugcode"
context: Optional[str] = None
llm: None
def __init__(self, **kwargs: Any):
super().__init__(**kwargs)
async def run_reflection(
self,
# goal,
@@ -100,23 +99,26 @@ class DebugCode(BaseWriteAnalysisCode):
) -> dict:
info = []
# finished_code_and_result = finished_code + "\n [finished results]\n\n" + finished_code_result
reflection_prompt = REFLECTION_PROMPT.format(debug_example=DEBUG_REFLECTION_EXAMPLE,
context=context,
# goal=goal,
# finished_code=finished_code_and_result,
code=code,
runtime_result=runtime_result
)
reflection_prompt = REFLECTION_PROMPT.format(
debug_example=DEBUG_REFLECTION_EXAMPLE,
context=context,
# goal=goal,
# finished_code=finished_code_and_result,
code=code,
runtime_result=runtime_result,
)
system_prompt = "You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation."
info.append(Message(role="system", content=system_prompt))
info.append(Message(role="user", content=reflection_prompt))
# msg = messages_to_str(info)
# resp = await self.llm.aask(msg=msg)
resp = await self.llm.aask_code(messages=info, **create_func_config(CODE_REFLECTION))
resp = await self.llm.aask_code(
messages=info, **create_func_config(CODE_REFLECTION)
)
logger.info(f"reflection is {resp}")
return resp
# async def rewrite_code(self, reflection: str = "", context: List[Message] = None) -> str:
# """
# Rewrite the code based on the reflection
@@ -131,14 +133,16 @@ class DebugCode(BaseWriteAnalysisCode):
# resp = await self.llm.aask(msg=msg)
# improv_code = CodeParser.parse_code(block=None, text=resp)
# return improv_code
async def run(self,
context: List[Message] = None,
plan: str = "",
# finished_code: str = "",
# finished_code_result: str = "",
code: str = "",
runtime_result: str = "") -> str:
async def run(
self,
context: List[Message] = None,
plan: str = "",
# finished_code: str = "",
# finished_code_result: str = "",
code: str = "",
runtime_result: str = "",
) -> str:
"""
Reflect on and correct the current code based on the runtime error information.
"""
@@ -152,5 +156,5 @@ class DebugCode(BaseWriteAnalysisCode):
)
# Rewrite the code based on the reflection result
# improv_code = await self.rewrite_code(reflection, context=context)
improv_code = reflection['improved_impl']
improv_code = reflection["improved_impl"]
return improv_code
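For orientation, a minimal sketch of how `DebugCode.run` might be driven, mirroring the `DEBUG_REFLECTION_EXAMPLE` above. The failing snippet and runtime output are hypothetical, and a configured LLM backend is assumed:

```python
import asyncio

from metagpt.actions.debug_code import DebugCode

async def demo():
    # Hypothetical failing implementation and its test output.
    buggy_code = "def add(a: int, b: int) -> int:\n    return a - b"
    runtime_result = "Tests failed:\n    assert add(1, 2) == 3 # output: -1"
    improved = await DebugCode().run(
        plan="implement an add function",
        code=buggy_code,
        runtime_result=runtime_result,
        context=[],
    )
    print(improved)  # the 'improved_impl' returned by the reflection call

# asyncio.run(demo())  # requires a configured LLM backend
```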

View file

@@ -8,6 +8,7 @@ from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Tuple, Union
import traceback
import re
import nbformat
from nbclient import NotebookClient
@@ -180,11 +181,31 @@ class ExecutePyCode(ExecuteCode, Action):
# TODO: add max_tries for run code.
cell_index = len(self.nb.cells) - 1
await self.nb_client.async_execute_cell(self.nb.cells[-1], cell_index)
return self.parse_outputs(self.nb.cells[-1].outputs), True
outputs = self.parse_outputs(self.nb.cells[-1].outputs)
success = True
except Exception as e:
# FIXME: CellExecutionError is hard to read. for example `1\0` raise ZeroDivisionError:
# CellExecutionError('An error occurred while executing the following cell:\n------------------\nz=1/0\n------------------\n\n\n\x1b[0;31m---------------------------------------------------------------------------\x1b[0m\n\x1b[0;31mZeroDivisionError\x1b[0m Traceback (most recent call last)\nCell \x1b[0;32mIn[1], line 1\x1b[0m\n\x1b[0;32m----> 1\x1b[0m z\x1b[38;5;241m=\x1b[39m\x1b[38;5;241;43m1\x1b[39;49m\x1b[38;5;241;43m/\x1b[39;49m\x1b[38;5;241;43m0\x1b[39;49m\n\n\x1b[0;31mZeroDivisionError\x1b[0m: division by zero\n')
return traceback.format_exc(), False
outputs = traceback.format_exc()
success = False
return truncate(remove_escape_and_color_codes(outputs)), success
else:
# TODO: markdown
raise NotImplementedError(f"Not support this code type : {language}, Only support code!")
def truncate(result: str, keep_len: int = 2000) -> str:
desc = f"Truncated to show only the last {keep_len} characters\n"
if result.startswith(desc):
result = result[len(desc) :]
if len(result) > keep_len:
result = result[-keep_len:]
return desc + result
return result
def remove_escape_and_color_codes(input_str):
# Use a regular expression to strip ANSI escape and color codes
pattern = re.compile(r'\x1b\[[0-9;]*[mK]')
result = pattern.sub('', input_str)
return result
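A quick illustration of the two helpers above, assuming both are in scope (the sample strings are made up):

```python
raw = "\x1b[0;31mZeroDivisionError\x1b[0m: division by zero"
print(remove_escape_and_color_codes(raw))  # ZeroDivisionError: division by zero

long_output = "x" * 5000
short = truncate(long_output, keep_len=2000)
print(short.splitlines()[0])       # Truncated to show only the last 2000 characters
print(len(short.splitlines()[1]))  # 2000
```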

View file

@@ -0,0 +1,116 @@
import json
from typing import Dict, List, Union
from metagpt.actions import Action
from metagpt.schema import Message, Plan
from metagpt.utils.common import CodeParser
from metagpt.logs import logger
class ReviewConst:
TASK_REVIEW_TRIGGER = "task"
CODE_REVIEW_TRIGGER = "code"
CONTINUE_WORD = ["confirm", "continue", "c", "yes", "y"]
CHANGE_WORD = ["change"]
EXIT_WORD = ["exit"]
TASK_REVIEW_INSTRUCTION = (
f"If you want to change, add, delete a task or merge tasks in the plan, say '{CHANGE_WORD[0]} task task_id or current task, ... (things to change)' "
f"If you confirm the output from the current task and wish to continue, type: {CONTINUE_WORD[0]}"
)
CODE_REVIEW_INSTRUCTION = (
f"If you want the codes to be rewritten, say '{CHANGE_WORD[0]} ... (your change advice)' "
f"If you want to leave it as is, type: {CONTINUE_WORD[0]} or {CONTINUE_WORD[1]}"
)
EXIT_INSTRUCTION = f"If you want to terminate the process, type: {EXIT_WORD[0]}"
class AskReview(Action):
async def run(
self, context: List[Message], plan: Plan = None, trigger: str = "task"
):
logger.info("Current overall plan:")
logger.info(
"\n".join(
[
f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}"
for task in plan.tasks
]
)
)
logger.info("most recent context:")
latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
review_instruction = (
ReviewConst.TASK_REVIEW_INSTRUCTION
if trigger == ReviewConst.TASK_REVIEW_TRIGGER
else ReviewConst.CODE_REVIEW_INSTRUCTION
)
prompt = (
f"This is a <{trigger}> review. Please review output from {latest_action}\n"
f"{review_instruction}\n"
f"{ReviewConst.EXIT_INSTRUCTION}\n"
"Please type your review below:\n"
)
rsp = input(prompt)
if rsp.lower() in ReviewConst.EXIT_WORD:
exit()
# Confirmation can be one of "confirm", "continue", "c", "yes", "y" exactly, or sentences containing "confirm".
# One could say "confirm this task, but change the next task to ..."
confirmed = rsp.lower() in ReviewConst.CONTINUE_WORD or ReviewConst.CONTINUE_WORD[0] in rsp.lower()
return rsp, confirmed
class SummarizeAnalysis(Action):
PROMPT_TEMPLATE = """
# Context
{context}
# Summary
Output a 30-word summary of the analysis tools and modeling algorithms you have used, and the corresponding results. Make sure to announce the complete path to your test prediction file. Your summary:
"""
def __init__(self, name: str = "", context=None, llm=None):
super().__init__(name, context, llm)
async def run(self, completed_plan: Plan) -> str:
tasks = json.dumps(
[task.dict() for task in completed_plan.tasks],
indent=4,
ensure_ascii=False,
) # all tasks finished, return all task outputs
prompt = self.PROMPT_TEMPLATE.format(context=tasks)
summary = await self._aask(prompt)
return summary
class Reflect(Action):
PROMPT_TEMPLATE = """
# Context
__context__
# Latest User Requirement
__user_requirement__
# Summary
Above are all your attempts to tackle the user requirement. You plan, act, submit your output, and get the result and feedback.
Output a json following the format:
```json
{
"summary": str = "summarize each of your previous trial in a triple of (your methods, the corresponding result, potential improvement), list them out",
"takeaways": str = "carefully find key takeaways from your summarization",
"reflection": str = "give specific instruction to improve your next trial in a step-by-step thinking process",
}
```
"""
REWRITE_PLAN_INSTRUCTION = """Use this reflection to rewrite the plan: modify the current plan in place, refer to your specific instruction, and think about which tasks you should change, add, or delete. Only make necessary changes, keep reusable tasks unchanged, and output the COMPLETE new plan starting from the first task. Your plan should have no more than 5 tasks."""
async def run(self, context: str, user_requirement: str = "") -> str:
user_requirement = user_requirement or "Score as high as possible in a data modeling competition"
# prompt = self.PROMPT_TEMPLATE.format(context=context, user_requirement=user_requirement)
prompt = self.PROMPT_TEMPLATE.replace("__context__", context).replace("__user_requirement__", user_requirement)
rsp_json = await self._aask(prompt)
rsp = CodeParser.parse_code(block=None, text=rsp_json)
reflection = json.loads(rsp)["reflection"]
return reflection

View file

@@ -120,6 +120,5 @@ class WriteCodeSteps(Action):
context = STRUCTURAL_CONTEXT.format(
user_requirement=user_requirement, tasks=tasks, codes=codes, current_task=current_task
)
print(context)
# print(context)
return context

View file

@@ -4,13 +4,16 @@
@Author : orange-crow
@File : plan.py
"""
from typing import List, Dict
from typing import List, Dict, Tuple
import json
from copy import deepcopy
import traceback
from metagpt.actions import Action
from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_PROMPT, ASSIGN_TASK_TYPE
from metagpt.schema import Message, Task
from metagpt.schema import Message, Task, Plan
from metagpt.utils.common import CodeParser, create_func_config
from metagpt.logs import logger
class WritePlan(Action):
@@ -19,7 +22,8 @@ class WritePlan(Action):
__context__
# Task:
Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to __max_tasks__ tasks.
If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes.
If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
If you encounter errors on the current task, revise and output the current single task only.
Output a list of jsons following the format:
```json
[
@@ -67,8 +71,36 @@
rsp = await self.assign_task_type(json.loads(rsp))
return rsp
@staticmethod
def rsp_to_tasks(rsp: str) -> List[Task]:
rsp = json.loads(rsp)
tasks = [Task(**task_config) for task_config in rsp]
return tasks
def rsp_to_tasks(rsp: str) -> List[Task]:
rsp = json.loads(rsp)
tasks = [Task(**task_config) for task_config in rsp]
return tasks
def update_plan_from_rsp(rsp: str, current_plan: Plan):
tasks = rsp_to_tasks(rsp)
if len(tasks) == 1 or tasks[0].dependent_task_ids:
if tasks[0].dependent_task_ids and len(tasks) > 1:
# tasks[0].dependent_task_ids means the generated tasks are not a complete plan
# for they depend on tasks in the current plan, in this case, we only support updating one task each time
logger.warning(
"Current plan will take only the first generated task if the generated tasks are not a complete plan"
)
# handle a single task
if current_plan.has_task_id(tasks[0].task_id):
# replace an existing task
current_plan.replace_task(tasks[0])
else:
# append one task
current_plan.append_task(tasks[0])
else:
# add tasks in general
current_plan.add_tasks(tasks)
def precheck_update_plan_from_rsp(rsp: str, current_plan: Plan) -> Tuple[bool, str]:
temp_plan = deepcopy(current_plan)
try:
update_plan_from_rsp(rsp, temp_plan)
return True, ""
except Exception as e:
return False, str(e)
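A minimal sketch of the intended flow: precheck the LLM response on a deep copy first, then apply it for real. The response JSON and task fields below are hypothetical:

```python
import json

from metagpt.actions.write_plan import precheck_update_plan_from_rsp, update_plan_from_rsp
from metagpt.schema import Plan, Task

# Hypothetical LLM response: one revised task targeting an existing task_id.
rsp = json.dumps([{
    "task_id": "2",
    "dependent_task_ids": ["1"],
    "instruction": "train a lightgbm model",
    "task_type": "model_train",
}])
plan = Plan(goal="predict survival")
plan.add_tasks([Task(task_id="1", instruction="run EDA"),
                Task(task_id="2", instruction="train a model")])

is_valid, error = precheck_update_plan_from_rsp(rsp, plan)  # dry run on a deepcopy
if is_valid:
    update_plan_from_rsp(rsp, plan)  # a single task with dependencies -> replace_task
else:
    print(f"invalid plan: {error}")
```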

View file

@@ -95,6 +95,9 @@ class Config(metaclass=Singleton):
self.prompt_format = self._get("PROMPT_FORMAT", "markdown")
self.kaggle_username = self._get("KAGGLE_USERNAME", "")
self.kaggle_key = self._get("KAGGLE_KEY", "")
def _init_with_config_files_and_env(self, configs: dict, yaml_file):
"""Load from config/key.yaml, config/config.yaml, and env in decreasing order of priority"""
configs.update(os.environ)

View file

@@ -155,49 +155,72 @@ PRINT_DATA_COLUMNS = {
GENERATE_CODE_PROMPT = """
# Background
Assist in completing [{user_requirement}] in a Jupyter notebook.
As a data scientist, you need to help the user achieve their goal [{user_requirement}] step-by-step in a continuous Jupyter notebook.
## Task Progress
### Done Tasks
## Done Tasks
```python
{history_code}
```end
### Current Task
## Current Task
{current_task}
## Latest Data Info
# Latest Data Info
Latest data info after previous tasks:
{column_info}
# Task
Fully implement 'Current Task', ensuring all necessary steps are covered without repeating code from 'Done Tasks'. Specifically, {special_prompt}
Write complete code for 'Current Task', and avoid duplicating code from 'Done Tasks', such as repeated imports of packages, reading data, etc.
Specifically, {special_prompt}
# Code Steps:
Follow steps below when you writing code if it's convenient.
Strictly follow the steps below when writing code, if it's convenient.
{code_steps}
# Output Example:
When the current task is "train a lightgbm model on training data" and there are two steps in 'Code Steps', the code should look like:
```python
# Step 1: check data type and convert to numeric
obj_cols = train.select_dtypes(include='object').columns.tolist()
for col in obj_cols:
encoder = LabelEncoder()
train[col] = encoder.fit_transform(train[col])
test[col] = test[col].apply(lambda x: x if x in encoder.classes_ else 'unknown')
test[col] = encoder.transform(test[col])
# Step 2: train lightgbm model
model = LGBMClassifier()
model.fit(train, y_train)
```end
# Constraints:
- Ensure the new code is executable in the same Jupyter notebook in which the code from previous tasks has been executed.
- The output code should contain all steps implemented in 'Code Steps'.
"""
TOOL_USAGE_PROMPT = """
# Background
Assist in completing [{user_requirement}] in a Jupyter notebook.
As a data scientist, you need to help the user achieve their goal [{user_requirement}] step-by-step in a continuous Jupyter notebook.
## Task Progress
### Done Tasks
## Done Tasks
```python
{history_code}
```end
### Current Task
## Current Task
{current_task}
## Latest Data Info
# Latest Data Info
Latest data info after previous tasks:
{column_info}
# Task
Fully implement 'Current Task', ensuring all necessary steps are covered without repeating code from 'Done Tasks'. Specifically, {special_prompt}
Write complete code for 'Current Task', and avoid duplicating code from 'Done Tasks', such as repeated imports of packages, reading data, etc.
Specifically, {special_prompt}
# Code Steps:
Follow steps below when you writing code if it's convenient.
Strictly follow the steps below when writing code, if it's convenient.
{code_steps}
# Capabilities
@@ -205,14 +228,13 @@ Follow steps below when you writing code if it's convenient.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc.
# Available Tools:
Each Class tool is described in JSON format. When you call it, import the tool from `{module_name}` first.
Each Class tool is described in JSON format. When you call a tool, import the tool from `{module_name}` first.
{tool_catalog}
# Step Example:
Here is a coding example for each code step:
[Step 1]: Handle missing values by imputing or dropping them. For numerical columns, use median or mean imputation
[Code]
# Output Example:
When the current task is "do data preprocessing, like filling missing values, handling outliers, etc." and there are two steps in 'Code Steps', the code should look like:
```python
# Step 1: fill missing value
# Tools used: ['FillMissingValue']
from metagpt.tools.functions.libs.data_preprocess import FillMissingValue
@@ -224,31 +246,20 @@ fill_missing_value.fit(train_processed)
train_processed = fill_missing_value.transform(train_processed)
test_processed = fill_missing_value.transform(test_processed)
# Step 2: handle outliers
for col in num_cols:
low, high = train_processed[col].quantile([0.01, 0.99])
train_processed[col] = train_processed[col].clip(low, high)
test_processed[col] = test_processed[col].clip(low, high)
```end
[Step 2]: xxx
[Code]:
```python
# Tools used: [xxx]
from metagpt.tools.functions.libs.xxx import
```end
[Step 3]: xxx
[Code]:
```python
# Tools used: [xxx]
from metagpt.tools.functions.libs.xxx import
```end
# Constraints:
- Prioritize using pre-defined tools for the same functionality.
- Copy DataFrame before processing if needed.
- Strictly follow the code steps to write code
- Ensure the new code is executable in the same Jupyter notebook in which the code from previous tasks has been executed.
- Always prioritize using pre-defined tools for the same functionality.
- Always copy the DataFrame before processing it, and work on the copy.
- The output code should contain all steps implemented correctly in 'Code Steps'.
"""
#- If 'Code Steps' contains step done in 'Done Tasks', such as reading data, don't repeat it.
#For "fill missing value and handle outliers", the output code be like when there are training data and test data:
DATA_PREPROCESS_PROMPT = """
The current task is about data preprocessing, please note the following:
@@ -276,7 +287,7 @@ The current task is about training a model, please ensure high performance:
MODEL_EVALUATE_PROMPT = """
The current task is about evaluating a model, please note the following:
- Ensure that the evaluated data is same processed as the training data.
- Ensure that the evaluated data is processed in the same way as the training data. If not, remember to use the objects from 'Done Tasks' to transform the data.
- Use trained model from previous task result directly, do not mock or reload model yourself.
"""
@@ -291,3 +302,14 @@ ML_MODULE_MAP = {
"data_preprocess": "metagpt.tools.functions.libs.data_preprocess",
"feature_engineering": "metagpt.tools.functions.libs.feature_engineering",
}
STRUCTURAL_CONTEXT = """
## User Requirement
{user_requirement}
## Data Description
{data_desc}
## Current Plan
{tasks}
## Current Task
{current_task}
"""

File diff suppressed because it is too large

Binary file not shown.

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@@ -0,0 +1,153 @@
from typing import Dict, List, Union, Tuple
import json
import subprocess
import os
import fire
import pandas as pd
from metagpt.config import CONFIG
from metagpt.const import WORKSPACE_ROOT
from metagpt.roles import Role
from metagpt.actions import Action, BossRequirement
from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis
from metagpt.schema import Message, Task, Plan
from metagpt.logs import logger
from metagpt.utils.common import CodeParser
os.environ["KAGGLE_USERNAME"] = CONFIG.kaggle_username
os.environ["KAGGLE_KEY"] = CONFIG.kaggle_key
def run_command(cmd):
print(cmd)
output = subprocess.run(cmd, shell=True, capture_output=True, text=True)
if output.returncode != 0:
print("Error output:", output.stderr)
exit()
else:
print(output.stdout)
return output.stdout
class DownloadData(Action):
async def run(self, competition, data_desc="") -> str:
data_path = WORKSPACE_ROOT / competition
output = run_command(f"kaggle competitions list --search {competition}")
assert output != "No competitions found", "You must provide the correct competition name"
run_command(f"kaggle competitions download {competition} --path {WORKSPACE_ROOT}")
if not os.path.exists(data_path):
# if True:
# run_command(f"rm -r {data_path / '*'}")
run_command(f"unzip -o {WORKSPACE_ROOT / '*.zip'} -d {data_path}") # FIXME: not safe
file_list = run_command(f"ls {data_path}")
rsp = f"""
Location:
Data downloaded at {data_path} folder, including {file_list}
Data Description:
{data_desc}
"""
return rsp
class SubmitResult(Action):
PROMPT_TEMPLATE = """
# Summary
__summary__
# Your task
Extract the file path for test set prediction from the summary above, output a json following the format:
```json
{"file_path": str = "the file path, for example, /path/to/the/prediction/file/xxx.csv, /path/to/the/prediction/file/xxx.xlsx"}
```
"""
def __init__(self, name: str = "", context=None, llm=None):
super().__init__(name, context, llm)
async def _parse_submit_file_path(self, context) -> str:
prompt = self.PROMPT_TEMPLATE.replace("__summary__", context)
rsp = await self._aask(prompt)
rsp = CodeParser.parse_code(block=None, text=rsp)
file_path = json.loads(rsp)["file_path"]
return file_path
async def run(self, competition, submit_message="") -> str:
submit_file_path = await self._parse_submit_file_path(submit_message)
data_path = WORKSPACE_ROOT / competition
submit_message = submit_message.replace("'", "")
run_command(f"kaggle competitions submit {competition} -f {submit_file_path} -m '{submit_message}'")
run_command(f"kaggle competitions leaderboard --show --csv {competition} > {data_path / 'leaderboard.csv'}")
run_command(f"kaggle competitions submissions --csv {competition} > {data_path / 'submission.csv'}")
leaderboard = pd.read_csv(data_path / 'leaderboard.csv')
submission = pd.read_csv(data_path / 'submission.csv')
print(submission) # submission.to_json(orient="records")
submission_score = submission.loc[0, "publicScore"]
best_score = max(submission["publicScore"]) # might be min
rank = leaderboard.loc[leaderboard["score"] == best_score].index[0]
rank_pct = round(rank / len(leaderboard), 4) * 100
submission_summary = f"""
# All histories:
{submission.head(5).to_string()}
# Current
Current submission score: {submission_score}, best score: {best_score}, best rank: {rank} (top {rank_pct}%)
"""
logger.info(submission_summary)
return submission_summary
class KaggleManager(Role):
def __init__(
self, name="ABC", profile="KaggleManager", goal="", competition="titanic", data_desc=""
):
super().__init__(name=name, profile=profile, goal=goal)
self._init_actions([DownloadData, SubmitResult])
self._watch([BossRequirement, SummarizeAnalysis])
self.competition = competition
self.data_desc = data_desc # currently passed in; later it can be scraped from the web by another Role
async def _think(self):
observed = self.get_memories()[-1].cause_by
if observed == BossRequirement:
self._set_state(0) # DownloadData, get competition of interest from human, download datasets
elif observed == SummarizeAnalysis:
self._set_state(1) # SubmitResult, get prediction from MLEngineer and submit it to Kaggle
async def _act(self):
todo = self._rc.todo
logger.info(f"{self._setting}: ready to {self._rc.todo}")
if isinstance(todo, DownloadData):
rsp = await todo.run(self.competition, self.data_desc)
elif isinstance(todo, SubmitResult):
submit_message = self.get_memories()[-1].content # use analysis summary from MLEngineer as submission message
rsp = await todo.run(competition=self.competition, submit_message=submit_message)
msg = Message(content=rsp, role="user", cause_by=type(todo))
return msg
if __name__ == "__main__":
competition, data_desc, requirement = (
"titanic",
"Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
"Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
)
summary = "I used Python with pandas for data preprocessing, sklearn's RandomForestClassifier for modeling, and achieved 82.12% accuracy on validation. Predictions saved at '/Users/gary/Desktop/data_agents_opt/workspace/titanic/gender_submission.csv'."
async def main(requirement: str = requirement):
role = KaggleManager(competition=competition, data_desc=data_desc)
# await role.run(Message(content="", cause_by=BossRequirement))
await role.run(Message(content=summary, cause_by=SummarizeAnalysis))
fire.Fire(main)

View file

@@ -1,90 +1,32 @@
from typing import List
import json
import re
from datetime import datetime
from typing import List
import fire
import nbformat
from pathlib import Path
from metagpt.actions import Action
from metagpt.actions.debug_code import DebugCode
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst
from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
from metagpt.actions.write_code_steps import WriteCodeSteps
from metagpt.actions.write_plan import WritePlan
from metagpt.actions.write_plan import update_plan_from_rsp, precheck_update_plan_from_rsp
from metagpt.const import DATA_PATH, PROJECT_ROOT
from metagpt.logs import logger
from metagpt.memory import Memory
from metagpt.prompts.ml_engineer import STRUCTURAL_CONTEXT
from metagpt.prompts.ml_engineer import (
GEN_DATA_DESC_PROMPT,
UPDATE_DATA_COLUMNS,
PRINT_DATA_COLUMNS
)
from metagpt.roles import Role
from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
from metagpt.schema import Message, Plan
from metagpt.utils.common import CodeParser, remove_comments, create_func_config
from metagpt.actions.debug_code import DebugCode
STRUCTURAL_CONTEXT = """
## User Requirement
{user_requirement}
## Dataset Description
{data_desc}
## Current Plan
{tasks}
## Current Task
{current_task}
## Packages Installed
pandas
numpy
"""
# scikit-learn
# lightgbm
# xgboost
# catboost
def truncate(result: str, keep_len: int = 1000) -> str:
desc = "Truncated to show only the last 1000 characters\n"
if result.startswith(desc):
result = result[-len(desc):]
if len(result) > keep_len:
result = result[-keep_len:]
if not result.startswith(desc):
return desc + result
return desc
def remove_escape_and_color_codes(input_str):
# Use a regular expression to strip ANSI escape and color codes
pattern = re.compile(r'\x1b\[[0-9;]*[mK]')
result = pattern.sub('', input_str)
return result
class AskReview(Action):
async def run(self, context: List[Message], plan: Plan = None):
logger.info("Current overall plan:")
logger.info(
"\n".join([f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}" for task in plan.tasks])
)
logger.info("most recent context:")
latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
prompt = f"\nPlease review output from {latest_action}:\n" \
"If you want to change a task in the plan, say 'change task task_id, ... (things to change)'\n" \
"If you confirm the output and wish to continue with the current process, type CONFIRM\n" \
"If you want to terminate the process, type exit:\n"
rsp = input(prompt)
if rsp.lower() in ("exit"):
exit()
confirmed = rsp.lower() in ("confirm", "yes", "y")
return rsp, confirmed
from metagpt.utils.common import remove_comments, create_func_config
from metagpt.utils.save_code import save_code_file
class UpdateDataColumns(Action):
@@ -100,50 +42,95 @@ class UpdateDataColumns(Action):
class MLEngineer(Role):
def __init__(
self, name="ABC", profile="MLEngineer", goal="", auto_run: bool = False,
self, name="ABC", profile="MLEngineer", goal="", auto_run: bool = False
):
super().__init__(name=name, profile=profile, goal=goal)
self._set_react_mode(react_mode="plan_and_act")
self._watch([DownloadData, SubmitResult])
self.plan = Plan(goal=goal)
self.use_tools = True
self.use_code_steps = True
self.use_tools = False
self.use_code_steps = False
self.execute_code = ExecutePyCode()
self.auto_run = auto_run
self.data_desc = {}
# memory for working on each task, discarded each time a task is done
self.working_memory = Memory()
async def _plan_and_act(self):
### Actions in a multi-agent multi-turn setting ###
memories = self.get_memories()
if memories:
latest_event = memories[-1].cause_by
if latest_event == DownloadData:
self.plan.context = memories[-1].content
elif latest_event == SubmitResult:
# self reflect on previous plan outcomes and think about how to improve the plan, add to working memory
await self._reflect()
# get feedback for improvement from human, add to working memory
await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
### Common Procedure in both single- and multi-agent setting ###
# create initial plan and update until confirmation
await self._update_plan()
while self.plan.current_task:
task = self.plan.current_task
logger.info(f"ready to take on task {task}")
# take on current task
code, result, success, code_steps = await self._write_and_exec_code()
code, result, success = await self._write_and_exec_code()
# ask for acceptance; users can otherwise refuse and change tasks in the plan
task_result_confirmed = await self._ask_review()
if success and task_result_confirmed:
review, task_result_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
if self.auto_run:
# if human confirms the task result, then we deem the task completed, regardless of whether the code run succeeds;
# if auto mode, then the code run has to succeed for the task to be considered completed
task_result_confirmed = success
if task_result_confirmed:
# tick off this task and record progress
task.code = code
task.result = result
task.code_steps = code_steps
self.plan.finish_current_task()
self.working_memory.clear()
if self.use_tools:
success, new_code = await self._update_data_columns()
if success:
task.code = task.code + "\n\n" + new_code
confirmed_and_more = (ReviewConst.CONTINUE_WORD[0] in review.lower()
and review.lower() not in ReviewConst.CONTINUE_WORD[0]) # "confirm, ... (more content, such as changing downstream tasks)"
if confirmed_and_more:
self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
await self._update_plan(review)
elif "redo" in review:
# Ask the Role to redo this task with help of review feedback,
# useful when the code run is successful but the procedure or result is not what we want
continue
else:
# update plan according to user's feedback and to take on changed tasks
await self._update_plan()
await self._update_plan(review)
time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
self.execute_code.save_notebook(f"{DATA_PATH}/notebooks/ml_{time}.ipynb")
completed_plan_memory = self.get_useful_memories() # completed plan as an outcome
self._rc.memory.add(completed_plan_memory[0]) # add to persistent memory
summary = await SummarizeAnalysis().run(self.plan)
rsp = Message(content=summary, cause_by=SummarizeAnalysis)
self._rc.memory.add(rsp)
# save code using datetime.now or keywords related to the goal of your project (plan.goal).
project_record = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
save_code_file(name=project_record, code_context=self.execute_code.nb, file_format="ipynb")
return rsp
async def _update_data_columns(self):
rsp = await UpdateDataColumns().run(self.plan)
is_update, code = rsp["is_update"], rsp["code"]
@@ -155,34 +142,36 @@ class MLEngineer(Role):
return success, code
async def _write_and_exec_code(self, max_retry: int = 3):
code_steps = (
self.plan.current_task.code_steps = (
await WriteCodeSteps().run(self.plan)
if self.use_code_steps
else ""
)
counter = 0
improve_code = ""
success = False
debug_context = []
while not success and counter < max_retry:
context = self.get_useful_memories()
if counter > 0:
improve_code = await DebugCode().run(plan=self.plan.current_task.instruction,
# print("*" * 10)
# print(context)
# print("*" * 10)
# breakpoint()
if counter > 0 and self.use_tools:
code = await DebugCode().run(
plan=self.plan.current_task.instruction,
code=code,
runtime_result=self.working_memory.get(),
context=debug_context)
if improve_code != "":
code = improve_code
logger.info(f"new code \n{improve_code}")
context=debug_context
)
logger.info(f"new code \n{code}")
cause_by = DebugCode
elif not self.use_tools or self.plan.current_task.task_type == "other":
logger.info("Write code with pure generation")
code = await WriteCodeByGenerate().run(
context=context, plan=self.plan, code_steps=code_steps, temperature=0.0
context=context, plan=self.plan, temperature=0.0
)
debug_context = [self.get_useful_memories(task_exclude_field={'result', 'code_steps'})[0]]
cause_by = WriteCodeByGenerate
@@ -192,47 +181,46 @@ class MLEngineer(Role):
tool_context, code = await WriteCodeWithTools(schema_path=schema_path).run(
context=context,
plan=self.plan,
code_steps=code_steps,
column_info=self.data_desc.get("column_info", ""),
)
debug_context = tool_context
cause_by = WriteCodeWithTools
self.working_memory.add(
Message(content=code, role="assistant", cause_by=cause_by)
)
# debug on code, run on runcode with finished code and new_df
# runcode = code_context + "\n\n" + code
result, success = await self.execute_code.run(code)
# truncated the result
print(truncate(result))
print(result)
self.working_memory.add(
Message(content=truncate(remove_escape_and_color_codes(result)), role="user", cause_by=ExecutePyCode)
Message(content=result, role="user", cause_by=ExecutePyCode)
)
if "!pip" in code:
success = False
# if not success:
# await self._ask_review()
counter += 1
return code, result, success, code_steps
async def _ask_review(self):
if not self.auto_run:
if not success and counter >= max_retry:
logger.info("coding failed!")
review, _ = await self._ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER)
if ReviewConst.CHANGE_WORD[0] in review:
counter = 0 # redo the task again with help of human suggestions
return code, result, success
async def _ask_review(self, auto_run: bool = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER):
auto_run = self.auto_run if auto_run is None else auto_run  # an explicit False must not fall back to self.auto_run
if not auto_run:
context = self.get_useful_memories()
review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan)
review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan, trigger=trigger)
if not confirmed:
self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
return confirmed
return True
async def _update_plan(self, max_tasks: int = 3):
return review, confirmed
return "", True
async def _update_plan(self, review: str = "", max_tasks: int = 3, max_retries: int = 3):
plan_confirmed = False
while not plan_confirmed:
context = self.get_useful_memories()
rsp = await WritePlan().run(
@@ -241,43 +229,57 @@ class MLEngineer(Role):
self.working_memory.add(
Message(content=rsp, role="assistant", cause_by=WritePlan)
)
plan_confirmed = await self._ask_review()
new_tasks = WritePlan.rsp_to_tasks(rsp)
logger.debug(len(self.plan.tasks))
logger.debug(len(new_tasks))
## fixme: re-planning can be executed repeatedly over multiple rounds, but there should be better handling logic
## fixme: do not overwrite original tasks
tasks = self.plan.tasks + new_tasks
self.plan.add_tasks(tasks)
# precheck plan before asking reviews
is_plan_valid, error = precheck_update_plan_from_rsp(rsp, self.plan)
if not is_plan_valid and max_retries > 0:
error_msg = f"The generated plan is not valid with error: {error}, try regenerating, remember to generate either the whole plan or the single changed task only"
logger.warning(error_msg)
self.working_memory.add(Message(content=error_msg, role="assistant", cause_by=WritePlan))
max_retries -= 1
continue
_, plan_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
update_plan_from_rsp(rsp, self.plan)
self.working_memory.clear()
def get_useful_memories(self, task_exclude_field: set = None) -> List[Message]:
async def _reflect(self):
context = self.get_memories()
context = "\n".join([str(msg) for msg in context])
# print("*" * 10)
# print(context)
# print("*" * 10)
reflection = await Reflect().run(context=context)
self.working_memory.add(Message(content=reflection, role="assistant"))
self.working_memory.add(Message(content=Reflect.REWRITE_PLAN_INSTRUCTION, role="user"))
def get_useful_memories(self, task_exclude_field=None) -> List[Message]:
"""find useful memories only to reduce context length and improve performance"""
# TODO: dataset description, code steps
if task_exclude_field is None:
# Shorten the context as we don't need code steps after we get the codes.
# This doesn't affect current_task below, which should hold the code steps
task_exclude_field = {'code_steps'}
user_requirement = self.plan.goal
tasks = json.dumps(
[task.dict(exclude=task_exclude_field) for task in self.plan.tasks], indent=4, ensure_ascii=False
)
data_desc = self.plan.context
tasks = [task.dict(exclude=task_exclude_field) for task in self.plan.tasks]
tasks = json.dumps(tasks, indent=4, ensure_ascii=False)
current_task = self.plan.current_task.json() if self.plan.current_task else {}
context = STRUCTURAL_CONTEXT.format(
user_requirement=user_requirement,
data_desc=self.data_desc,
tasks=tasks,
current_task=current_task
user_requirement=user_requirement, data_desc=data_desc, tasks=tasks, current_task=current_task
)
context_msg = [Message(content=context, role="user")]
return context_msg + self.get_working_memories()
return context_msg + self.working_memory.get()
@property
def working_memory(self):
return self._rc.memory
def get_working_memories(self) -> List[Message]:
return self.working_memory.get()
if __name__ == "__main__":
# requirement = "Run data analysis on sklearn Iris dataset, include a plot"
requirement = "Run data analysis on sklearn Iris dataset, include a plot"
# requirement = "Run data analysis on sklearn Diabetes dataset, include a plot"
# requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
# requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"

View file

@@ -78,14 +78,15 @@ class Task(BaseModel):
dependent_task_ids: list[str] = [] # Tasks prerequisite to this Task
instruction: str = ""
task_type: str = ""
code_steps: str = ""
code: str = ""
result: str = ""
is_finished: bool = False
code_steps: str = ""
class Plan(BaseModel):
goal: str
context: str = ""
tasks: list[Task] = []
task_map: dict[str, Task] = {}
current_task_id = ""
@@ -149,14 +150,81 @@ class Plan(BaseModel):
self.tasks = final_tasks
# Update current_task_id to the first unfinished task in the merged list
for task in self.tasks:
if not task.is_finished:
self.current_task_id = task.task_id
break
self._update_current_task()
# Update the task map for quick access to tasks by ID
self.task_map = {task.task_id: task for task in self.tasks}
def reset_task(self, task_id: str):
"""
Clear code and result of the task based on task_id, and set the task as unfinished.
Args:
task_id (str): The ID of the task to be reset.
Returns:
None
"""
if task_id in self.task_map:
task = self.task_map[task_id]
task.code = ""
task.result = ""
task.is_finished = False
def replace_task(self, new_task: Task):
"""
Replace an existing task with the new input task based on task_id, and reset all tasks depending on it.
Args:
new_task (Task): The new task that will replace an existing one.
Returns:
None
"""
if new_task.task_id in self.task_map:
# Replace the task in the task map and the task list
self.task_map[new_task.task_id] = new_task
for i, task in enumerate(self.tasks):
if task.task_id == new_task.task_id:
self.tasks[i] = new_task
break
# Reset dependent tasks
for task in self.tasks:
if new_task.task_id in task.dependent_task_ids:
self.reset_task(task.task_id)
def append_task(self, new_task: Task):
"""
Append a new task to the end of existing task sequences
Args:
new_task (Task): The new task to be appended to the existing task sequence
Returns:
None
"""
assert not self.has_task_id(new_task.task_id), "Task already in current plan, use replace_task instead"
assert all([self.has_task_id(dep_id) for dep_id in new_task.dependent_task_ids]), \
"New task has unknown dependencies"
# Existing tasks do not depend on the new task, so it's fine to append it to the end of the sorted task sequence
self.tasks.append(new_task)
self.task_map[new_task.task_id] = new_task
self._update_current_task()
def has_task_id(self, task_id: str) -> bool:
return task_id in self.task_map
def _update_current_task(self):
current_task_id = ""
for task in self.tasks:
if not task.is_finished:
current_task_id = task.task_id
break
self.current_task_id = current_task_id # all tasks finished
@property
def current_task(self) -> Task:
"""Find current task to execute
@@ -170,10 +238,8 @@ class Plan(BaseModel):
"""Finish current task, set Task.is_finished=True, set current task to next task
"""
if self.current_task_id:
current_task = self.current_task
current_task.is_finished = True
next_task_index = self.tasks.index(current_task) + 1
self.current_task_id = self.tasks[next_task_index].task_id if next_task_index < len(self.tasks) else None
self.current_task.is_finished = True
self._update_current_task() # set to next task
def get_finished_tasks(self) -> list[Task]:
"""return all finished tasks in correct linearized order

View file

@@ -4,6 +4,3 @@
# @Author : lidanyang
# @File : __init__.py
# @Desc :
from metagpt.tools.functions.register.register import registry
import metagpt.tools.functions.libs.feature_engineering
import metagpt.tools.functions.libs.data_preprocess

View file

@@ -1,4 +1,5 @@
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MaxAbsScaler
@@ -9,7 +10,6 @@ from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler
from metagpt.tools.functions.libs.base import MLProcess
from metagpt.tools.functions.schemas.data_preprocess import *
class FillMissingValue(MLProcess):
@@ -141,7 +141,10 @@ def get_column_info(df: pd.DataFrame) -> dict:
for i in df.columns:
nan_freq = float("%.2g" % (df[i].isna().mean() * 100))
n_unique = df[i].nunique()
data.append([i, df[i].dtype, nan_freq, n_unique])
data_type = str(df[i].dtype).replace("dtype('", "").replace("')", "")
if data_type == "O":
data_type = "object"
data.append([i, data_type, nan_freq, n_unique])
samples = pd.DataFrame(
data,

View file

@@ -7,6 +7,7 @@
import itertools
import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta
from joblib import Parallel, delayed
from pandas.api.types import is_numeric_dtype
@@ -15,7 +16,6 @@ from sklearn.model_selection import KFold
from sklearn.preprocessing import PolynomialFeatures, KBinsDiscretizer
from metagpt.tools.functions.libs.base import MLProcess
from metagpt.tools.functions.schemas.feature_engineering import *
class PolynomialExpansion(MLProcess):

View file

@@ -1,196 +0,0 @@
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from metagpt.tools.functions import registry
from metagpt.tools.functions.schemas.ml_model import *
#########
## Classification ##
#########
@registry.register("classification_model", LogisticRegressionClassification)
def logistic_regression_classification(df, label, test_size=0.2, penalty='l2', dual=False):
nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
for col in nonnumeric_columns:
df[col] = LabelEncoder().fit_transform(df[col])
df = df.fillna(0)
features = [col for col in df if col != label]
x, y = df[features], df[label]
tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
model = LogisticRegression(penalty=penalty, dual=dual)
model.fit(tr_x, tr_y, )
te_pred_prob = model.predict_proba(te_x)
res = {
'te_pred_prob': te_pred_prob
}
return res
@registry.register("classification_model", RandomForestClassification)
def random_forest_classification(df, label, test_size=0.2, n_estimators=100, criterion='gini'):
nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
for col in nonnumeric_columns:
df[col] = LabelEncoder().fit_transform(df[col])
df = df.fillna(0)
features = [col for col in df if col != label]
x, y = df[features], df[label]
tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
model = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion)
model.fit(tr_x, tr_y, )
te_pred_prob = model.predict_proba(te_x)
res = {
'te_pred_prob': te_pred_prob
}
return res
@registry.register("classification_model", GradientBoostingClassification)
def gradient_boosting_classification(df, label, test_size=0.2, n_estimators=100, learning_rate=0.1):
nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
for col in nonnumeric_columns:
df[col] = LabelEncoder().fit_transform(df[col])
df = df.fillna(0)
features = [col for col in df if col != label]
x, y = df[features], df[label]
tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
model = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
model.fit(tr_x, tr_y, )
te_pred_prob = model.predict_proba(te_x)
res = {
'te_pred_prob': te_pred_prob
}
return res
#########
## Regression ##
#########
@registry.register("regression_model", LinearRegressionRegression)
def linear_regression(df, label, test_size=0.2, ):
nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
for col in nonnumeric_columns:
df[col] = LabelEncoder().fit_transform(df[col])
df = df.fillna(0)
features = [col for col in df if col != label]
x, y = df[features], df[label]
tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
model = LinearRegression()
model.fit(tr_x, tr_y, )
te_pred_prob = model.predict(te_x)
res = {
'te_pred_prob': te_pred_prob
}
return res
@registry.register("regression_model", RandomForestRegression)
def random_forest_regression(df, label, test_size=0.2, n_estimators=100, criterion='squared_error'):
nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
for col in nonnumeric_columns:
df[col] = LabelEncoder().fit_transform(df[col])
df = df.fillna(0)
features = [col for col in df if col != label]
x, y = df[features], df[label]
tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
model = RandomForestRegressor(n_estimators=n_estimators, criterion=criterion)
model.fit(tr_x, tr_y, )
te_pred_prob = model.predict(te_x)
res = {
'te_pred_prob': te_pred_prob
}
return res
@registry.register("regression_model", GradientBoostingRegression)
def gradient_boosting_regression(df, label, test_size=0.2, n_estimators=100, learning_rate=0.1):
nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
for col in nonnumeric_columns:
df[col] = LabelEncoder().fit_transform(df[col])
df = df.fillna(0)
features = [col for col in df if col != label]
x, y = df[features], df[label]
tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
model = GradientBoostingRegressor(n_estimators=n_estimators, learning_rate=learning_rate)
model.fit(tr_x, tr_y, )
te_pred_prob = model.predict(te_x)
res = {
'te_pred_prob': te_pred_prob
}
return res
if __name__ == '__main__':
def run():
from sklearn.datasets import load_iris
loader = load_iris(as_frame=True)
df = loader['data']
df['target'] = loader['target']
df[df.columns[0]] = df[df.columns[0]].astype(str)
df[df.columns[1]] = df[df.columns[1]].astype(int)
df['target'] = df['target'].astype(str)
print(df)
print('####'*5)
res = logistic_regression_classification(df, 'target', test_size=0.25, penalty='l2', dual=False)
print(res['te_pred_prob'])
print('####'*5)
res = random_forest_classification(df, 'target', test_size=0.25, n_estimators=100, criterion='gini')
print(res['te_pred_prob'])
print('####'*5)
res = gradient_boosting_classification(df, 'target', test_size=0.25, n_estimators=100, learning_rate=0.1)
print(res['te_pred_prob'])
from sklearn.datasets import make_regression
import pandas as pd
loader = make_regression()
df = pd.DataFrame(loader[0])
df['target'] = loader[1]
df[df.columns[0]] = df[df.columns[0]].astype(str)
df[df.columns[1]] = df[df.columns[1]].astype(int)
# df['target'] = df['target'].astype(str)
print(df)
print('####' * 5)
res = linear_regression(df, 'target', test_size=0.25, )
print(res['te_pred_prob'])
print('####' * 5)
res = random_forest_regression(df, 'target', test_size=0.25, n_estimators=100, criterion='squared_error')
print(res['te_pred_prob'])
print('####' * 5)
res = gradient_boosting_regression(df, 'target', test_size=0.25, n_estimators=100, learning_rate=0.1)
print(res['te_pred_prob'])
run()

View file

@@ -1,6 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/16 16:37
# @Author : lidanyang
# @File : __init__.py
# @Desc :

View file

@@ -1,78 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/16 16:38
# @Author : lidanyang
# @File : register.py
# @Desc :
import inspect
from typing import Type, Optional, Callable, Dict, Union, List
from metagpt.tools.functions.schemas.base import ToolSchema
class FunctionRegistry:
def __init__(self):
self.functions: Dict[str, Dict[str, Dict]] = {}
@staticmethod
def _check_param_consistency(func_params, schema):
param_names = set(func_params.keys())
schema_names = set(schema["parameters"]["properties"].keys())
if param_names != schema_names:
raise ValueError("Function parameters do not match schema properties")
def register(self, module: str, tool_schema: Type[ToolSchema]) -> Callable:
def wrapper(func: Callable) -> Callable:
module_registry = self.functions.setdefault(module, {})
if func.__name__ in module_registry:
raise ValueError(f"Function {func.__name__} is already registered in {module}")
func_params = inspect.signature(func).parameters
schema = tool_schema.schema()
schema["name"] = func.__name__
self._check_param_consistency(func_params, schema)
module_registry[func.__name__] = {
"func": func,
"schema": schema,
}
return func
return wrapper
def get(self, module: str, name: str) -> Optional[Union[Callable, Dict]]:
"""Get function by module and name"""
module_registry = self.functions.get(module, {})
return module_registry.get(name)
def get_by_name(self, name: str) -> Optional[Dict]:
"""Get function by name"""
for module_registry in self.functions.values():
if name in module_registry:
return module_registry.get(name, {})
def get_all_by_module(self, module: str) -> Optional[Dict]:
"""Get all functions by module"""
return self.functions.get(module, {})
def get_schema(self, module: str, name: str) -> Optional[Dict]:
"""Get schema by module and name"""
module_registry = self.functions.get(module, {})
return module_registry.get(name, {}).get("schema")
def get_schemas(self, module: str, names: List[str]) -> List[Dict]:
"""Get schemas by module and names"""
module_registry = self.functions.get(module, {})
return [module_registry.get(name, {}).get("schema") for name in names]
def get_all_schema_by_module(self, module: str) -> List[Dict]:
"""Get all schemas by module"""
module_registry = self.functions.get(module, {})
return [v.get("schema") for v in module_registry.values()]
registry = FunctionRegistry()

View file

@@ -1,100 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/16 16:34
# @Author : lidanyang
# @File : base.py
# @Desc : Build base class to generate schema for tool
from typing import Any, List, Optional, get_type_hints
class NoDefault:
"""
A class to represent a missing default value.
This is used to distinguish between a default value of None and a missing default value.
"""
pass
def tool_field(
description: str, default: Any = NoDefault(), enum: Optional[List[Any]] = None, **kwargs
):
"""
Create a field for a tool parameter.
Args:
description (str): A description of the field.
default (Any, optional): The default value for the field. Defaults to None.
enum (Optional[List[Any]], optional): A list of possible values for the field. Defaults to None.
**kwargs: Additional keyword arguments.
Returns:
dict: A dictionary representing the field with provided attributes.
"""
field_info = {
"description": description,
"default": default,
"enum": enum,
}
field_info.update(kwargs)
return field_info
class ToolSchema:
@staticmethod
def format_type(type_hint):
"""
Format a type hint into a string representation.
Args:
type_hint (type): The type hint to format.
Returns:
str: A string representation of the type hint.
"""
if isinstance(type_hint, type):
# Handle built-in types separately
if type_hint.__module__ == "builtins":
return type_hint.__name__
else:
return f"{type_hint.__module__}.{type_hint.__name__}"
elif hasattr(type_hint, "__origin__") and hasattr(type_hint, "__args__"):
# Handle generic types (like List[int])
origin_type = ToolSchema.format_type(type_hint.__origin__)
args_type = ", ".join(
[ToolSchema.format_type(t) for t in type_hint.__args__]
)
return f"{origin_type}[{args_type}]"
else:
return str(type_hint)
@classmethod
def schema(cls):
"""
Generate a schema dictionary for the class.
The schema includes the class name, description, and information about
each class parameter based on type hints and field definitions.
Returns:
dict: A dictionary representing the schema of the class.
"""
schema = {
"name": cls.__name__,
"description": cls.__doc__,
"parameters": {"type": "object", "properties": {}, "required": []},
}
type_hints = get_type_hints(cls)
for attr, type_hint in type_hints.items():
value = getattr(cls, attr, None)
if isinstance(value, dict):
# Process each attribute that is defined using the field function
prop_info = {k: v for k, v in value.items() if v is not None or k == "default"}
if isinstance(prop_info["default"], NoDefault):
del prop_info["default"]
prop_info["type"] = ToolSchema.format_type(type_hint)
schema["parameters"]["properties"][attr] = prop_info
# Check for required fields
if "default" not in prop_info:
schema["parameters"]["required"].append(attr)
return schema
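Illustrative only: what `schema()` produces for a small class, derived by hand from the logic above. Note that a field declared with the `NoDefault` sentinel loses its `default` entry and lands in `required`.

```python
# Hand-derived output of ToolSchema.schema(); "upper" has no default
# (NoDefault sentinel), so it becomes required.
class Clip(ToolSchema):
    """Clip values to a range."""
    lower: float = tool_field(description="lower bound", default=0.0)
    upper: float = tool_field(description="upper bound")

assert Clip.schema() == {
    "name": "Clip",
    "description": "Clip values to a range.",
    "parameters": {
        "type": "object",
        "properties": {
            "lower": {"description": "lower bound", "default": 0.0, "type": "float"},
            "upper": {"description": "upper bound", "type": "float"},
        },
        "required": ["upper"],
    },
}
```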

View file

@ -1,67 +0,0 @@
import pandas as pd
from metagpt.tools.functions.schemas.base import tool_field, ToolSchema
class FillMissingValue(ToolSchema):
"""Completing missing values with simple strategies"""
df: pd.DataFrame = tool_field(description="input dataframe")
features: list = tool_field(description="columns to be processed")
strategy: str = tool_field(
description="the imputation strategy",
default='mean',
enum=['mean', 'median', 'most_frequent', 'constant']
)
fill_value: int = tool_field(
description="fill_value is used to replace all occurrences of missing_values", default=None)
class SplitBins(ToolSchema):
"""Bin continuous data into intervals and return the bin identifier encoded as an integer value"""
df: pd.DataFrame = tool_field(description="input dataframe")
features: list = tool_field(description="columns to be processed")
strategy: str = tool_field(description="Strategy used to define the widths of the bins", default='quantile')
class MinMaxScale(ToolSchema):
"""Transform features by scaling each feature to a range, witch is (0, 1)"""
df: pd.DataFrame = tool_field(description="input dataframe")
features: list = tool_field(description="columns to be processed")
class StandardScale(ToolSchema):
"""Standardize features by removing the mean and scaling to unit variance"""
df: pd.DataFrame = tool_field(description="input dataframe")
features: list = tool_field(description="columns to be processed")
class LogTransform(ToolSchema):
"""Performs a logarithmic transformation on the specified columns"""
df: pd.DataFrame = tool_field(description="input dataframe")
features: list = tool_field(description="columns to be processed")
class MaxAbsScale(ToolSchema):
"""Scale each feature by its maximum absolute value"""
df: pd.DataFrame = tool_field(description="input dataframe")
features: list = tool_field(description="columns to be processed")
class RobustScale(ToolSchema):
"""Scale features using statistics that are robust to outliers, the quantile_range is (25.0, 75.0)"""
df: pd.DataFrame = tool_field(description="input dataframe")
features: list = tool_field(description="columns to be processed")
class OrdinalEncode(ToolSchema):
"""Encode categorical features as an integer array"""
df: pd.DataFrame = tool_field(description="input dataframe")
features: list = tool_field(description="columns to be processed")
class OneHotEncoding(ToolSchema):
"""Apply one-hot encoding to specified categorical columns, the original columns will be dropped."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
cols: list = tool_field(description="Categorical columns to be one-hot encoded and dropped.")

View file

@ -1,110 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/11/17 10:34
# @Author : lidanyang
# @File : feature_engineering.py
# @Desc : Schema for feature engineering functions
from typing import List
import pandas as pd
from metagpt.tools.functions.schemas.base import ToolSchema, tool_field
class PolynomialExpansion(ToolSchema):
"""Add polynomial and interaction features from selected numeric columns, excluding the bias column."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
cols: list = tool_field(description="Columns for polynomial expansion.")
degree: int = tool_field(description="Degree of polynomial features.", default=2)
class FrequencyEncoding(ToolSchema):
"""Add value counts of categorical columns as new features."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
cols: list = tool_field(description="Categorical columns to be frequency encoded.")
class TargetMeanEncoder(ToolSchema):
"""Encodes a categorical column by the mean of the label column, and adds the result as a new feature."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
col: str = tool_field(description="Column to be mean encoded.")
label: str = tool_field(description="Predicted label column.")
class KFoldTargetMeanEncoder(ToolSchema):
"""Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
col: str = tool_field(description="Column to be k-fold mean encoded.")
label: str = tool_field(description="Predicted label column.")
n_splits: int = tool_field(description="Number of splits for K-fold.", default=5)
random_state: int = tool_field(description="Random seed.", default=2021)
class CatCross(ToolSchema):
"""Add pairwise crossed features and convert them to numerical features."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
cols: list = tool_field(description="Columns to be pairwise crossed.")
max_cat_num: int = tool_field(
description="Maximum unique categories per crossed feature.", default=100
)
class GroupStat(ToolSchema):
"""Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
group_col: str = tool_field(description="Column used for grouping.")
agg_col: str = tool_field(description="Column on which aggregation is performed.")
agg_funcs: list = tool_field(
description="""List of aggregation functions to apply, such as ['mean', 'std'].
Each function must be supported by pandas."""
)
class ExtractTimeComps(ToolSchema):
"""Extract and add specific time components as new features from a designated time column."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
time_col: str = tool_field(
description="The name of the column containing time data."
)
time_comps: List[str] = tool_field(
description="""List of time components to extract.
Each component must be in ['year', 'month', 'day', 'hour', 'dayofweek', 'is_weekend']."""
)
class FeShiftByTime(ToolSchema):
"""Shift column values based on specified time intervals and add the resulting new features to the DataFrame. New features are named in the format of '<group_col>_<shift_col>_lag_<period>_<freq>'."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
time_col: str = tool_field(description="Column for time-based shifting.")
group_col: str = tool_field(description="Column for grouping before shifting.")
shift_col: str = tool_field(description="Column to shift.")
periods: list = tool_field(description="Time intervals for shifting.")
freq: str = tool_field(
description="Frequency unit for time intervals (e.g., 'D', 'M').",
enum=["D", "M", "Y", "W", "H"],
)
class FeRollingByTime(ToolSchema):
"""Calculate rolling statistics for a DataFrame column over time intervals."""
df: pd.DataFrame = tool_field(description="DataFrame to process.")
time_col: str = tool_field(description="Column for time-based rolling.")
group_col: str = tool_field(description="Column for grouping before rolling.")
rolling_col: str = tool_field(description="Column for rolling calculations.")
periods: list = tool_field(description="Window sizes for rolling.")
freq: str = tool_field(
description="Frequency unit for time windows (e.g., 'D', 'M').",
enum=["D", "M", "Y", "W", "H"],
)
agg_funcs: list = tool_field(
description="""List of aggregation functions for rolling, like ['mean', 'std'].
Each function must be in ['mean', 'std', 'min', 'max', 'median', 'sum', 'count']."""
)
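As a concrete illustration of the `GroupStat` naming convention, a rough pandas sketch, not the actual tool implementation:

```python
import pandas as pd

# Rough pandas equivalent of GroupStat's '<agg_col>_<agg_func>_by_<group_col>'
# naming; a sketch, not the tool's real implementation.
df = pd.DataFrame({"city": ["a", "a", "b"], "price": [1.0, 3.0, 5.0]})
for func in ["mean", "std"]:
    df[f"price_{func}_by_city"] = df.groupby("city")["price"].transform(func)
```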

View file

@ -53,17 +53,17 @@ PolynomialExpansion:
CatCount:
type: class
description: "Add value counts of categorical columns as new features."
description: "Add value counts of a categorical column as new feature."
methods:
__init__:
description: "Initialize self."
parameters:
properties:
cols:
type: list
description: "Columns for value counts."
col:
type: str
description: "Column for value counts."
required:
- cols
- col
fit:
description: "Fit the CatCount model."
parameters:
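A sketch of the `CatCount` tool this YAML describes, inferred from the schema; the real implementation is not shown in this diff, and the output column name is an assumption:

```python
# Inferred from the YAML schema above; the real CatCount is not shown in
# this diff, and the "<col>_cnt" output name is an assumption.
class CatCount:
    def __init__(self, col: str):
        self.col = col
        self.encoder_dict = None

    def fit(self, df):
        # Remember how often each category occurs in the column.
        self.encoder_dict = df[self.col].value_counts().to_dict()

    def transform(self, df):
        new_df = df.copy()
        new_df[f"{self.col}_cnt"] = new_df[self.col].map(self.encoder_dict)
        return new_df
```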

View file

@ -1,55 +0,0 @@
import pandas as pd
from metagpt.tools.functions.schemas.base import tool_field, ToolSchema
class LogisticRegressionClassification(ToolSchema):
"""Logistic Regression (aka logit, MaxEnt) classifier"""
df: pd.DataFrame = tool_field(description="input dataframe")
label: str = tool_field(description="target name")
test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
penalty: str = tool_field(description="Specify the norm of the penalty", default="l2")
dual: bool = tool_field(description="Dual (constrained) or primal (regularized) formulation", default=False)
class RandomForestClassification(ToolSchema):
"""random forest is a meta estimator that fits a number of decision tree classifiers on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting"""
df: pd.DataFrame = tool_field(description="input dataframe")
label: str = tool_field(description="target name")
test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
n_estimators: int = tool_field(description="The number of trees in the forest", default=100)
criterion: str = tool_field(description="The function to measure the quality of a split", default="gini")
class GradientBoostingClassification(ToolSchema):
"""Gradient Boosting for classification.This algorithm builds an additive model in a forward stage-wise fashion"""
df: pd.DataFrame = tool_field(description="input dataframe")
label: str = tool_field(description="target name")
test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
n_estimators: int = tool_field(description="The number of boosting stages to perform", default=100)
learning_rate: float = tool_field(description="Learning rate shrinks the contribution of each tree by learning_rate", default=0.1)
class LinearRegressionRegression(ToolSchema):
"""Ordinary least squares Linear Regression."""
df: pd.DataFrame = tool_field(description="input dataframe")
label: str = tool_field(description="target name")
test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
class RandomForestRegression(ToolSchema):
"""random forest is a meta estimator that fits a number of decision tree on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting"""
df: pd.DataFrame = tool_field(description="input dataframe")
label: str = tool_field(description="target name")
test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
n_estimators: int = tool_field(description="The number of trees in the forest", default=100)
criterion: str = tool_field(description="The function to measure the quality of a split", default="squared_error")
class GradientBoostingRegression(ToolSchema):
"""Gradient Boosting for regression.This estimator builds an additive model in a forward stage-wise fashion"""
df: pd.DataFrame = tool_field(description="input dataframe")
label: str = tool_field(description="target name")
test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
n_estimators: int = tool_field(description="The number of boosting stages to perform", default=100)
learning_rate: float = tool_field(description="Learning rate shrinks the contribution of each tree by learning_rate", default=0.1)
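These schemas presumably map onto scikit-learn estimators; a hedged sketch for `RandomForestClassification` (the actual tool implementation is not part of this diff):

```python
# Hedged sketch of the estimator call RandomForestClassification presumably
# wraps; the actual tool implementation is not shown in this diff.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

def random_forest_classification(df, label, test_size=0.2,
                                 n_estimators=100, criterion="gini"):
    X, y = df.drop(columns=[label]), df[label]
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size)
    model = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion)
    model.fit(X_train, y_train)
    return model, model.score(X_val, y_val)  # validation accuracy
```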

View file

@ -7,7 +7,7 @@ from typing import Any, Callable, Coroutine, Literal, overload
from metagpt.config import CONFIG
from metagpt.tools import WebBrowserEngineType
# from metagpt.utils.parse_html import WebPage
from metagpt.utils.parse_html import WebPage
class WebBrowserEngine:

View file

@ -6,7 +6,7 @@
@File : __init__.py
"""
# from metagpt.utils.read_document import read_docx
from metagpt.utils.read_document import read_docx
from metagpt.utils.singleton import Singleton
from metagpt.utils.token_counter import (
TOKEN_COSTS,
@ -16,7 +16,7 @@ from metagpt.utils.token_counter import (
__all__ = [
# "read_docx",
"read_docx",
"Singleton",
"TOKEN_COSTS",
"count_message_tokens",

View file

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
# @Date : 12/12/2023 4:14 PM
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import os
import json
import nbformat
from metagpt.const import DATA_PATH
def save_code_file(name: str, code_context: str, file_format: str = "py") -> None:
"""
Save code files to a specified path.
Args:
- name (str): The name of the folder to save the files.
- code_context (str): The code content. For 'ipynb', pass an nbformat NotebookNode instead of a plain string, since nbformat.write expects one.
- file_format (str, optional): The file format. Supports 'py' (Python file), 'json' (JSON file), and 'ipynb' (Jupyter Notebook file). Default is 'py'.
Returns:
- None
"""
# Create the folder path if it doesn't exist
os.makedirs(name=DATA_PATH / "output" / f"{name}", exist_ok=True)
# Choose to save as a Python file or a JSON file based on the file format
file_path = DATA_PATH / "output" / f"{name}/code.{file_format}"
if file_format == "py":
with open(file_path, "w", encoding="utf-8") as fp:
fp.write(code_context + "\n\n")
elif file_format == "json":
# Parse the code content as JSON and save
data = {"code": code_context}
with open(file_path, "w", encoding="utf-8") as fp:
json.dump(data, fp, indent=2)
elif file_format == "ipynb":
nbformat.write(code_context, file_path)
else:
raise ValueError("Unsupported file format. Please choose 'py', 'json', or 'ipynb'.")

View file

@ -45,6 +45,7 @@ wrapt==1.15.0
websocket-client==0.58.0
zhipuai==1.0.7
rich==13.6.0
nbclient==0.9.0
nbformat==5.9.2
ipython==8.17.2
ipykernel==6.27.0

View file

@ -1,6 +1,6 @@
import pytest
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.actions.execute_code import ExecutePyCode, truncate
from metagpt.schema import Message
@ -81,3 +81,10 @@ async def test_plotting_bug():
pi = ExecutePyCode()
output = await pi.run(code)
assert output[1] is True
def test_truncate():
output = "hello world"
assert truncate(output) == output
output = "hello world"
assert truncate(output, 5) == "Truncated to show only the last 5 characters\nworld"
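From the assertions above one can infer roughly what `truncate` does; a sketch consistent with the test (the real implementation in `metagpt/actions/execute_code.py` may differ in parameter name and default threshold):

```python
# Sketch of truncate() consistent with the assertions above; parameter name
# and default threshold are assumptions, not the real implementation.
def truncate(output: str, max_length: int = 2000) -> str:
    if len(output) <= max_length:
        return output
    return f"Truncated to show only the last {max_length} characters\n" + output[-max_length:]
```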

View file

@ -31,22 +31,15 @@ async def test_tool_recommendation():
step 1: 对数据集进行去重
step 2: 对数据集进行缺失值处理
"""
available_tools = [
{
"name": "fill_missing_value",
"description": "Completing missing values with simple strategies",
},
{
"name": "split_bins",
"description": "Bin continuous data into intervals and return the bin identifier encoded as an integer value",
},
]
available_tools = {
"fill_missing_value": "Completing missing values with simple strategies",
"split_bins": "Bin continuous data into intervals and return the bin identifier encoded as an integer value",
}
write_code = WriteCodeWithTools()
tools = await write_code._tool_recommendation(task, code_steps, available_tools)
assert len(tools) == 2
assert tools[0] == []
assert tools[1] == ["fill_missing_value"]
assert len(tools) == 1
assert tools[0] == ["fill_missing_value"]
@pytest.mark.asyncio
@ -57,7 +50,7 @@ async def test_write_code_with_tools():
"1": Task(
task_id="1",
instruction="随机生成一个pandas DataFrame数据集",
task_type="unknown",
task_type="other",
dependent_task_ids=[],
code="""
import pandas as pd
@ -75,6 +68,10 @@ async def test_write_code_with_tools():
instruction="对数据集进行数据清洗",
task_type="data_preprocess",
dependent_task_ids=["1"],
code_steps="""
{"Step 1": "对数据集进行去重",
"Step 2": "对数据集进行缺失值处理"}
"""
),
}
plan = Plan(
@ -83,13 +80,9 @@ async def test_write_code_with_tools():
task_map=task_map,
current_task_id="2",
)
task_guide = """
step 1: 对数据集进行去重
step 2: 对数据集进行缺失值处理
"""
data_desc = "None"
column_info = ""
code = await write_code.run(messages, plan, task_guide, data_desc)
code = await write_code.run(messages, plan, column_info)
assert len(code) > 0
print(code)

View file

@ -1,13 +1,15 @@
import pytest
from metagpt.actions.write_plan import WritePlan
from metagpt.actions.write_plan import WritePlan, precheck_update_plan_from_rsp, Plan, Task
def test_precheck_update_plan_from_rsp():
plan = Plan(goal="")
plan.add_tasks([Task(task_id="1")])
rsp = '[{"task_id": "2"}]'
success, _ = precheck_update_plan_from_rsp(rsp, plan)
assert success
assert len(plan.tasks) == 1 and plan.tasks[0].task_id == "1" # precheck should not change the original one
@pytest.mark.asyncio
async def test_plan():
p = WritePlan()
task_desc = """Heres some background information on Cyclistic, a bike-sharing company designing a marketing strategy aimed at converting casual riders into annual members: So far, Cyclistics marketing strategy has relied on building general awareness and engaging a wide range of consumers. group. One way to help achieve these goals is the flexibility of its pricing plans: one-way passes, full-day passes, and annual memberships. Customers who purchase a one-way or full-day pass are known as recreational riders. Customers purchasing an annual membership are Cyclistic members. I will provide you with a data sheet that records user behavior: '/Users/vicis/Downloads/202103-divvy-tripdata.csv"""
rsp = await p.run(task_desc, role="data analyst")
assert len(rsp.content) > 0
assert rsp.sent_from == "WritePlan"
print(rsp)
invalid_rsp = 'wrong'
success, _ = precheck_update_plan_from_rsp(invalid_rsp, plan)
assert not success
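The test implies `precheck_update_plan_from_rsp` validates a response without mutating the plan; a sketch of that pattern, assuming a separate updater function (`update_plan_from_rsp` here is an assumption):

```python
import copy

# Sketch of the precheck pattern the test exercises: validate on a deep copy
# so the original plan is never mutated. update_plan_from_rsp is assumed to
# be the updater used on the happy path elsewhere in write_plan.py.
def precheck_update_plan_from_rsp(rsp: str, current_plan: Plan) -> tuple:
    temp_plan = copy.deepcopy(current_plan)
    try:
        update_plan_from_rsp(rsp, temp_plan)
        return True, ""
    except Exception as e:
        return False, str(e)
```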

View file

@ -5,6 +5,7 @@
@Author : alexanderwu
@File : test_schema.py
"""
import pytest
from metagpt.schema import AIMessage, Message, SystemMessage, UserMessage
from metagpt.schema import Task, Plan
@ -104,3 +105,82 @@ class TestPlan:
finished_tasks = plan.get_finished_tasks()
assert len(finished_tasks) == 1
assert finished_tasks[0].task_id == "1"
def test_reset_task_existing(self):
plan = Plan(goal="")
task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", finished=True)
plan.add_tasks([task])
plan.reset_task("1")
reset_task = plan.task_map["1"]
assert reset_task.code == ""
assert reset_task.result == ""
assert not reset_task.is_finished
def test_reset_task_non_existing(self):
plan = Plan(goal="")
task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", finished=True)
plan.add_tasks([task])
plan.reset_task("2") # Task with ID 2 does not exist
assert "1" in plan.task_map
assert "2" not in plan.task_map
def test_replace_task_with_dependents(self):
plan = Plan(goal="")
tasks = [Task(task_id="1", instruction="First Task", finished=True),
Task(task_id="2", instruction="Second Task", dependent_task_ids=["1"], finished=True)]
plan.add_tasks(tasks)
new_task = Task(task_id="1", instruction="Updated First Task")
plan.replace_task(new_task)
assert plan.task_map["1"].instruction == "Updated First Task"
assert not plan.task_map["2"].is_finished # Dependent task should be reset
assert plan.task_map["2"].code == ""
assert plan.task_map["2"].result == ""
def test_replace_task_non_existing(self):
plan = Plan(goal="")
task = Task(task_id="1", instruction="First Task")
plan.add_tasks([task])
new_task = Task(task_id="2", instruction="New Task")
plan.replace_task(new_task) # Task with ID 2 does not exist in plan
assert "1" in plan.task_map
assert "2" not in plan.task_map
def test_append_task_with_valid_dependencies(self):
plan = Plan(goal="Test")
existing_task = [Task(task_id="1")]
plan.add_tasks(existing_task)
new_task = Task(task_id="2", dependent_task_ids=["1"])
plan.append_task(new_task)
assert plan.tasks[-1].task_id == "2"
assert plan.task_map["2"] == new_task
def test_append_task_with_invalid_dependencies(self):
new_task = Task(task_id="2", dependent_task_ids=["3"])
plan = Plan(goal="Test")
with pytest.raises(AssertionError):
plan.append_task(new_task)
def test_append_task_without_dependencies(self):
plan = Plan(goal="Test")
existing_task = [Task(task_id="1")]
plan.add_tasks(existing_task)
new_task = Task(task_id="2")
plan.append_task(new_task)
assert len(plan.tasks) == 2
assert plan.current_task_id == "1"
def test_append_task_updates_current_task(self):
finished_task = Task(task_id="1", is_finished=True)
new_task = Task(task_id="2")
plan = Plan(goal="Test", tasks=[finished_task])
plan.append_task(new_task)
assert plan.current_task_id == "2"
def test_update_current_task(self):
task1 = Task(task_id="1", is_finished=True)
task2 = Task(task_id="2")
plan = Plan(goal="Test", tasks=[task1, task2])
plan._update_current_task()
assert plan.current_task_id == "2"
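A sketch of what these tests imply `_update_current_task` does, namely pointing `current_task_id` at the first unfinished task; illustrative only:

```python
# Sketch inferred from the tests above: current_task_id moves to the first
# unfinished task. Illustrative, not the actual Plan implementation.
def _update_current_task(self) -> None:
    for task in self.tasks:
        if not task.is_finished:
            self.current_task_id = task.task_id
            break
```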

View file

@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
# @Date : 12/12/2023 4:17 PM
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import pytest
import os
import json
import nbformat
from metagpt.actions.write_analysis_code import WriteCodeByGenerate
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.utils.save_code import save_code_file, DATA_PATH
def test_save_code_file_python():
save_code_file("example", "print('Hello, World!')")
file_path = DATA_PATH / "output" / "example" / "code.py"
assert os.path.exists(file_path), f"File does not exist: {file_path}"
def test_save_code_file_python_content():
save_code_file("example", "print('Hello, World!')")
file_path = DATA_PATH / "output" / "example" / "code.py"
with open(file_path, "r", encoding="utf-8") as fp:
content = fp.read()
assert "print('Hello, World!')" in content, "File content does not match"
def test_save_code_file_json():
save_code_file("example_json", "print('Hello, JSON!')", file_format="json")
file_path = DATA_PATH / "output" / "example_json" / "code.json"
with open(file_path, "r", encoding="utf-8") as fp:
data = json.load(fp)
assert "code" in data, "JSON key 'code' is missing"
assert data["code"] == "print('Hello, JSON!')", "JSON content does not match"
@pytest.mark.asyncio
async def test_save_code_file_notebook():
code = await WriteCodeByGenerate().run(
context="basic python, hello world", plan="", code_steps="", temperature=0.0
)
executor = ExecutePyCode()
await executor.run(code)
# Save as a Notebook file
save_code_file("example_nb", executor.nb, file_format="ipynb")
file_path = DATA_PATH / "output" / "example_nb" / "code.ipynb"
assert os.path.exists(file_path), f"Notebook file does not exist: {file_path}"
# Additional checks specific to notebook format
notebook = nbformat.read(file_path, as_version=4)
assert len(notebook.cells) > 0, "Notebook should have at least one cell"
first_cell_source = notebook.cells[0].source
assert "print('Hello, World!')" in first_cell_source, "Notebook cell content does not match"