From 8b3d640dd60b3accce7845744f24522a8ec1bd22 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Fri, 1 Dec 2023 00:44:47 +0800
Subject: [PATCH 01/34] add kaggle manager

---
 kaggle_team.py                  |  37 +++++++++
 metagpt/roles/kaggle_manager.py | 129 ++++++++++++++++++++++++++++++++
 metagpt/schema.py               |   1 +
 3 files changed, 167 insertions(+)
 create mode 100644 kaggle_team.py
 create mode 100644 metagpt/roles/kaggle_manager.py

diff --git a/kaggle_team.py b/kaggle_team.py
new file mode 100644
index 000000000..0743d445b
--- /dev/null
+++ b/kaggle_team.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import asyncio
+
+import fire
+
+from metagpt.roles.kaggle_manager import KaggleManager
+from metagpt.roles.ml_engineer import MLEngineer
+from metagpt.team import Team
+
+async def main(
+    # competition: str,
+    # data_desc: str,
+    # requirement: str,
+    investment: float = 3.0,
+    n_round: int = 5,
+):
+    competition, data_desc, requirement = (
+        "titanic",
+        "Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
+        "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
+    )
+
+    team = Team()
+    team.hire(
+        [
+            KaggleManager(competition=competition, data_desc=data_desc),
+            MLEngineer(goal=requirement),
+        ]
+    )
+
+    team.invest(investment)
+    team.start_project(requirement)
+    await team.run(n_round=n_round)
+
+if __name__ == '__main__':
+    fire.Fire(main)
diff --git a/metagpt/roles/kaggle_manager.py b/metagpt/roles/kaggle_manager.py
new file mode 100644
index 000000000..e902d99a0
--- /dev/null
+++ b/metagpt/roles/kaggle_manager.py
@@ -0,0 +1,129 @@
+from typing import Dict, List, Union, Tuple
+import json
+import subprocess
+
+import fire
+import pandas as pd
+
+from metagpt.const import WORKSPACE_ROOT
+from metagpt.roles import Role
+from metagpt.actions import Action, BossRequirement
+from metagpt.actions.write_analysis_code import AskReview, SummarizeAnalysis
+from metagpt.schema import Message, Task, Plan
+from metagpt.logs import logger
+
+import os
+os.environ["KAGGLE_USERNAME"] = "xxx"
+os.environ["KAGGLE_KEY"] = "xxx"
+
+def run_command(cmd):
+    print(cmd)
+    output = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+    if output.returncode != 0:
+        print("Error output:", output.stderr)
+        exit(output.returncode)  # propagate the failure; a bare exit() would report success (status 0)
+    else:
+        print(output.stdout)
+    return output.stdout
+
+class DownloadData(Action):
+
+    async def run(self, competition, data_desc="") -> str:
+        data_path = WORKSPACE_ROOT / competition
+        
+        output = run_command(f"kaggle competitions list --search {competition}")
+        assert "No competitions found" not in output, "You must provide the correct competition name"  # stdout is returned unstripped, so match by substring
+        
+        run_command(f"kaggle competitions download {competition} --path {WORKSPACE_ROOT}")
+        
+        # if not os.path.exists(data_path):
+        if True:
+            run_command(f"unzip -o {WORKSPACE_ROOT / '*.zip'} -d {data_path}")  # FIXME: not safe
+        
+        file_list = run_command(f"ls {data_path}")
+
+        rsp = f"""
+        Location:
+        Data downloaded at {data_path} folder, including {file_list}
+        Data Description:
+        {data_desc}
+        """
+        return rsp
+
+class SubmitResult(Action):
+    PROMPT_TEMPLATE = """
+    # Context
+    {context}
+    # Your task
+    Extract the prediction file for test set, return only the path string, e.g., xxx.csv, xxx.xlsx
+    """
+
+    def __init__(self, name: str = "", context=None, llm=None) -> str:
+        super().__init__(name, context, llm)
+
+    async def _parse_submit_file_path(self, context) -> str:
+        prompt = self.PROMPT_TEMPLATE.format(context=context)
+        rsp = await self._aask(prompt)
+        return rsp
+
+    async def run(self, competition, submit_message="") -> str:
+        submit_file_path = self._parse_submit_file_path(submit_message)
+
+        data_path = WORKSPACE_ROOT / competition
+
+        run_command(f"kaggle competitions submit {competition} -f {submit_file_path} -m '{submit_message}'")
+        run_command(f"kaggle competitions leaderboard --show --csv {competition} > {data_path / 'leaderboard.csv'}")
+        run_command(f"kaggle competitions submissions --csv {competition} > {data_path / 'submission.csv'}")
+        
+        leaderboard = pd.read_csv(data_path / 'leaderboard.csv')
+        submission = pd.read_csv(data_path / 'submission.csv')
+        submission_score = submission.loc[0, "publicScore"]
+        submission_rank = leaderboard.loc[leaderboard["score"] == submission_score].index[0]
+        submission_rank_pct = round(submission_rank / len(leaderboard), 4) * 100
+
+        # best_score = max(submission["publicScore"])
+        # best_rank = leaderboard.loc[leaderboard["score"] == best_score].index[0]
+
+        submission_summary = f"""
+        ## All History
+        {submission.to_json(orient="records")}
+        ## Current
+        Current submission score: {submission_score}, rank: {submission_rank} (top {submission_rank_pct}%);
+        """
+        print(submission_summary)
+        return submission_summary
+
+
+class KaggleManager(Role):
+    def __init__(
+        self, name="ABC", profile="KaggleManager", goal="", competition="titanic", data_desc=""
+    ):
+        super().__init__(name=name, profile=profile, goal=goal)
+        self._init_actions([DownloadData, SubmitResult])
+        self._watch([BossRequirement, SummarizeAnalysis])
+        self.competition = competition
+        self.data_desc = data_desc  # currently passed in, later can be scrapped down from web by another Role
+
+    async def _think(self):
+        observed = self.get_memories()[-1].cause_by
+        if observed == BossRequirement:
+            self._set_state(0)  # DownloadData, get competition of interest from human, download datasets
+        elif observed == SummarizeAnalysis:
+            self._set_state(1)  # SubmitResult, get prediction from MLEngineer and submit it to Kaggle
+        elif observed == SubmitResult:
+            self._set_state(2)  # AskReview, ask human for improvement
+
+    async def _act(self):
+        todo = self._rc.todo
+        logger.info(f"{self._setting}: ready to {self._rc.todo}")
+
+        if isinstance(todo, DownloadData):
+            rsp = await todo.run(self.competition, self.data_desc)
+
+        elif isinstance(todo, SubmitResult):
+            submit_message = self.get_memories()[-1].content  # use analysis summary from MLEngineer as submission message
+            rsp = await todo.run(competition=self.competition, submit_message=submit_message)
+
+        msg = Message(content=rsp, role="user", cause_by=type(todo))
+
+        return msg
diff --git a/metagpt/schema.py b/metagpt/schema.py
index e39f54a0c..601bdcea2 100644
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@@ -85,6 +85,7 @@ class Task(BaseModel):
 
 class Plan(BaseModel):
     goal: str
+    context: str = ""
     tasks: list[Task] = []
     task_map: dict[str, Task] = {}
     current_task_id = ""

From d3d08fe5f33cf65fcf74442d2dd754ffed1c2b7a Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Sat, 2 Dec 2023 01:34:22 +0800
Subject: [PATCH 02/34] more plan operation, review update, add kaggle team

---
 config/config.yaml              |   5 +-
 kaggle_team.py                  |   3 +-
 metagpt/actions/ml_da_action.py | 119 +++++++++++++++++++++++++++++
 metagpt/actions/write_plan.py   |   2 +-
 metagpt/config.py               |   3 +
 metagpt/prompts/ml_engineer.py  |  11 +++
 metagpt/roles/kaggle_manager.py |  65 ++++++++++------
 metagpt/roles/ml_engineer.py    | 129 ++++++++++++++++----------------
 metagpt/schema.py               |  42 +++++++++++
 tests/metagpt/test_schema.py    |  39 ++++++++++
 10 files changed, 330 insertions(+), 88 deletions(-)
 create mode 100644 metagpt/actions/ml_da_action.py

diff --git a/config/config.yaml b/config/config.yaml
index bed67083c..52a8eb036 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -94,4 +94,7 @@ MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k
 ### browser path for pyppeteer engine, support Chrome, Chromium,MS Edge
 #PYPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable"
 
-PROMPT_FORMAT: json #json or markdown
\ No newline at end of file
+PROMPT_FORMAT: json #json or markdown
+
+KAGGLE_USERNAME: ""
+KAGGLE_KEY: ""
\ No newline at end of file
diff --git a/kaggle_team.py b/kaggle_team.py
index 0743d445b..659c4a495 100644
--- a/kaggle_team.py
+++ b/kaggle_team.py
@@ -12,13 +12,14 @@ async def main(
     # competition: str,
     # data_desc: str,
     # requirement: str,
-    investment: float = 3.0,
+    investment: float = 5.0,
     n_round: int = 5,
 ):
     competition, data_desc, requirement = (
         "titanic",
         "Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
         "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
+        # "generate a random prediction of the same shape as gender_submission.csv and save",
     )
 
     team = Team()
diff --git a/metagpt/actions/ml_da_action.py b/metagpt/actions/ml_da_action.py
new file mode 100644
index 000000000..9f903fd22
--- /dev/null
+++ b/metagpt/actions/ml_da_action.py
@@ -0,0 +1,119 @@
+import json
+from typing import Dict, List, Union
+
+from metagpt.actions import Action
+from metagpt.schema import Message, Plan
+from metagpt.logs import logger
+
+
+def truncate(result: str, keep_len: int = 1000) -> str:
+    """Return at most the last `keep_len` characters of `result`, prefixed by a truncation notice."""
+    desc = f"Truncated to show only the last {keep_len} characters\n"
+    if result.startswith(desc):
+        # drop the notice added by an earlier call so it is neither counted nor duplicated
+        result = result[len(desc) :]
+
+    if len(result) > keep_len:
+        result = result[-keep_len:]
+
+    return desc + result
+
+
+class ReviewConst:
+    TASK_REVIEW_TRIGGER = "task"
+    CODE_REVIEW_TRIGGER = "code"
+    CONTINUE_WORD = ["confirm", "continue", "c", "yes", "y"]
+    CHANGE_WORD = ["change"]
+    EXIT_WORD = ["exit"]
+    TASK_REVIEW_INSTRUCTION = (
+        f"If you want to change, add, delete a task or merge tasks in the plan, say '{CHANGE_WORD[0]} task task_id or current task, ... (things to change)' "
+        f"If you confirm the output from the current task and wish to continue, type: {CONTINUE_WORD[0]}"
+    )
+    CODE_REVIEW_INSTRUCTION = (
+        f"If you want the codes to be rewritten, say '{CHANGE_WORD[0]} ... (your change advice)' "
+        f"If you want to leave it as is, type: {CONTINUE_WORD[0]} or {CONTINUE_WORD[1]}"
+    )
+    EXIT_INSTRUCTION = f"If you want to terminate the process, type: {EXIT_WORD[0]}"
+
+
+class AskReview(Action):
+    async def run(
+        self, context: List[Message], plan: Plan = None, trigger: str = "task"
+    ):
+        logger.info("Current overall plan:")
+        logger.info(
+            "\n".join(
+                [
+                    f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}"
+                    for task in plan.tasks
+                ]
+            )
+        )
+
+        logger.info("most recent context:")
+        latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
+        review_instruction = (
+            ReviewConst.TASK_REVIEW_INSTRUCTION
+            if trigger == ReviewConst.TASK_REVIEW_TRIGGER
+            else ReviewConst.CODE_REVIEW_INSTRUCTION
+        )
+        prompt = (
+            f"This is a <{trigger}> review. Please review output from {latest_action}\n"
+            f"{review_instruction}\n"
+            f"{ReviewConst.EXIT_INSTRUCTION}\n"
+            "Please type your review below:\n"
+        )
+
+        rsp = input(prompt)
+
+        if rsp.lower() in ReviewConst.EXIT_WORD:
+            exit()
+
+        confirmed = rsp.lower() in ReviewConst.CONTINUE_WORD
+
+        return rsp, confirmed
+
+
+class SummarizeAnalysis(Action):
+    PROMPT_TEMPLATE = """
+    # Context
+    {context}
+    # Summary
+    Output a 30-word summary on analysis tool and modeling algorithms you have used, and the corresponding result. Make sure to announce the complete path to your test prediction file. Your summary:
+    """
+
+    def __init__(self, name: str = "", context=None, llm=None) -> str:
+        super().__init__(name, context, llm)
+
+    async def run(self, conmpleted_plan: Plan) -> str:
+        tasks = json.dumps(
+            [task.dict() for task in conmpleted_plan.tasks],
+            indent=4,
+            ensure_ascii=False,
+        )  # all tasks finished, return all task outputs
+        prompt = self.PROMPT_TEMPLATE.format(context=tasks)
+        summary = await self._aask(prompt)
+        return summary
+
+
+class Reflect(Action):
+    PROMPT_TEMPLATE = """
+    # User Requirement
+    {user_requirement}
+    # Context
+    {context}
+    # Summary
+    Above is all your attempts to tackle the user requirement. You plan, act, submit your output, and get the result and feedback.
+    First, summarize each of your previous trial in a triple of (your methods, the corresponding result, potential improvement), list them out.
+    # Takeaways
+    Second, carefully find key takeaways from your summarization in a step-by-step thinking process
+    # Guidance
+    Finally, make a concise one-sentence guidance for improving your future plan.
+    Your response:
+    """
+
+    async def run(self, context: str) -> str:
+        user_requirement = "Score as high as possible in a data modeling competition"
+        prompt = self.PROMPT_TEMPLATE.format(context=context, user_requirement=user_requirement)
+        rsp = await self._aask(prompt)
+        return rsp
diff --git a/metagpt/actions/write_plan.py b/metagpt/actions/write_plan.py
index dcfa25d55..5ff6d965c 100644
--- a/metagpt/actions/write_plan.py
+++ b/metagpt/actions/write_plan.py
@@ -17,7 +17,7 @@ class WritePlan(Action):
     __context__
     # Task:
     Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to __max_tasks__ tasks.
-    If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes.
+    If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
     Output a list of jsons following the format:
     ```json
     [
diff --git a/metagpt/config.py b/metagpt/config.py
index 3f9e742bd..5973adfc4 100644
--- a/metagpt/config.py
+++ b/metagpt/config.py
@@ -95,6 +95,9 @@ class Config(metaclass=Singleton):
 
         self.prompt_format = self._get("PROMPT_FORMAT", "markdown")
 
+        self.kaggle_username = self._get("KAGGLE_USERNAME", "")
+        self.kaggle_key = self._get("KAGGLE_KEY", "")
+
     def _init_with_config_files_and_env(self, configs: dict, yaml_file):
         """Load from config/key.yaml, config/config.yaml, and env in decreasing order of priority"""
         configs.update(os.environ)
diff --git a/metagpt/prompts/ml_engineer.py b/metagpt/prompts/ml_engineer.py
index 55ac27d82..e78ea4166 100644
--- a/metagpt/prompts/ml_engineer.py
+++ b/metagpt/prompts/ml_engineer.py
@@ -168,3 +168,14 @@ ML_MODULE_MAP = {
     "classification_model": "metagpt.tools.functions.libs.machine_learning.ml_model",
     "regression_model": "metagpt.tools.functions.libs.machine_learning.ml_model",
 }
+
+STRUCTURAL_CONTEXT = """
+## User Requirement
+{user_requirement}
+## Data Description
+{data_desc}
+## Current Plan
+{tasks}
+## Current Task
+{current_task}
+"""
diff --git a/metagpt/roles/kaggle_manager.py b/metagpt/roles/kaggle_manager.py
index e902d99a0..d20769b92 100644
--- a/metagpt/roles/kaggle_manager.py
+++ b/metagpt/roles/kaggle_manager.py
@@ -5,16 +5,18 @@ import subprocess
 import fire
 import pandas as pd
 
+from metagpt.config import CONFIG
 from metagpt.const import WORKSPACE_ROOT
 from metagpt.roles import Role
 from metagpt.actions import Action, BossRequirement
-from metagpt.actions.write_analysis_code import AskReview, SummarizeAnalysis
+from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis
 from metagpt.schema import Message, Task, Plan
 from metagpt.logs import logger
+from metagpt.utils.common import CodeParser
 
 import os
-os.environ["KAGGLE_USERNAME"] = "xxx"
-os.environ["KAGGLE_KEY"] = "xxx"
+os.environ["KAGGLE_USERNAME"] = CONFIG.kaggle_username
+os.environ["KAGGLE_KEY"] = CONFIG.kaggle_key
 
 def run_command(cmd):
     print(cmd)
@@ -38,6 +40,7 @@ class DownloadData(Action):
         
         # if not os.path.exists(data_path):
         if True:
+            # run_command(f"rm -r {data_path / '*'}")
             run_command(f"unzip -o {WORKSPACE_ROOT / '*.zip'} -d {data_path}")  # FIXME: not safe
         
         file_list = run_command(f"ls {data_path}")
@@ -52,24 +55,30 @@ class DownloadData(Action):
 
 class SubmitResult(Action):
     PROMPT_TEMPLATE = """
-    # Context
-    {context}
+    # Summary
+    __summary__
     # Your task
-    Extract the prediction file for test set, return only the path string, e.g., xxx.csv, xxx.xlsx
+    Extract the file path for test set prediction from the summary above, output a json following the format:
+    ```json
+    {"file_path": str = "the file path, for example, /path/to/the/prediction/file/xxx.csv, /path/to/the/prediction/file/xxx.xlsx"}
+    ```
     """
 
     def __init__(self, name: str = "", context=None, llm=None) -> str:
         super().__init__(name, context, llm)
 
     async def _parse_submit_file_path(self, context) -> str:
-        prompt = self.PROMPT_TEMPLATE.format(context=context)
+        prompt = self.PROMPT_TEMPLATE.replace("__summary__", context)
         rsp = await self._aask(prompt)
-        return rsp
+        rsp = CodeParser.parse_code(block=None, text=rsp)
+        file_path = json.loads(rsp)["file_path"]
+        return file_path
 
     async def run(self, competition, submit_message="") -> str:
-        submit_file_path = self._parse_submit_file_path(submit_message)
+        submit_file_path = await self._parse_submit_file_path(submit_message)
 
         data_path = WORKSPACE_ROOT / competition
+        submit_message = submit_message.replace("'", "")
 
         run_command(f"kaggle competitions submit {competition} -f {submit_file_path} -m '{submit_message}'")
         run_command(f"kaggle competitions leaderboard --show --csv {competition} > {data_path / 'leaderboard.csv'}")
@@ -77,20 +86,20 @@ class SubmitResult(Action):
         
         leaderboard = pd.read_csv(data_path / 'leaderboard.csv')
         submission = pd.read_csv(data_path / 'submission.csv')
-        submission_score = submission.loc[0, "publicScore"]
-        submission_rank = leaderboard.loc[leaderboard["score"] == submission_score].index[0]
-        submission_rank_pct = round(submission_rank / len(leaderboard), 4) * 100
+        print(submission)  # submission.to_json(orient="records")
 
-        # best_score = max(submission["publicScore"])
-        # best_rank = leaderboard.loc[leaderboard["score"] == best_score].index[0]
+        submission_score = submission.loc[0, "publicScore"]
+        best_score = max(submission["publicScore"])  # might be min
+        rank = leaderboard.loc[leaderboard["score"] == best_score].index[0]
+        rank_pct = round(rank / len(leaderboard) * 100, 2)  # round after scaling; round(x, 4) * 100 can yield e.g. 7.000000000000001
 
         submission_summary = f"""
-        ## All History
-        {submission.to_json(orient="records")}
-        ## Current
-        Current submission score: {submission_score}, rank: {submission_rank} (top {submission_rank_pct}%);
+        # All histories:
+        {submission.head(5).to_string()}
+        # Current
+        Current submission score: {submission_score}, best score: {best_score}, best rank: {rank} (top {rank_pct}%)
         """
-        print(submission_summary)
+        logger.info(submission_summary)
         return submission_summary
 
 
@@ -110,8 +119,6 @@ class KaggleManager(Role):
             self._set_state(0)  # DownloadData, get competition of interest from human, download datasets
         elif observed == SummarizeAnalysis:
             self._set_state(1)  # SubmitResult, get prediction from MLEngineer and submit it to Kaggle
-        elif observed == SubmitResult:
-            self._set_state(2)  # AskReview, ask human for improvement
 
     async def _act(self):
         todo = self._rc.todo
@@ -127,3 +134,19 @@ class KaggleManager(Role):
         msg = Message(content=rsp, role="user", cause_by=type(todo))
 
         return msg
+
+if __name__ == "__main__":
+    competition, data_desc, requirement = (
+        "titanic",
+        "Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
+        "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
+    )
+
+    summary = "I used Python with pandas for data preprocessing, sklearn's RandomForestClassifier for modeling, and achieved 82.12% accuracy on validation. Predictions saved at '/Users/gary/Desktop/data_agents_opt/workspace/titanic/gender_submission.csv'."
+
+    async def main(requirement: str = requirement):
+        role = KaggleManager(competition=competition, data_desc=data_desc)
+        # await role.run(Message(content="", cause_by=BossRequirement))
+        await role.run(Message(content=summary, cause_by=SummarizeAnalysis))
+
+    fire.Fire(main)
\ No newline at end of file
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 1e4367372..4536395ba 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -7,55 +7,14 @@ import fire
 from metagpt.roles import Role
 from metagpt.actions import Action
 from metagpt.schema import Message, Task, Plan
+from metagpt.memory import Memory
 from metagpt.logs import logger
 from metagpt.actions.write_plan import WritePlan
 from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
+from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst, truncate
 from metagpt.actions.execute_code import ExecutePyCode
-
-STRUCTURAL_CONTEXT = """
-## User Requirement
-{user_requirement}
-## Current Plan
-{tasks}
-## Current Task
-{current_task}
-"""
-
-
-def truncate(result: str, keep_len: int = 1000) -> str:
-    desc = "Truncated to show only the last 1000 characters\n"
-    if result.startswith(desc):
-        result = result[-len(desc) :]
-
-    if len(result) > keep_len:
-        result = result[-keep_len:]
-
-    if not result.startswith(desc):
-        return desc + result
-    return desc
-
-
-class AskReview(Action):
-    async def run(self, context: List[Message], plan: Plan = None):
-        logger.info("Current overall plan:")
-        logger.info(
-            "\n".join([f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}" for task in plan.tasks])
-        )
-
-        logger.info("most recent context:")
-        latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
-        prompt = f"\nPlease review output from {latest_action}:\n" \
-            "If you want to change a task in the plan, say 'change task task_id, ... (things to change)'\n" \
-            "If you confirm the output and wish to continue with the current process, type CONFIRM\n" \
-            "If you want to terminate the process, type exit:\n"
-        rsp = input(prompt)
-
-        if rsp.lower() in ("exit"):
-            exit()
-
-        confirmed = rsp.lower() in ("confirm", "yes", "y")
-
-        return rsp, confirmed
+from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
+from metagpt.prompts.ml_engineer import STRUCTURAL_CONTEXT
 
 
 class WriteTaskGuide(Action):
@@ -69,13 +28,35 @@ class MLEngineer(Role):
     ):
         super().__init__(name=name, profile=profile, goal=goal)
         self._set_react_mode(react_mode="plan_and_act")
+        self._watch([DownloadData, SubmitResult])
+
         self.plan = Plan(goal=goal)
         self.use_tools = False
         self.use_task_guide = False
         self.execute_code = ExecutePyCode()
         self.auto_run = auto_run
 
+        # memory for working on each task, discarded each time a task is done
+        self.working_memory = Memory()
+
     async def _plan_and_act(self):
+
+        ### Actions in a multi-agent multi-turn setting ###
+        memories = self.get_memories()
+        if memories:
+            latest_event = memories[-1].cause_by
+            if latest_event == DownloadData:
+                self.plan.context = memories[-1].content
+            elif latest_event == SubmitResult:
+                # get feedback for improvement from human, add to working memory
+                await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
+                # self reflect on previous plan outcomes and think about how to improve the plan, add to working  memory
+                prev_plan_outcomes = memories[-1].content
+                reflection = await Reflect().run(context=prev_plan_outcomes)
+                self.working_memory.add(Message(content=reflection, role="assistant"))
+
+
+        ### Common Procedure in both single- and multi-agent setting ###
         # create initial plan and update until confirmation
         await self._update_plan()
 
@@ -87,7 +68,7 @@ class MLEngineer(Role):
             code, result, success = await self._write_and_exec_code()
 
             # ask for acceptance, users can other refuse and change tasks in the plan
-            task_result_confirmed = await self._ask_review()
+            review, task_result_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
 
             if success and task_result_confirmed:
                 # tick off this task and record progress
@@ -98,7 +79,16 @@ class MLEngineer(Role):
 
             else:
                 # update plan according to user's feedback and to take on changed tasks
-                await self._update_plan()
+                await self._update_plan(review)
+
+        completed_plan_memory = self.get_useful_memories()  # completed plan as a outcome
+        self._rc.memory.add(completed_plan_memory[0])  # add to persistent memory
+
+        summary = await SummarizeAnalysis().run(self.plan)
+        rsp = Message(content=summary, cause_by=SummarizeAnalysis)
+        self._rc.memory.add(rsp)
+
+        return rsp
 
     async def _write_and_exec_code(self, max_retry: int = 3):
         task_guide = (
@@ -143,23 +133,28 @@ class MLEngineer(Role):
 
             if "!pip" in code:
                 success = False
-            # if not success:
-            #     await self._ask_review()
 
             counter += 1
 
+            if not success and counter >= max_retry:
+                logger.info("coding failed!")
+                review, _ = await self._ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER)
+                if any(word in review for word in ReviewConst.CHANGE_WORD):  # CHANGE_WORD is a list; `list in str` raises TypeError
+                    counter = 0  # redo the task again with help of human suggestions
+
         return code, result, success
 
-    async def _ask_review(self):
-        if not self.auto_run:
+    async def _ask_review(self, auto_run: bool = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER):
+        auto_run = self.auto_run if auto_run is None else auto_run  # an explicit False must win over self.auto_run
+        if not auto_run:
             context = self.get_useful_memories()
-            review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan)
+            review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan, trigger=trigger)
             if not confirmed:
                 self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
-            return confirmed
-        return True
+            return review, confirmed
+        return "", True
 
-    async def _update_plan(self, max_tasks: int = 3):
+    async def _update_plan(self, review: str = "", max_tasks: int = 3):
         plan_confirmed = False
         while not plan_confirmed:
             context = self.get_useful_memories()
@@ -167,30 +162,36 @@ class MLEngineer(Role):
             self.working_memory.add(
                 Message(content=rsp, role="assistant", cause_by=WritePlan)
             )
-            plan_confirmed = await self._ask_review()
+
+            # TODO: precheck plan before asking reviews
+
+            _, plan_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
 
         tasks = WritePlan.rsp_to_tasks(rsp)
-        self.plan.add_tasks(tasks)
-        self.working_memory.clear()
+        if len(tasks) == 1 and self.plan.has_task_id(tasks[0].task_id):
+            self.plan.replace_task(tasks[0])
+        else:
+            self.plan.add_tasks(tasks)
+        self.working_memory.clear()        
 
     def get_useful_memories(self) -> List[Message]:
         """find useful memories only to reduce context length and improve performance"""
 
         user_requirement = self.plan.goal
+        data_desc = self.plan.context
         tasks = json.dumps(
             [task.dict() for task in self.plan.tasks], indent=4, ensure_ascii=False
         )
         current_task = self.plan.current_task.json() if self.plan.current_task else {}
         context = STRUCTURAL_CONTEXT.format(
-            user_requirement=user_requirement, tasks=tasks, current_task=current_task
+            user_requirement=user_requirement, data_desc=data_desc, tasks=tasks, current_task=current_task
         )
         context_msg = [Message(content=context, role="user")]
 
-        return context_msg + self.working_memory.get()
-
-    @property
-    def working_memory(self):
-        return self._rc.memory
+        return context_msg + self.get_working_memories()
+    
+    def get_working_memories(self) -> List[Message]:
+        return self.working_memory.get()
 
 
 if __name__ == "__main__":
diff --git a/metagpt/schema.py b/metagpt/schema.py
index 601bdcea2..9b86a2448 100644
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@@ -156,7 +156,49 @@ class Plan(BaseModel):
 
         # Update the task map for quick access to tasks by ID
         self.task_map = {task.task_id: task for task in self.tasks}
+    
+    def reset_task(self, task_id: str):
+        """
+        Clear code and result of the task based on task_id, and set the task as unfinished.
 
+        Args:
+            task_id (str): The ID of the task to be reset.
+
+        Returns:
+            None
+        """
+        if task_id in self.task_map:
+            task = self.task_map[task_id]
+            task.code = ""
+            task.result = ""
+            task.is_finished = False
+
+    def replace_task(self, new_task: Task):
+        """
+        Replace an existing task with the new input task based on task_id, and reset all tasks depending on it.
+
+        Args:
+            new_task (Task): The new task that will replace an existing one.
+
+        Returns:
+            None
+        """
+        if new_task.task_id in self.task_map:
+            # Replace the task in the task map and the task list
+            self.task_map[new_task.task_id] = new_task
+            for i, task in enumerate(self.tasks):
+                if task.task_id == new_task.task_id:
+                    self.tasks[i] = new_task
+                    break
+
+            # Reset dependent tasks
+            for task in self.tasks:
+                if new_task.task_id in task.dependent_task_ids:
+                    self.reset_task(task.task_id)
+
+    def has_task_id(self, task_id: str) -> bool:
+        return task_id in self.task_map
+    
     @property
     def current_task(self) -> Task:
         """Find current task to execute
diff --git a/tests/metagpt/test_schema.py b/tests/metagpt/test_schema.py
index 8f65d3785..324a083ca 100644
--- a/tests/metagpt/test_schema.py
+++ b/tests/metagpt/test_schema.py
@@ -104,3 +104,42 @@ class TestPlan:
         finished_tasks = plan.get_finished_tasks()
         assert len(finished_tasks) == 1
         assert finished_tasks[0].task_id == "1"
+
+    def test_reset_task_existing(self):
+        """reset_task should blank code/result and clear is_finished on a finished task."""
+        # The flag reset_task clears is `is_finished`; the former `finished=` kwarg did not name that field.
+        task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", is_finished=True)
+        plan = Plan(goal="")
+        plan.add_tasks([task])
+        plan.reset_task("1")
+        reset_task = plan.task_map["1"]
+        assert (reset_task.code, reset_task.result, reset_task.is_finished) == ("", "", False)
+
+    def test_reset_task_non_existing(self):
+        """Resetting an unknown task ID must not add that ID to the plan's task map."""
+        task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", is_finished=True)
+        plan = Plan(goal="")
+        plan.add_tasks([task])
+        plan.reset_task("2")  # Task with ID 2 does not exist
+        assert "1" in plan.task_map and "2" not in plan.task_map
+
+    def test_replace_task_with_dependents(self):
+        """Replacing task 1 should reset task 2, which lists task 1 as a dependency."""
+        plan = Plan(goal="")
+        tasks = [Task(task_id="1", instruction="First Task", is_finished=True),
+                 Task(task_id="2", instruction="Second Task", dependent_task_ids=["1"], is_finished=True)]
+        plan.add_tasks(tasks)
+        plan.replace_task(Task(task_id="1", instruction="Updated First Task"))
+        assert plan.task_map["1"].instruction == "Updated First Task"
+        assert not plan.task_map["2"].is_finished  # Dependent task should be reset
+        assert plan.task_map["2"].code == ""
+        assert plan.task_map["2"].result == ""
+
+    def test_replace_task_non_existing(self):
+        plan = Plan(goal="")
+        task = Task(task_id="1", instruction="First Task")
+        plan.add_tasks([task])
+        new_task = Task(task_id="2", instruction="New Task")
+        plan.replace_task(new_task)  # Task with ID 2 does not exist in plan
+        assert "1" in plan.task_map
+        assert "2" not in plan.task_map

From 8d7657f347d51feb3048d6774bdbe17308ecf2ee Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Mon, 4 Dec 2023 14:29:47 +0800
Subject: [PATCH 03/34] update reflect on previous plan

---
 config/config.yaml              |  4 ++--
 kaggle_team.py                  |  7 ++++---
 metagpt/actions/ml_da_action.py | 37 ++++++++++++++++++++-------------
 metagpt/roles/kaggle_manager.py |  4 ++--
 metagpt/roles/ml_engineer.py    | 19 +++++++++++------
 5 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 52a8eb036..bf998def7 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -96,5 +96,5 @@ MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k
 
 PROMPT_FORMAT: json #json or markdown
 
-KAGGLE_USERNAME: ""
-KAGGLE_KEY: ""
\ No newline at end of file
+# KAGGLE_USERNAME: ""
+# KAGGLE_KEY: ""
\ No newline at end of file
diff --git a/kaggle_team.py b/kaggle_team.py
index 659c4a495..e8ab3ec41 100644
--- a/kaggle_team.py
+++ b/kaggle_team.py
@@ -13,20 +13,21 @@ async def main(
     # data_desc: str,
     # requirement: str,
     investment: float = 5.0,
-    n_round: int = 5,
+    n_round: int = 10,
+    auto_run: bool = False,
 ):
     competition, data_desc, requirement = (
         "titanic",
         "Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
         "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
-        # "generate a random prediction of the same shape as gender_submission.csv and save",
+        # "generate a random prediction, replace the Survived column of gender_submission.csv, and save the prediction to a new submission file",
     )
 
     team = Team()
     team.hire(
         [
             KaggleManager(competition=competition, data_desc=data_desc),
-            MLEngineer(goal=requirement),
+            MLEngineer(goal=requirement, auto_run=auto_run),
         ]
     )
 
diff --git a/metagpt/actions/ml_da_action.py b/metagpt/actions/ml_da_action.py
index 9f903fd22..a4537dad9 100644
--- a/metagpt/actions/ml_da_action.py
+++ b/metagpt/actions/ml_da_action.py
@@ -3,6 +3,7 @@ from typing import Dict, List, Union
 
 from metagpt.actions import Action
 from metagpt.schema import Message, Plan
+from metagpt.utils.common import CodeParser
 from metagpt.logs import logger
 
 
@@ -98,22 +99,30 @@ class SummarizeAnalysis(Action):
 
 class Reflect(Action):
     PROMPT_TEMPLATE = """
-    # User Requirement
-    {user_requirement}
     # Context
-    {context}
+    __context__
+    # Latest User Requirement
+    __user_requirement__
     # Summary
     Above is all your attempts to tackle the user requirement. You plan, act, submit your output, and get the result and feedback.
-    First, summarize each of your previous trial in a triple of (your methods, the corresponding result, potential improvement), list them out.
-    # Takeaways
-    Second, carefully find key takeaways from your summarization in a step-by-step thinking process
-    # Guidance
-    Finally, make a concise one-sentence guidance for improving your future plan.
-    Your response:
+    Output a json following the format:
+    ```json
+    {
+        "summary": str = "summarize each of your previous trial in a triple of (your methods, the corresponding result, potential improvement), list them out",
+        "takeaways": str = "carefully find key takeaways from your summarization in a step-by-step thinking process",
+        "reflection": "in one sentence, state executable actions for improving your future plan",
+    }
+    ```
     """
+    REWRITE_PLAN_INSTRUCTION = """When taking this reflection for rewriting plan, modify the current plan in place, replace, add, or delete tasks in the plan,
+    only make necessary change to the current plan, keep reusable tasks unchanged, provide the complete new plan."""
 
-    async def run(self, context: str) -> str:
-        user_requirement = "Score as high as possible in a data modeling competition"
-        prompt = self.PROMPT_TEMPLATE.format(context=context, user_requirement=user_requirement)
-        rsp = await self._aask(prompt)
-        return rsp
+    async def run(self, context: str, user_requirement: str = "") -> str:
+        user_requirement = user_requirement or "Score as high as possible in a data modeling competition"
+        # prompt = self.PROMPT_TEMPLATE.format(context=context, user_requirement=user_requirement)
+        prompt = self.PROMPT_TEMPLATE.replace("__context__", context).replace("__user_requirement__", user_requirement)
+        rsp_json = await self._aask(prompt)
+        rsp = CodeParser.parse_code(block=None, text=rsp_json)
+        reflection = json.loads(rsp)["reflection"]
+        reflection += self.REWRITE_PLAN_INSTRUCTION
+        return reflection
diff --git a/metagpt/roles/kaggle_manager.py b/metagpt/roles/kaggle_manager.py
index d20769b92..354289975 100644
--- a/metagpt/roles/kaggle_manager.py
+++ b/metagpt/roles/kaggle_manager.py
@@ -38,8 +38,8 @@ class DownloadData(Action):
         
         run_command(f"kaggle competitions download {competition} --path {WORKSPACE_ROOT}")
         
-        # if not os.path.exists(data_path):
-        if True:
+        if not os.path.exists(data_path):
+        # if True:
             # run_command(f"rm -r {data_path / '*'}")
             run_command(f"unzip -o {WORKSPACE_ROOT / '*.zip'} -d {data_path}")  # FIXME: not safe
         
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 4536395ba..abd14c7fb 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -48,13 +48,11 @@ class MLEngineer(Role):
             if latest_event == DownloadData:
                 self.plan.context = memories[-1].content
             elif latest_event == SubmitResult:
+                # self reflect on previous plan outcomes and think about how to improve the plan, add to working  memory
+                await self._reflect()
+
                 # get feedback for improvement from human, add to working memory
                 await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
-                # self reflect on previous plan outcomes and think about how to improve the plan, add to working  memory
-                prev_plan_outcomes = memories[-1].content
-                reflection = await Reflect().run(context=prev_plan_outcomes)
-                self.working_memory.add(Message(content=reflection, role="assistant"))
-
 
         ### Common Procedure in both single- and multi-agent setting ###
         # create initial plan and update until confirmation
@@ -172,7 +170,16 @@ class MLEngineer(Role):
             self.plan.replace_task(tasks[0])
         else:
             self.plan.add_tasks(tasks)
-        self.working_memory.clear()        
+        self.working_memory.clear()
+    
+    async def _reflect(self):
+        context = self.get_memories()
+        context = "\n".join([str(msg) for msg in context])
+        # print("*" * 10)
+        # print(context)
+        # print("*" * 10)
+        reflection = await Reflect().run(context=context)
+        self.working_memory.add(Message(content=reflection, role="assistant"))
 
     def get_useful_memories(self) -> List[Message]:
         """find useful memories only to reduce context length and improve performance"""

From b561b2f98252c9174f885f4c82fc1c9eb4ee83df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Tue, 5 Dec 2023 18:58:16 +0800
Subject: [PATCH 04/34] fix: change keep length of result from 1000 to 2000.

---
 metagpt/roles/ml_engineer.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 65583638e..e2203c4fb 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -143,10 +143,12 @@ class MLEngineer(Role):
 
             result, success = await self.execute_code.run(code)
             # truncated the result
-            print(truncate(result))
+            _keep_result_len = 2000
+            truncate_result = truncate(remove_escape_and_color_codes(result), keep_len=_keep_result_len)
+            print(truncate_result)
             # print(result)
             self.working_memory.add(
-                Message(content=truncate(remove_escape_and_color_codes(result)), role="user", cause_by=ExecutePyCode)
+                Message(content=truncate_result, keep_len=_keep_result_len), role="user", cause_by=ExecutePyCode)
             )
 
             if "!pip" in code:

From 98b14bbcc38fd99d39731fe38342e6e2fac96961 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Wed, 6 Dec 2023 16:44:14 +0800
Subject: [PATCH 05/34] chore

---
 metagpt/roles/ml_engineer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index e2203c4fb..34bd81110 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -148,7 +148,7 @@ class MLEngineer(Role):
             print(truncate_result)
             # print(result)
             self.working_memory.add(
-                Message(content=truncate_result, keep_len=_keep_result_len), role="user", cause_by=ExecutePyCode)
+                Message(content=truncate_result, role="user", cause_by=ExecutePyCode)
             )
 
             if "!pip" in code:

From 4231e0a11e7775d22c35ec9f8f4dfc1a233cb925 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Mon, 11 Dec 2023 16:13:34 +0800
Subject: [PATCH 06/34] kaggle iterative trial done

---
 kaggle_team.py                           |  3 +-
 metagpt/actions/execute_code.py          | 28 ++++++++++++++--
 metagpt/actions/ml_da_action.py          | 17 +++++-----
 metagpt/actions/write_plan.py            | 38 ++++++++++++++++++----
 metagpt/roles/kaggle_manager.py          |  3 +-
 metagpt/roles/ml_engineer.py             | 34 ++++++++++++++------
 metagpt/schema.py                        | 39 +++++++++++++++++-----
 tests/metagpt/actions/test_write_plan.py | 20 ++++++------
 tests/metagpt/test_schema.py             | 41 ++++++++++++++++++++++++
 9 files changed, 178 insertions(+), 45 deletions(-)

diff --git a/kaggle_team.py b/kaggle_team.py
index e8ab3ec41..50a8f7288 100644
--- a/kaggle_team.py
+++ b/kaggle_team.py
@@ -19,8 +19,9 @@ async def main(
     competition, data_desc, requirement = (
         "titanic",
         "Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
-        "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
+        # "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
         # "generate a random prediction, replace the Survived column of gender_submission.csv, and save the prediction to a new submission file",
+        "Score as high as possible for the provided dataset, save the test prediction to a csv with two columns PassengerId and Survived"
     )
 
     team = Team()
diff --git a/metagpt/actions/execute_code.py b/metagpt/actions/execute_code.py
index 981aa894c..9c2b8d96c 100644
--- a/metagpt/actions/execute_code.py
+++ b/metagpt/actions/execute_code.py
@@ -8,6 +8,7 @@ from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Dict, List, Tuple, Union
 import traceback
+import re
 
 import nbformat
 from nbclient import NotebookClient
@@ -171,11 +172,34 @@ class ExecutePyCode(ExecuteCode, Action):
                 # TODO: add max_tries for run code.
                 cell_index = len(self.nb.cells) - 1
                 await self.nb_client.async_execute_cell(self.nb.cells[-1], cell_index)
-                return self.parse_outputs(self.nb.cells[-1].outputs), True
+                outputs = self.parse_outputs(self.nb.cells[-1].outputs)
+                success = True
             except Exception as e:
                 # FIXME: CellExecutionError is hard to read. for example `1\0` raise ZeroDivisionError:
                 #  CellExecutionError('An error occurred while executing the following cell:\n------------------\nz=1/0\n------------------\n\n\n\x1b[0;31m---------------------------------------------------------------------------\x1b[0m\n\x1b[0;31mZeroDivisionError\x1b[0m                         Traceback (most recent call last)\nCell \x1b[0;32mIn[1], line 1\x1b[0m\n\x1b[0;32m----> 1\x1b[0m z\x1b[38;5;241m=\x1b[39m\x1b[38;5;241;43m1\x1b[39;49m\x1b[38;5;241;43m/\x1b[39;49m\x1b[38;5;241;43m0\x1b[39;49m\n\n\x1b[0;31mZeroDivisionError\x1b[0m: division by zero\n')
-                return traceback.format_exc(), False
+                outputs = traceback.format_exc()
+                success = False
+            return truncate(remove_escape_and_color_codes(outputs)), success
         else:
             # TODO: markdown
             raise NotImplementedError(f"Not support this code type : {language}, Only support code!")
+
+
+def truncate(result: str, keep_len: int = 2000) -> str:
+    """Return the notice line followed by at most the last `keep_len` characters of `result`."""
+    desc = f"Truncated to show only the last {keep_len} characters\n"
+    # Drop a notice left by an earlier call so it is neither counted nor duplicated.
+    if result.startswith(desc):
+        result = result[len(desc) :]
+    # result[-0:] would be the whole string, so a non-positive keep_len keeps nothing.
+    if len(result) > keep_len:
+        result = result[-keep_len:] if keep_len > 0 else ""
+    # The notice is attached unconditionally, as it already was for inputs without one.
+    return desc + result
+
+
+def remove_escape_and_color_codes(input_str):
+    # Use a regular expression to remove escape characters and color codes
+    pattern = re.compile(r'\x1b\[[0-9;]*[mK]')
+    result = pattern.sub('', input_str)
+    return result
diff --git a/metagpt/actions/ml_da_action.py b/metagpt/actions/ml_da_action.py
index a4537dad9..6be4b3040 100644
--- a/metagpt/actions/ml_da_action.py
+++ b/metagpt/actions/ml_da_action.py
@@ -7,8 +7,8 @@ from metagpt.utils.common import CodeParser
 from metagpt.logs import logger
 
 
-def truncate(result: str, keep_len: int = 1000) -> str:
-    desc = "Truncated to show only the last 1000 characters\n"
+def truncate(result: str, keep_len: int = 2000) -> str:
+    desc = "Truncated to show only the last keep_len characters\n"
     if result.startswith(desc):
         result = result[-len(desc) :]
 
@@ -70,7 +70,9 @@ class AskReview(Action):
         if rsp.lower() in ReviewConst.EXIT_WORD:
             exit()
 
-        confirmed = rsp.lower() in ReviewConst.CONTINUE_WORD
+        # Confirmation can be one of "confirm", "continue", "c", "yes", "y" exactly, or sentences containing "confirm".
+        # One could say "confirm this task, but change the next task to ..."
+        confirmed = rsp.lower() in ReviewConst.CONTINUE_WORD or ReviewConst.CONTINUE_WORD[0] in rsp.lower()
 
         return rsp, confirmed
 
@@ -109,13 +111,13 @@ class Reflect(Action):
     ```json
     {
         "summary": str = "summarize each of your previous trial in a triple of (your methods, the corresponding result, potential improvement), list them out",
-        "takeaways": str = "carefully find key takeaways from your summarization in a step-by-step thinking process",
-        "reflection": "in one sentence, state executable actions for improving your future plan",
+        "takeaways": str = "carefully find key takeaways from your summarization",
+        "reflection": str = "give specific instruction to improve your next trial in a step-by-step thinking process",
     }
     ```
     """
-    REWRITE_PLAN_INSTRUCTION = """When taking this reflection for rewriting plan, modify the current plan in place, replace, add, or delete tasks in the plan,
-    only make necessary change to the current plan, keep reusable tasks unchanged, provide the complete new plan."""
+    REWRITE_PLAN_INSTRUCTION = """Take this reflection for rewriting plan, modify the current plan in place, make reference to your specific instruction, think about you should
+    change which task, add or delete what tasks in the plan. Only make necessary changes, keep reusable tasks unchanged, output the COMPLETE new plan starting from the first task. Your plan should have no more than 5 tasks."""
 
     async def run(self, context: str, user_requirement: str = "") -> str:
         user_requirement = user_requirement or "Score as high as possible in a data modeling competition"
@@ -124,5 +126,4 @@ class Reflect(Action):
         rsp_json = await self._aask(prompt)
         rsp = CodeParser.parse_code(block=None, text=rsp_json)
         reflection = json.loads(rsp)["reflection"]
-        reflection += self.REWRITE_PLAN_INSTRUCTION
         return reflection
diff --git a/metagpt/actions/write_plan.py b/metagpt/actions/write_plan.py
index 71133bb4d..f7ca1ff4c 100644
--- a/metagpt/actions/write_plan.py
+++ b/metagpt/actions/write_plan.py
@@ -4,12 +4,14 @@
 @Author  :   orange-crow
 @File    :   plan.py
 """
-from typing import List, Dict
+from typing import List, Dict, Tuple
 import json
+from copy import deepcopy
+import traceback
 
 from metagpt.actions import Action
 from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_PROMPT, ASSIGN_TASK_TYPE
-from metagpt.schema import Message, Task
+from metagpt.schema import Message, Task, Plan
 from metagpt.utils.common import CodeParser, create_func_config
 
 
@@ -67,8 +69,30 @@ class WritePlan(Action):
             rsp = await self.assign_task_type(json.loads(rsp))
         return rsp
 
-    @staticmethod
-    def rsp_to_tasks(rsp: str) -> List[Task]:
-        rsp = json.loads(rsp)
-        tasks = [Task(**task_config) for task_config in rsp]
-        return tasks
+def rsp_to_tasks(rsp: str) -> List[Task]:
+    """Parse a JSON array of task configs into a list of Task objects."""
+    task_configs = json.loads(rsp)
+    return [Task(**config) for config in task_configs]
+
+def update_plan_from_rsp(rsp: str, current_plan: Plan):
+    """Apply the tasks parsed from `rsp` to `current_plan` in place."""
+    tasks = rsp_to_tasks(rsp)
+    if len(tasks) != 1:
+        # zero or several tasks: hand them all to the plan's general add_tasks
+        current_plan.add_tasks(tasks)
+        return
+
+    # exactly one task: replace it when its id is already in the plan, otherwise append it
+    single_task = tasks[0]
+    if current_plan.has_task_id(single_task.task_id):
+        current_plan.replace_task(single_task)
+    else:
+        current_plan.append_task(single_task)
+
+def precheck_update_plan_from_rsp(rsp: str, current_plan: Plan) -> Tuple[bool, str]:
+    """Dry-run `rsp` on a deep copy of `current_plan`; return (True, "") or (False, error text)."""
+    try:
+        update_plan_from_rsp(rsp, deepcopy(current_plan))  # the copy keeps current_plan untouched
+        return True, ""
+    except Exception as e:  # broad on purpose: any failure only means the response cannot be applied
+        return False, str(e)  # text, not the exception object, to match the declared return type
diff --git a/metagpt/roles/kaggle_manager.py b/metagpt/roles/kaggle_manager.py
index 354289975..18ac6733a 100644
--- a/metagpt/roles/kaggle_manager.py
+++ b/metagpt/roles/kaggle_manager.py
@@ -1,6 +1,7 @@
 from typing import Dict, List, Union, Tuple
 import json
 import subprocess
+import os
 
 import fire
 import pandas as pd
@@ -14,7 +15,7 @@ from metagpt.schema import Message, Task, Plan
 from metagpt.logs import logger
 from metagpt.utils.common import CodeParser
 
-import os
+
 os.environ["KAGGLE_USERNAME"] = CONFIG.kaggle_username
 os.environ["KAGGLE_KEY"] = CONFIG.kaggle_key
 
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 4e818ca3c..6e7331281 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -10,7 +10,7 @@ from metagpt.actions import Action
 from metagpt.schema import Message, Task, Plan
 from metagpt.memory import Memory
 from metagpt.logs import logger
-from metagpt.actions.write_plan import WritePlan
+from metagpt.actions.write_plan import WritePlan, update_plan_from_rsp, precheck_update_plan_from_rsp
 from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
 from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst
 from metagpt.actions.execute_code import ExecutePyCode
@@ -69,13 +69,24 @@ class MLEngineer(Role):
             # ask for acceptance, users can other refuse and change tasks in the plan
             review, task_result_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
 
-            if success and task_result_confirmed:
+            if task_result_confirmed:
                 # tick off this task and record progress
                 task.code = code
                 task.result = result
                 self.plan.finish_current_task()
                 self.working_memory.clear()
 
+                confirmed_and_more = (ReviewConst.CONTINUE_WORD[0] in review.lower()
+                    and review.lower() not in ReviewConst.CONTINUE_WORD[0])  # "confirm, ... (more content, such as changing downstream tasks)"
+                if confirmed_and_more:
+                    self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
+                    await self._update_plan(review)
+            
+            elif "redo" in review:
+                # Ask the Role to redo this task with help of review feedback,
+                # useful when the code run is successful but the procedure or result is not what we want
+                continue
+
             else:
                 # update plan according to user's feedback and to take on changed tasks
                 await self._update_plan(review)
@@ -151,7 +162,7 @@ class MLEngineer(Role):
             return review, confirmed
         return "", True
 
-    async def _update_plan(self, review: str = "", max_tasks: int = 3):
+    async def _update_plan(self, review: str = "", max_tasks: int = 3, max_retries: int = 3):
         plan_confirmed = False
         while not plan_confirmed:
             context = self.get_useful_memories()
@@ -162,15 +173,19 @@ class MLEngineer(Role):
                 Message(content=rsp, role="assistant", cause_by=WritePlan)
             )
 
-            # TODO: precheck plan before asking reviews
+            # precheck plan before asking reviews
+            is_plan_valid, error = precheck_update_plan_from_rsp(rsp, self.plan)
+            if not is_plan_valid and max_retries > 0:
+                error_msg = f"The generated plan is not valid with error: {error}, try regenerating, remember to generate either the whole plan or the single changed task only"
+                logger.warning(error_msg)
+                self.working_memory.add(Message(content=error_msg, role="assistant", cause_by=WritePlan))
+                max_retries -= 1
+                continue
 
             _, plan_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
 
-        tasks = WritePlan.rsp_to_tasks(rsp)
-        if len(tasks) == 1 and self.plan.has_task_id(tasks[0].task_id):
-            self.plan.replace_task(tasks[0])
-        else:
-            self.plan.add_tasks(tasks)
+        update_plan_from_rsp(rsp, self.plan)
+
         self.working_memory.clear()
     
     async def _reflect(self):
@@ -181,6 +196,7 @@ class MLEngineer(Role):
         # print("*" * 10)
         reflection = await Reflect().run(context=context)
         self.working_memory.add(Message(content=reflection, role="assistant"))
+        self.working_memory.add(Message(content=Reflect.REWRITE_PLAN_INSTRUCTION, role="user"))
 
     def get_useful_memories(self) -> List[Message]:
         """find useful memories only to reduce context length and improve performance"""
diff --git a/metagpt/schema.py b/metagpt/schema.py
index 9b86a2448..4e5e083ec 100644
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@@ -149,10 +149,7 @@ class Plan(BaseModel):
             self.tasks = final_tasks
         
         # Update current_task_id to the first unfinished task in the merged list
-        for task in self.tasks:
-            if not task.is_finished:
-                self.current_task_id = task.task_id
-                break
+        self._update_current_task()
 
         # Update the task map for quick access to tasks by ID
         self.task_map = {task.task_id: task for task in self.tasks}
@@ -196,8 +193,36 @@ class Plan(BaseModel):
                 if new_task.task_id in task.dependent_task_ids:
                     self.reset_task(task.task_id)
 
+    def append_task(self, new_task: Task):
+        """
+        Add a task with a new ID after all tasks already in the plan.
+
+        Args:
+            new_task (Task): Task to add; its ID must be new and its dependencies already in the plan.
+
+        Raises:
+            AssertionError: If the task ID is already planned or a dependency ID is unknown.
+        """
+        is_duplicate = self.has_task_id(new_task.task_id)
+        assert not is_duplicate, "Task already in current plan, use replace_task instead"
+
+        unknown_deps = [dep_id for dep_id in new_task.dependent_task_ids if not self.has_task_id(dep_id)]
+        assert not unknown_deps, "New task has unknown dependencies"
+        # Nothing already planned depends on the new task, so placing it last keeps the sequence valid
+        self.task_map[new_task.task_id] = new_task
+        self.tasks.append(new_task)
+        self._update_current_task()
+
     def has_task_id(self, task_id: str) -> bool:
         return task_id in self.task_map
+
+    def _update_current_task(self):
+        """Set current_task_id to the first unfinished task's ID, or "" if none is left."""
+        unfinished_ids = (
+            task.task_id for task in self.tasks if not task.is_finished
+        )
+        # next() stops at the first match; the "" default covers the all-finished case
+        self.current_task_id = next(unfinished_ids, "")
     
     @property
     def current_task(self) -> Task:
@@ -212,10 +237,8 @@ class Plan(BaseModel):
         """Finish current task, set Task.is_finished=True, set current task to next task
         """
         if self.current_task_id:
-            current_task = self.current_task
-            current_task.is_finished = True
-            next_task_index = self.tasks.index(current_task) + 1
-            self.current_task_id = self.tasks[next_task_index].task_id if next_task_index < len(self.tasks) else None
+            self.current_task.is_finished = True
+            self._update_current_task()  # set to next task
 
     def get_finished_tasks(self) -> list[Task]:
         """return all finished tasks in correct linearized order
diff --git a/tests/metagpt/actions/test_write_plan.py b/tests/metagpt/actions/test_write_plan.py
index 2bf200ab3..7766e0d51 100644
--- a/tests/metagpt/actions/test_write_plan.py
+++ b/tests/metagpt/actions/test_write_plan.py
@@ -1,13 +1,15 @@
 import pytest
 
-from metagpt.actions.write_plan import WritePlan
+from metagpt.actions.write_plan import WritePlan, precheck_update_plan_from_rsp, Plan, Task
 
+def test_precheck_update_plan_from_rsp():
+    plan = Plan(goal="")
+    plan.add_tasks([Task(task_id="1")])
+    rsp = '[{"task_id": "2"}]'
+    success, _ = precheck_update_plan_from_rsp(rsp, plan)
+    assert success
+    assert len(plan.tasks) == 1 and plan.tasks[0].task_id == "1"  # precheck should not change the original one
 
-@pytest.mark.asyncio
-async def test_plan():
-    p = WritePlan()
-    task_desc = """Here’s some background information on Cyclistic, a bike-sharing company designing a marketing strategy aimed at converting casual riders into annual members: So far, Cyclistic’s marketing strategy has relied on building general awareness and engaging a wide range of consumers. group. One way to help achieve these goals is the flexibility of its pricing plans: one-way passes, full-day passes, and annual memberships. Customers who purchase a one-way or full-day pass are known as recreational riders. Customers purchasing an annual membership are Cyclistic members. I will provide you with a data sheet that records user behavior: '/Users/vicis/Downloads/202103-divvy-tripdata.csv"""
-    rsp = await p.run(task_desc, role="data analyst")
-    assert len(rsp.content) > 0
-    assert rsp.sent_from == "WritePlan"
-    print(rsp)
+    invalid_rsp = 'wrong'
+    success, _ = precheck_update_plan_from_rsp(invalid_rsp, plan)
+    assert not success
diff --git a/tests/metagpt/test_schema.py b/tests/metagpt/test_schema.py
index 324a083ca..b5d49b7a1 100644
--- a/tests/metagpt/test_schema.py
+++ b/tests/metagpt/test_schema.py
@@ -5,6 +5,7 @@
 @Author  : alexanderwu
 @File    : test_schema.py
 """
+import pytest
 from metagpt.schema import AIMessage, Message, SystemMessage, UserMessage
 from metagpt.schema import Task, Plan
 
@@ -143,3 +144,43 @@ class TestPlan:
         plan.replace_task(new_task)  # Task with ID 2 does not exist in plan
         assert "1" in plan.task_map
         assert "2" not in plan.task_map
+    
+    def test_append_task_with_valid_dependencies(self):
+        plan = Plan(goal="Test")
+        existing_task = [Task(task_id="1")]
+        plan.add_tasks(existing_task)
+        new_task = Task(task_id="2", dependent_task_ids=["1"])
+        plan.append_task(new_task)
+        assert plan.tasks[-1].task_id == "2"
+        assert plan.task_map["2"] == new_task
+
+    def test_append_task_with_invalid_dependencies(self):
+        new_task = Task(task_id="2", dependent_task_ids=["3"])
+        plan = Plan(goal="Test")
+        with pytest.raises(AssertionError):
+            plan.append_task(new_task)
+    
+    def test_append_task_without_dependencies(self):
+        plan = Plan(goal="Test")
+        existing_task = [Task(task_id="1")]
+        plan.add_tasks(existing_task)
+
+        new_task = Task(task_id="2")
+        plan.append_task(new_task)
+
+        assert len(plan.tasks) == 2
+        assert plan.current_task_id == "1"
+
+    def test_append_task_updates_current_task(self):
+        finished_task = Task(task_id="1", is_finished=True)
+        new_task = Task(task_id="2")
+        plan = Plan(goal="Test", tasks=[finished_task])
+        plan.append_task(new_task)
+        assert plan.current_task_id == "2"
+
+    def test_update_current_task(self):
+        task1 = Task(task_id="1", is_finished=True)
+        task2 = Task(task_id="2")
+        plan = Plan(goal="Test", tasks=[task1, task2])
+        plan._update_current_task()
+        assert plan.current_task_id == "2"

From 3847e672b1ad8ad4f6ca5c8a149f570c445b2e09 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Tue, 12 Dec 2023 14:20:15 +0800
Subject: [PATCH 07/34] rm redundant

---
 metagpt/actions/execute_code.py |  2 --
 metagpt/actions/ml_da_action.py | 13 -------------
 2 files changed, 15 deletions(-)

diff --git a/metagpt/actions/execute_code.py b/metagpt/actions/execute_code.py
index 9c2b8d96c..1d20bf3f6 100644
--- a/metagpt/actions/execute_code.py
+++ b/metagpt/actions/execute_code.py
@@ -175,8 +175,6 @@ class ExecutePyCode(ExecuteCode, Action):
                 outputs = self.parse_outputs(self.nb.cells[-1].outputs)
                 success = True
             except Exception as e:
-                # FIXME: CellExecutionError is hard to read. for example `1\0` raise ZeroDivisionError:
-                #  CellExecutionError('An error occurred while executing the following cell:\n------------------\nz=1/0\n------------------\n\n\n\x1b[0;31m---------------------------------------------------------------------------\x1b[0m\n\x1b[0;31mZeroDivisionError\x1b[0m                         Traceback (most recent call last)\nCell \x1b[0;32mIn[1], line 1\x1b[0m\n\x1b[0;32m----> 1\x1b[0m z\x1b[38;5;241m=\x1b[39m\x1b[38;5;241;43m1\x1b[39;49m\x1b[38;5;241;43m/\x1b[39;49m\x1b[38;5;241;43m0\x1b[39;49m\n\n\x1b[0;31mZeroDivisionError\x1b[0m: division by zero\n')
                 outputs = traceback.format_exc()
                 success = False
             return truncate(remove_escape_and_color_codes(outputs)), success
diff --git a/metagpt/actions/ml_da_action.py b/metagpt/actions/ml_da_action.py
index 6be4b3040..5e4580b17 100644
--- a/metagpt/actions/ml_da_action.py
+++ b/metagpt/actions/ml_da_action.py
@@ -7,19 +7,6 @@ from metagpt.utils.common import CodeParser
 from metagpt.logs import logger
 
 
-def truncate(result: str, keep_len: int = 2000) -> str:
-    desc = "Truncated to show only the last keep_len characters\n"
-    if result.startswith(desc):
-        result = result[-len(desc) :]
-
-    if len(result) > keep_len:
-        result = result[-keep_len:]
-
-    if not result.startswith(desc):
-        return desc + result
-    return desc
-
-
 class ReviewConst:
     TASK_REVIEW_TRIGGER = "task"
     CODE_REVIEW_TRIGGER = "code"

From 0231cfdcc750f3366c3eee16fc776581f67cbaf6 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Tue, 12 Dec 2023 16:23:56 +0800
Subject: [PATCH 08/34] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BB=A3=E7=A0=81?=
 =?UTF-8?q?=E6=96=87=E4=BB=B6=E8=BE=93=E5=87=BA=E4=BF=9D=E5=AD=98=EF=BC=8C?=
 =?UTF-8?q?=E5=88=9B=E5=BB=BA=E9=A1=B9=E7=9B=AE=E6=96=87=E4=BB=B6=E5=A4=B9?=
 =?UTF-8?q?=EF=BC=8C=E4=BD=BF=E7=94=A8=E9=A1=B9=E7=9B=AE=E6=96=87=E4=BB=B6?=
 =?UTF-8?q?=E5=A4=B9=E9=9A=94=E7=A6=BB=20=E5=AE=8C=E6=95=B4=E4=BB=A3?=
 =?UTF-8?q?=E7=A0=81=E4=BF=9D=E5=AD=98=E5=89=8D=EF=BC=8C=E5=8F=AF=E8=80=83?=
 =?UTF-8?q?=E8=99=91=E6=8B=BC=E6=8E=A5=E5=85=A8=E9=87=8F=E4=BB=A3=E7=A0=81?=
 =?UTF-8?q?=E5=86=8D=E8=BE=93=E5=87=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 metagpt/utils/save_code.py            | 40 +++++++++++++++++++++++++++
 tests/metagpt/utils/test_save_code.py | 30 ++++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 metagpt/utils/save_code.py
 create mode 100644 tests/metagpt/utils/test_save_code.py

diff --git a/metagpt/utils/save_code.py b/metagpt/utils/save_code.py
new file mode 100644
index 000000000..b0720a5cf
--- /dev/null
+++ b/metagpt/utils/save_code.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# @Date    : 12/12/2023 4:14 PM
+# @Author  : stellahong (stellahong@fuzhi.ai)
+# @Desc    :
+import os
+import json
+
+from metagpt.const import DATA_PATH
+
+def save_code_file(name: str, code_context: str, file_format: str = "py") -> None:
+    """
+    Save code files to a specified path.
+
+    Args:
+    - name (str): The name of the folder to save the files.
+    - code_context (str): The code content.
+    - file_format (str, optional): The file format, supports 'py' (Python file) and 'json' (JSON file). Default is 'py'.
+
+    Returns:
+    - None
+    """
+    # Create the folder path if it doesn't exist
+    os.makedirs(name=DATA_PATH / "output" / f"{name}", exist_ok=True)
+
+    # Choose to save as a Python file or a JSON file based on the file format
+    file_path = DATA_PATH / "output" / f"{name}/code.{file_format}"
+    if file_format == "py":
+        with open(file_path, "w", encoding="utf-8") as fp:
+            fp.write(code_context + "\n\n")
+    elif file_format == "json":
+        # Wrap the code content in a JSON object under the "code" key and save
+        data = {"code": code_context}
+        with open(file_path, "w", encoding="utf-8") as fp:
+            json.dump(data, fp, indent=2)
+    else:
+        raise ValueError("Unsupported file format. Please choose 'py' or 'json'.")
+
+
+
+
diff --git a/tests/metagpt/utils/test_save_code.py b/tests/metagpt/utils/test_save_code.py
new file mode 100644
index 000000000..33addb2bf
--- /dev/null
+++ b/tests/metagpt/utils/test_save_code.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# @Date    : 12/12/2023 4:17 PM
+# @Author  : stellahong (stellahong@fuzhi.ai)
+# @Desc    :
+import os
+import json
+
+from metagpt.utils.save_code import save_code_file, DATA_PATH
+
+
+def test_save_code_file_python_exists():  # unique name: the same-named content test would otherwise shadow this check
+    save_code_file("example", "print('Hello, World!')")
+    file_path = DATA_PATH / "output" / "example" / "code.py"
+    assert os.path.exists(file_path), f"File does not exist: {file_path}"
+
+
+def test_save_code_file_python():
+    save_code_file("example", "print('Hello, World!')")
+    file_path = DATA_PATH / "output" / "example" / "code.py"
+    with open(file_path, "r", encoding="utf-8") as fp:
+        content = fp.read()
+    assert "print('Hello, World!')" in content, "File content does not match"
+
+def test_save_code_file_json():
+    save_code_file("example_json", "print('Hello, JSON!')", file_format="json")
+    file_path = DATA_PATH / "output" / "example_json" / "code.json"
+    with open(file_path, "r", encoding="utf-8") as fp:
+        data = json.load(fp)
+    assert "code" in data, "JSON key 'code' is missing"
+    assert data["code"] == "print('Hello, JSON!')", "JSON content does not match"

From 35c9d744a46b8f0ad75512ebf6bf51537de089a9 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Tue, 12 Dec 2023 16:29:35 +0800
Subject: [PATCH 09/34] update gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index e03eab3d3..d36fbb856 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,3 +164,4 @@ tmp
 output.wav
 metagpt/roles/idea_agent.py
 .aider*
+/config/config.yaml

From a4cef261e07b380bd55856bef752e380c82f238b Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Tue, 12 Dec 2023 17:17:40 +0800
Subject: [PATCH 10/34] =?UTF-8?q?update:=20=E6=B7=BB=E5=8A=A0nb=E6=94=AF?=
 =?UTF-8?q?=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                            |  1 +
 metagpt/roles/ml_engineer.py          |  2 +-
 metagpt/utils/save_code.py            |  4 ++++
 tests/metagpt/utils/test_save_code.py | 26 ++++++++++++++++++++++++++
 4 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index d36fbb856..5f8e400e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,3 +165,4 @@ output.wav
 metagpt/roles/idea_agent.py
 .aider*
 /config/config.yaml
+/tests/metagpt/actions/check_data.py
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index fe6f81841..08451ec89 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -93,7 +93,7 @@ class MLEngineer(Role):
         summary = await SummarizeAnalysis().run(self.plan)
         rsp = Message(content=summary, cause_by=SummarizeAnalysis)
         self._rc.memory.add(rsp)
-
+        
         return rsp
 
     async def _write_and_exec_code(self, max_retry: int = 3):
diff --git a/metagpt/utils/save_code.py b/metagpt/utils/save_code.py
index b0720a5cf..f1fdf0403 100644
--- a/metagpt/utils/save_code.py
+++ b/metagpt/utils/save_code.py
@@ -5,6 +5,8 @@
 import os
 import json
 
+import nbformat
+
 from metagpt.const import DATA_PATH
 
 def save_code_file(name: str, code_context: str, file_format: str = "py") -> None:
@@ -32,6 +34,8 @@ def save_code_file(name: str, code_context: str, file_format: str = "py") -> Non
         data = {"code": code_context}
         with open(file_path, "w", encoding="utf-8") as fp:
             json.dump(data, fp, indent=2)
+    elif file_format == "ipynb":
+         nbformat.write(code_context, file_path)
     else:
         raise ValueError("Unsupported file format. Please choose 'py' or 'json'.")
 
diff --git a/tests/metagpt/utils/test_save_code.py b/tests/metagpt/utils/test_save_code.py
index 33addb2bf..60a9e1ff4 100644
--- a/tests/metagpt/utils/test_save_code.py
+++ b/tests/metagpt/utils/test_save_code.py
@@ -2,8 +2,13 @@
 # @Date    : 12/12/2023 4:17 PM
 # @Author  : stellahong (stellahong@fuzhi.ai)
 # @Desc    :
+import pytest
 import os
 import json
+import nbformat
+
+from metagpt.actions.write_analysis_code import WriteCodeByGenerate
+from metagpt.actions.execute_code import ExecutePyCode
 
 from metagpt.utils.save_code import save_code_file, DATA_PATH
 
@@ -21,6 +26,7 @@ def test_save_code_file_python():
         content = fp.read()
     assert "print('Hello, World!')" in content, "File content does not match"
 
+
 def test_save_code_file_json():
     save_code_file("example_json", "print('Hello, JSON!')", file_format="json")
     file_path = DATA_PATH / "output" / "example_json" / "code.json"
@@ -28,3 +34,23 @@ def test_save_code_file_json():
         data = json.load(fp)
     assert "code" in data, "JSON key 'code' is missing"
     assert data["code"] == "print('Hello, JSON!')", "JSON content does not match"
+
+
+
+@pytest.mark.asyncio
+async def test_save_code_file_notebook():
+    code = await WriteCodeByGenerate().run(
+        context="basic python, hello world", plan="", code_steps="", temperature=0.0
+    )
+    executor = ExecutePyCode()
+    await executor.run(code)
+    # Save as a Notebook file
+    save_code_file("example_nb", executor.nb, file_format="ipynb")
+    file_path = DATA_PATH / "output" / "example_nb" / "code.ipynb"
+    assert os.path.exists(file_path), f"Notebook file does not exist: {file_path}"
+
+    # Additional checks specific to notebook format
+    notebook = nbformat.read(file_path, as_version=4)
+    assert len(notebook.cells) > 0, "Notebook should have at least one cell"
+    first_cell_source = notebook.cells[0].source
+    assert "print('Hello, World!')" in first_cell_source, "Notebook cell content does not match"

From 8db5f22105b344eeebbe7df2281f9f062fd8fa0a Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Tue, 12 Dec 2023 17:26:15 +0800
Subject: [PATCH 11/34] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=B3=A8=E9=87=8A?=
 =?UTF-8?q?=E5=92=8C=E8=AF=B4=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 metagpt/utils/save_code.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/metagpt/utils/save_code.py b/metagpt/utils/save_code.py
index f1fdf0403..96c310336 100644
--- a/metagpt/utils/save_code.py
+++ b/metagpt/utils/save_code.py
@@ -16,7 +16,8 @@ def save_code_file(name: str, code_context: str, file_format: str = "py") -> Non
     Args:
     - name (str): The name of the folder to save the files.
     - code_context (str): The code content.
-    - file_format (str, optional): The file format, supports 'py' (Python file) and 'json' (JSON file). Default is 'py'.
+    - file_format (str, optional): The file format. Supports 'py' (Python file), 'json' (JSON file), and 'ipynb' (Jupyter Notebook file). Default is 'py'.
+
 
     Returns:
     - None
@@ -37,7 +38,7 @@ def save_code_file(name: str, code_context: str, file_format: str = "py") -> Non
     elif file_format == "ipynb":
          nbformat.write(code_context, file_path)
     else:
-        raise ValueError("Unsupported file format. Please choose 'py' or 'json'.")
+        raise ValueError("Unsupported file format. Please choose 'py', 'json', or 'ipynb'.")
 
 
 

From 7c1809af1ef39f5cc134870d03b2e5603d885789 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Tue, 12 Dec 2023 22:35:06 +0800
Subject: [PATCH 12/34] support more forms of task generation

---
 metagpt/actions/write_plan.py | 10 +++++++++-
 metagpt/roles/ml_engineer.py  | 21 +++++++++++++++------
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/metagpt/actions/write_plan.py b/metagpt/actions/write_plan.py
index f7ca1ff4c..11a3f3e1e 100644
--- a/metagpt/actions/write_plan.py
+++ b/metagpt/actions/write_plan.py
@@ -13,6 +13,7 @@ from metagpt.actions import Action
 from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_PROMPT, ASSIGN_TASK_TYPE
 from metagpt.schema import Message, Task, Plan
 from metagpt.utils.common import CodeParser, create_func_config
+from metagpt.logs import logger
 
 
 class WritePlan(Action):
@@ -22,6 +23,7 @@ class WritePlan(Action):
     # Task:
     Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to __max_tasks__ tasks.
     If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
+    If you encounter errors on the current task, revise and output the current single task only.
     Output a list of jsons following the format:
     ```json
     [
@@ -76,7 +78,13 @@ def rsp_to_tasks(rsp: str) -> List[Task]:
 
 def update_plan_from_rsp(rsp: str, current_plan: Plan):
     tasks = rsp_to_tasks(rsp)
-    if len(tasks) == 1:
+    if len(tasks) == 1 or tasks[0].dependent_task_ids:
+        if tasks[0].dependent_task_ids and len(tasks) > 1:
+            # tasks[0].dependent_task_ids means the generated tasks are not a complete plan
+            # for they depend on tasks in the current plan, in this case, we only support updating one task each time
+            logger.warning(
+                "Current plan will take only the first generated task if the generated tasks are not a complete plan"
+            )
         # handle a single task
         if current_plan.has_task_id(tasks[0].task_id):
             # replace an existing task
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index fe6f81841..de649e857 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -28,7 +28,7 @@ class MLEngineer(Role):
 
         self.plan = Plan(goal=goal)
         self.use_tools = False
-        self.use_code_steps = True
+        self.use_code_steps = False
         self.execute_code = ExecutePyCode()
         self.auto_run = auto_run
 
@@ -64,6 +64,11 @@ class MLEngineer(Role):
             # ask for acceptance, users can other refuse and change tasks in the plan
             review, task_result_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
 
+            if self.auto_run:
+                # if human confirms the task result, then we deem the task completed, regardless of whether the code run succeeds;
+                # if auto mode, then the code run has to succeed for the task to be considered completed
+                task_result_confirmed = success
+
             if task_result_confirmed:
                 # tick off this task and record progress
                 task.code = code
@@ -143,7 +148,7 @@ class MLEngineer(Role):
             if not success and counter >= max_retry:
                 logger.info("coding failed!")
                 review, _ = await self._ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER)
-                if ReviewConst.CHANGE_WORD in review:
+                if ReviewConst.CHANGE_WORD[0] in review:
                     counter = 0  # redo the task again with help of human suggestions
 
         return code, result, success, code_steps
@@ -199,9 +204,12 @@ class MLEngineer(Role):
         # TODO dataset description , code steps
         user_requirement = self.plan.goal
         data_desc = self.plan.context
-        tasks = json.dumps(
-            [task.dict() for task in self.plan.tasks], indent=4, ensure_ascii=False
-        )
+        tasks = [task.dict() for task in self.plan.tasks]
+        for task in tasks:
+            # Shorten the context as we don't need code steps after we get the codes.
+            # This doesn't affect current_task below, which should hold the code steps
+            task.pop("code_steps")
+        tasks = json.dumps(tasks, indent=4, ensure_ascii=False)
         current_task = self.plan.current_task.json() if self.plan.current_task else {}
         context = STRUCTURAL_CONTEXT.format(
             user_requirement=user_requirement, data_desc=data_desc, tasks=tasks, current_task=current_task
@@ -219,7 +227,8 @@ if __name__ == "__main__":
     # requirement = "Run data analysis on sklearn Diabetes dataset, include a plot"
     # requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
     # requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"
-    requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
+    # requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
+    requirement = "This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: 'workspace/house-prices-advanced-regression-techniques/split_train.csv', eval data path: 'workspace/house-prices-advanced-regression-techniques/split_eval.csv'."
 
     async def main(requirement: str = requirement, auto_run: bool = False):
         role = MLEngineer(goal=requirement, auto_run=auto_run)

From 0147e0bb534ab487dcbdbc52cce938c62893f4be Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Wed, 13 Dec 2023 10:29:50 +0800
Subject: [PATCH 13/34] add ignore

---
 .gitignore         |   1 +
 config/config.yaml | 100 ---------------------------------------------
 2 files changed, 1 insertion(+), 100 deletions(-)
 delete mode 100644 config/config.yaml

diff --git a/.gitignore b/.gitignore
index 5f8e400e3..f79581de4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,3 +166,4 @@ metagpt/roles/idea_agent.py
 .aider*
 /config/config.yaml
 /tests/metagpt/actions/check_data.py
+/config/config.yaml
diff --git a/config/config.yaml b/config/config.yaml
deleted file mode 100644
index bf998def7..000000000
--- a/config/config.yaml
+++ /dev/null
@@ -1,100 +0,0 @@
-# DO NOT MODIFY THIS FILE, create a new key.yaml, define OPENAI_API_KEY.
-# The configuration of key.yaml has a higher priority and will not enter git
-
-#### if OpenAI
-## The official OPENAI_API_BASE is https://api.openai.com/v1
-## If the official OPENAI_API_BASE is not available, we recommend using the [openai-forward](https://github.com/beidongjiedeguang/openai-forward).
-## Or, you can configure OPENAI_PROXY to access official OPENAI_API_BASE.
-OPENAI_API_BASE: "https://api.openai.com/v1"
-#OPENAI_PROXY: "http://127.0.0.1:8118"
-#OPENAI_API_KEY: "YOUR_API_KEY"   # set the value to sk-xxx if you host the openai interface for open llm model
-OPENAI_API_MODEL: "gpt-4"
-MAX_TOKENS: 1500
-RPM: 10
-
-#### if Spark
-#SPARK_APPID : "YOUR_APPID"
-#SPARK_API_SECRET : "YOUR_APISecret"
-#SPARK_API_KEY : "YOUR_APIKey"
-#DOMAIN : "generalv2"
-#SPARK_URL : "ws://spark-api.xf-yun.com/v2.1/chat"
-
-#### if Anthropic
-#Anthropic_API_KEY: "YOUR_API_KEY"
-
-#### if AZURE, check https://github.com/openai/openai-cookbook/blob/main/examples/azure/chat.ipynb
-#### You can use ENGINE or DEPLOYMENT mode
-#OPENAI_API_TYPE: "azure"
-#OPENAI_API_BASE: "YOUR_AZURE_ENDPOINT"
-#OPENAI_API_KEY: "YOUR_AZURE_API_KEY"
-#OPENAI_API_VERSION: "YOUR_AZURE_API_VERSION"
-#DEPLOYMENT_NAME: "YOUR_DEPLOYMENT_NAME"
-#DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID"
-
-#### if zhipuai from `https://open.bigmodel.cn`. You can set here or export API_KEY="YOUR_API_KEY"
-# ZHIPUAI_API_KEY: "YOUR_API_KEY"
-
-#### for Search
-
-## Supported values: serpapi/google/serper/ddg
-#SEARCH_ENGINE: serpapi
-
-## Visit https://serpapi.com/ to get key.
-#SERPAPI_API_KEY: "YOUR_API_KEY"
-
-## Visit https://console.cloud.google.com/apis/credentials to get key.
-#GOOGLE_API_KEY: "YOUR_API_KEY"
-## Visit https://programmablesearchengine.google.com/controlpanel/create to get id.
-#GOOGLE_CSE_ID: "YOUR_CSE_ID"
-
-## Visit https://serper.dev/ to get key.
-#SERPER_API_KEY: "YOUR_API_KEY"
-
-#### for web access
-
-## Supported values: playwright/selenium
-#WEB_BROWSER_ENGINE: playwright
-
-## Supported values: chromium/firefox/webkit, visit https://playwright.dev/python/docs/api/class-browsertype
-##PLAYWRIGHT_BROWSER_TYPE: chromium
-
-## Supported values: chrome/firefox/edge/ie, visit https://www.selenium.dev/documentation/webdriver/browsers/
-# SELENIUM_BROWSER_TYPE: chrome
-
-#### for TTS
-
-#AZURE_TTS_SUBSCRIPTION_KEY: "YOUR_API_KEY"
-#AZURE_TTS_REGION: "eastus"
-
-#### for Stable Diffusion
-## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
-SD_URL: "YOUR_SD_URL"
-SD_T2I_API: "/sdapi/v1/txt2img"
-
-#### for Execution
-#LONG_TERM_MEMORY: false
-
-#### for Mermaid CLI
-## If you installed mmdc (Mermaid CLI) only for metagpt then enable the following configuration.
-#PUPPETEER_CONFIG: "./config/puppeteer-config.json"
-#MMDC: "./node_modules/.bin/mmdc"
-
-
-### for calc_usage
-# CALC_USAGE: false
-
-### for Research
-MODEL_FOR_RESEARCHER_SUMMARY: gpt-3.5-turbo
-MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k
-
-### choose the engine for mermaid conversion, 
-# default is nodejs, you can change it to playwright,pyppeteer or ink
-# MERMAID_ENGINE: nodejs
-
-### browser path for pyppeteer engine, support Chrome, Chromium,MS Edge
-#PYPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable"
-
-PROMPT_FORMAT: json #json or markdown
-
-# KAGGLE_USERNAME: ""
-# KAGGLE_KEY: ""
\ No newline at end of file

From 32c4a557556a6e23afa18ea1a316169cd858e7dd Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Wed, 13 Dec 2023 12:54:50 +0800
Subject: [PATCH 14/34] add save code

---
 metagpt/roles/ml_engineer.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 08451ec89..d679b2e01 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -1,13 +1,11 @@
-from typing import Dict, List, Union
+from typing import  List
 import json
-import subprocess
+from datetime import datetime
 
 import fire
-import re
 
 from metagpt.roles import Role
-from metagpt.actions import Action
-from metagpt.schema import Message, Task, Plan
+from metagpt.schema import Message, Plan
 from metagpt.memory import Memory
 from metagpt.logs import logger
 from metagpt.actions.write_plan import WritePlan, update_plan_from_rsp, precheck_update_plan_from_rsp
@@ -17,6 +15,7 @@ from metagpt.actions.execute_code import ExecutePyCode
 from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
 from metagpt.prompts.ml_engineer import STRUCTURAL_CONTEXT
 from metagpt.actions.write_code_steps import WriteCodeSteps
+from metagpt.utils.save_code import save_code_file
 
 class MLEngineer(Role):
     def __init__(
@@ -93,7 +92,10 @@ class MLEngineer(Role):
         summary = await SummarizeAnalysis().run(self.plan)
         rsp = Message(content=summary, cause_by=SummarizeAnalysis)
         self._rc.memory.add(rsp)
-        
+
+        # save code using datetime.now or  keywords related to the goal of your project (plan.goal).
+        project_record = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        save_code_file(name=project_record, code_context=self.execute_code.nb, file_format="ipynb")
         return rsp
 
     async def _write_and_exec_code(self, max_retry: int = 3):

From 2e4094c7a798f15f42ec3d85fc87395e4260d352 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Wed, 13 Dec 2023 12:56:54 +0800
Subject: [PATCH 15/34] test auto mode

---
 .gitignore                   | 1 -
 metagpt/roles/ml_engineer.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index f79581de4..5f8e400e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,4 +166,3 @@ metagpt/roles/idea_agent.py
 .aider*
 /config/config.yaml
 /tests/metagpt/actions/check_data.py
-/config/config.yaml
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index d679b2e01..8b7b72517 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -223,7 +223,7 @@ if __name__ == "__main__":
     # requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"
     requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
 
-    async def main(requirement: str = requirement, auto_run: bool = False):
+    async def main(requirement: str = requirement, auto_run: bool = True):
         role = MLEngineer(goal=requirement, auto_run=auto_run)
         await role.run(requirement)
 

From f81f355ff24378701c17de6d0c7260ad649fbf54 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Wed, 13 Dec 2023 13:01:32 +0800
Subject: [PATCH 16/34] add default config.yaml

---
 .gitignore                   |  1 -
 config/config.yaml           | 97 ++++++++++++++++++++++++++++++++++++
 metagpt/roles/ml_engineer.py |  4 +-
 3 files changed, 99 insertions(+), 3 deletions(-)
 create mode 100644 config/config.yaml

diff --git a/.gitignore b/.gitignore
index 5f8e400e3..9b679d48a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,5 +164,4 @@ tmp
 output.wav
 metagpt/roles/idea_agent.py
 .aider*
-/config/config.yaml
 /tests/metagpt/actions/check_data.py
diff --git a/config/config.yaml b/config/config.yaml
new file mode 100644
index 000000000..bed67083c
--- /dev/null
+++ b/config/config.yaml
@@ -0,0 +1,97 @@
+# DO NOT MODIFY THIS FILE, create a new key.yaml, define OPENAI_API_KEY.
+# The configuration of key.yaml has a higher priority and will not enter git
+
+#### if OpenAI
+## The official OPENAI_API_BASE is https://api.openai.com/v1
+## If the official OPENAI_API_BASE is not available, we recommend using the [openai-forward](https://github.com/beidongjiedeguang/openai-forward).
+## Or, you can configure OPENAI_PROXY to access official OPENAI_API_BASE.
+OPENAI_API_BASE: "https://api.openai.com/v1"
+#OPENAI_PROXY: "http://127.0.0.1:8118"
+#OPENAI_API_KEY: "YOUR_API_KEY"   # set the value to sk-xxx if you host the openai interface for open llm model
+OPENAI_API_MODEL: "gpt-4"
+MAX_TOKENS: 1500
+RPM: 10
+
+#### if Spark
+#SPARK_APPID : "YOUR_APPID"
+#SPARK_API_SECRET : "YOUR_APISecret"
+#SPARK_API_KEY : "YOUR_APIKey"
+#DOMAIN : "generalv2"
+#SPARK_URL : "ws://spark-api.xf-yun.com/v2.1/chat"
+
+#### if Anthropic
+#Anthropic_API_KEY: "YOUR_API_KEY"
+
+#### if AZURE, check https://github.com/openai/openai-cookbook/blob/main/examples/azure/chat.ipynb
+#### You can use ENGINE or DEPLOYMENT mode
+#OPENAI_API_TYPE: "azure"
+#OPENAI_API_BASE: "YOUR_AZURE_ENDPOINT"
+#OPENAI_API_KEY: "YOUR_AZURE_API_KEY"
+#OPENAI_API_VERSION: "YOUR_AZURE_API_VERSION"
+#DEPLOYMENT_NAME: "YOUR_DEPLOYMENT_NAME"
+#DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID"
+
+#### if zhipuai from `https://open.bigmodel.cn`. You can set here or export API_KEY="YOUR_API_KEY"
+# ZHIPUAI_API_KEY: "YOUR_API_KEY"
+
+#### for Search
+
+## Supported values: serpapi/google/serper/ddg
+#SEARCH_ENGINE: serpapi
+
+## Visit https://serpapi.com/ to get key.
+#SERPAPI_API_KEY: "YOUR_API_KEY"
+
+## Visit https://console.cloud.google.com/apis/credentials to get key.
+#GOOGLE_API_KEY: "YOUR_API_KEY"
+## Visit https://programmablesearchengine.google.com/controlpanel/create to get id.
+#GOOGLE_CSE_ID: "YOUR_CSE_ID"
+
+## Visit https://serper.dev/ to get key.
+#SERPER_API_KEY: "YOUR_API_KEY"
+
+#### for web access
+
+## Supported values: playwright/selenium
+#WEB_BROWSER_ENGINE: playwright
+
+## Supported values: chromium/firefox/webkit, visit https://playwright.dev/python/docs/api/class-browsertype
+##PLAYWRIGHT_BROWSER_TYPE: chromium
+
+## Supported values: chrome/firefox/edge/ie, visit https://www.selenium.dev/documentation/webdriver/browsers/
+# SELENIUM_BROWSER_TYPE: chrome
+
+#### for TTS
+
+#AZURE_TTS_SUBSCRIPTION_KEY: "YOUR_API_KEY"
+#AZURE_TTS_REGION: "eastus"
+
+#### for Stable Diffusion
+## Use SD service, based on https://github.com/AUTOMATIC1111/stable-diffusion-webui
+SD_URL: "YOUR_SD_URL"
+SD_T2I_API: "/sdapi/v1/txt2img"
+
+#### for Execution
+#LONG_TERM_MEMORY: false
+
+#### for Mermaid CLI
+## If you installed mmdc (Mermaid CLI) only for metagpt then enable the following configuration.
+#PUPPETEER_CONFIG: "./config/puppeteer-config.json"
+#MMDC: "./node_modules/.bin/mmdc"
+
+
+### for calc_usage
+# CALC_USAGE: false
+
+### for Research
+MODEL_FOR_RESEARCHER_SUMMARY: gpt-3.5-turbo
+MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k
+
+### choose the engine for mermaid conversion, 
+# default is nodejs, you can change it to playwright,pyppeteer or ink
+# MERMAID_ENGINE: nodejs
+
+### browser path for pyppeteer engine, support Chrome, Chromium,MS Edge
+#PYPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable"
+
+PROMPT_FORMAT: json #json or markdown
\ No newline at end of file
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 8b7b72517..c3f1bd669 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -217,11 +217,11 @@ class MLEngineer(Role):
 
 
 if __name__ == "__main__":
-    # requirement = "Run data analysis on sklearn Iris dataset, include a plot"
+    requirement = "Run data analysis on sklearn Iris dataset, include a plot"
     # requirement = "Run data analysis on sklearn Diabetes dataset, include a plot"
     # requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
     # requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"
-    requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
+    # requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
 
     async def main(requirement: str = requirement, auto_run: bool = True):
         role = MLEngineer(goal=requirement, auto_run=auto_run)

From 92d59ea31bb7bcb563d2fdd94cd6b6af64963aa7 Mon Sep 17 00:00:00 2001
From: yzlin <yzlin@fuzhi.ai>
Date: Wed, 13 Dec 2023 13:48:18 +0800
Subject: [PATCH 17/34] save code steps early

---
 metagpt/actions/write_analysis_code.py |  7 ++-----
 metagpt/roles/ml_engineer.py           | 11 +++++------
 metagpt/schema.py                      |  2 +-
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py
index 1127dc78b..7e6483371 100644
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@@ -23,9 +23,7 @@ from metagpt.utils.common import create_func_config
 
 
 class BaseWriteAnalysisCode(Action):
-    async def run(
-        self, context: List[Message], plan: Plan = None, task_guide: str = ""
-    ) -> str:
+    async def run(self, context: List[Message], plan: Plan = None) -> str:
         """Run of a code writing action, used in data analysis or modeling
 
         Args:
@@ -85,7 +83,6 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
         self,
         context: [List[Message]],
         plan: Plan = None,
-        code_steps: str = "",
         system_msg: str = None,
         **kwargs,
     ) -> str:
@@ -155,11 +152,11 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
         self,
         context: List[Message],
         plan: Plan = None,
-        code_steps: str = "",
         data_desc: str = "",
     ) -> str:
         task_type = plan.current_task.task_type
         task = plan.current_task.instruction
+        code_steps = plan.current_task.code_steps
         available_tools = registry.get_all_schema_by_module(task_type)
         available_tools = [
             {k: tool[k] for k in ["name", "description"] if k in tool}
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index de649e857..3260dd43f 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -59,7 +59,7 @@ class MLEngineer(Role):
             logger.info(f"ready to take on task {task}")
 
             # take on current task
-            code, result, success, code_steps = await self._write_and_exec_code()
+            code, result, success = await self._write_and_exec_code()
 
             # ask for acceptance, users can other refuse and change tasks in the plan
             review, task_result_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
@@ -73,7 +73,6 @@ class MLEngineer(Role):
                 # tick off this task and record progress
                 task.code = code
                 task.result = result
-                task.code_steps = code_steps
                 self.plan.finish_current_task()
                 self.working_memory.clear()
 
@@ -102,7 +101,7 @@ class MLEngineer(Role):
         return rsp
 
     async def _write_and_exec_code(self, max_retry: int = 3):
-        code_steps = (
+        self.plan.current_task.code_steps = (
             await WriteCodeSteps().run(self.plan)
             if self.use_code_steps
             else ""
@@ -121,12 +120,12 @@ class MLEngineer(Role):
             if not self.use_tools or self.plan.current_task.task_type == "other":
                 # code = "print('abc')"
                 code = await WriteCodeByGenerate().run(
-                    context=context, plan=self.plan, code_steps=code_steps, temperature=0.0
+                    context=context, plan=self.plan, temperature=0.0
                 )
                 cause_by = WriteCodeByGenerate
             else:
                 code = await WriteCodeWithTools().run(
-                    context=context, plan=self.plan, code_steps=code_steps, data_desc=""
+                    context=context, plan=self.plan, data_desc=""
                 )
                 cause_by = WriteCodeWithTools
 
@@ -151,7 +150,7 @@ class MLEngineer(Role):
                 if ReviewConst.CHANGE_WORD[0] in review:
                     counter = 0  # redo the task again with help of human suggestions
 
-        return code, result, success, code_steps
+        return code, result, success
 
     async def _ask_review(self, auto_run: bool = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER):
         auto_run = auto_run or self.auto_run
diff --git a/metagpt/schema.py b/metagpt/schema.py
index f91922535..8eb7e31ca 100644
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@@ -78,10 +78,10 @@ class Task(BaseModel):
     dependent_task_ids: list[str] = [] # Tasks prerequisite to this Task
     instruction: str = ""
     task_type: str = ""
+    code_steps: str = ""
     code: str = ""
     result: str = ""
     is_finished: bool = False
-    code_steps: str = ""
 
 
 class Plan(BaseModel):

From 05ae935d8cfaef957c539ce1c3a6ebcb21d40ad8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Wed, 13 Dec 2023 15:55:04 +0800
Subject: [PATCH 18/34] fix truncate.

---
 metagpt/actions/execute_code.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/metagpt/actions/execute_code.py b/metagpt/actions/execute_code.py
index 1d20bf3f6..36e01ed0e 100644
--- a/metagpt/actions/execute_code.py
+++ b/metagpt/actions/execute_code.py
@@ -186,14 +186,13 @@ class ExecutePyCode(ExecuteCode, Action):
 def truncate(result: str, keep_len: int = 2000) -> str:
     desc = f"Truncated to show only the last {keep_len} characters\n"
     if result.startswith(desc):
-        result = result[-len(desc) :]
+        result = result[len(desc) :]
 
     if len(result) > keep_len:
         result = result[-keep_len:]
-
-    if not result.startswith(desc):
         return desc + result
-    return desc
+
+    return result
 
 
 def remove_escape_and_color_codes(input_str):

From cfbf1630841e05d07d6b537e736dbcf28e349622 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Wed, 13 Dec 2023 15:55:30 +0800
Subject: [PATCH 19/34] add test for truncate.

---
 tests/metagpt/actions/test_execute_code.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/metagpt/actions/test_execute_code.py b/tests/metagpt/actions/test_execute_code.py
index 73b5886dc..95f883e12 100644
--- a/tests/metagpt/actions/test_execute_code.py
+++ b/tests/metagpt/actions/test_execute_code.py
@@ -1,6 +1,6 @@
 import pytest
 
-from metagpt.actions.execute_code import ExecutePyCode
+from metagpt.actions.execute_code import ExecutePyCode, truncate
 from metagpt.schema import Message
 
 
@@ -81,3 +81,10 @@ async def test_plotting_bug():
     pi = ExecutePyCode()
     output = await pi.run(code)
     assert output[1] is True
+
+
+def test_truncate():
+    output = "hello world"
+    assert truncate(output) == output
+    output = "hello world"
+    assert truncate(output, 5) == "Truncated to show only the last 5 characters\nworld"

From abbaa6afa95e7fcada42df8a299f1dd3a7cc97c5 Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Wed, 13 Dec 2023 17:03:56 +0800
Subject: [PATCH 20/34] refine prompt

---
 metagpt/prompts/ml_engineer.py | 36 ++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/metagpt/prompts/ml_engineer.py b/metagpt/prompts/ml_engineer.py
index 2d2d3315a..f2412c35b 100644
--- a/metagpt/prompts/ml_engineer.py
+++ b/metagpt/prompts/ml_engineer.py
@@ -155,46 +155,51 @@ PRINT_DATA_COLUMNS = {
 
 GENERATE_CODE_PROMPT = """
 # Background
-Assist in completing [{user_requirement}] in a Jupyter notebook.
+As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook.
 
-## Task Progress
-### Done Tasks
+## Done Tasks
 ```python
 {history_code}
 ```end
 
-### Current Task
+## Current Task
 {current_task}
 
-## Latest Data Info
+# Latest Data Info
+Latest data info after previous tasks:
 {column_info}
 
 # Task
-Fully implement 'Current Task', ensuring all necessary steps are covered without repeating code from 'Done Tasks'. Specifically, {special_prompt}
+Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
+Specifically, {special_prompt}
 
 # Code Steps:
 Follow steps below when you writing code if it's convenient.
 {code_steps}
+
+# Constraints:
+- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
 """
 
 TOOL_USAGE_PROMPT = """
 # Background
-Assist in completing [{user_requirement}] in a Jupyter notebook.
+As a data scientist, you need to help user to achieve their goal [{user_requirement}] step-by-step in an continuous Jupyter notebook.
 
-## Task Progress
-### Done Tasks
+## Done Tasks
 ```python
 {history_code}
 ```end
 
-### Current Task
+## Current Task
 {current_task}
 
-## Latest Data Info
+# Latest Data Info
+Latest data info after previous tasks:
 {column_info}
 
 # Task
-Fully implement 'Current Task', ensuring all necessary steps are covered without repeating code from 'Done Tasks'. Specifically, {special_prompt}
+Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
+Specifically, {special_prompt}
 
 # Code Steps:
 Follow steps below when you writing code if it's convenient.
@@ -205,11 +210,11 @@ Follow steps below when you writing code if it's convenient.
 - You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
 
 # Available Tools:
-Each Class tool is described in JSON format. When you call it, import the tool from `{module_name}` first.
+Each Class tool is described in JSON format. When you call a tool, import the tool from `{module_name}` first.
 {tool_catalog}
 
 # Output Example:
-For "fill missing value and handle outliers", the output code be like when there are training data and test data:
+when current task is "fill missing value and handle outliers", and their are training data and test data, the output code be like:
 ```python
 # Tools used: ['FillMissingValue']
 from metagpt.tools.functions.libs.data_preprocess import FillMissingValue
@@ -229,8 +234,9 @@ for col in num_cols:
 ```end
 
 # Constraints:
+- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
 - Prioritize using pre-defined tools for the same functionality.
-- Copy DataFrame before processing if needed.
+- Always copy the DataFrame before processing it and use the copy to process.
 """
 #- If 'Code Steps' contains step done in 'Done Tasks', such as reading data, don't repeat it.
 

From 4423524734b15fdb9ca8aafb5eefa823d70ba671 Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Wed, 13 Dec 2023 18:11:54 +0800
Subject: [PATCH 21/34] fix schema

---
 .../tools/functions/schemas/feature_engineering.yml    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/metagpt/tools/functions/schemas/feature_engineering.yml b/metagpt/tools/functions/schemas/feature_engineering.yml
index 4f2a7100d..3ba9e863b 100644
--- a/metagpt/tools/functions/schemas/feature_engineering.yml
+++ b/metagpt/tools/functions/schemas/feature_engineering.yml
@@ -53,17 +53,17 @@ PolynomialExpansion:
 
 CatCount:
   type: class
-  description: "Add value counts of categorical columns as new features."
+  description: "Add value counts of a categorical column as new feature."
   methods:
     __init__:
       description: "Initialize self."
       parameters:
         properties:
-          cols:
-            type: list
-            description: "Columns for value counts."
+          col:
+            type: str
+            description: "Column for value counts."
         required:
-          - cols
+          - col
     fit:
       description: "Fit the CatCount model."
       parameters:

From e59bab73b06985fd02cc955002372909a0c571aa Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Wed, 13 Dec 2023 19:36:02 +0800
Subject: [PATCH 22/34] refine prompt

---
 metagpt/prompts/ml_engineer.py | 31 ++++++++++++++++++++++++++-----
 metagpt/roles/ml_engineer.py   |  9 +--------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/metagpt/prompts/ml_engineer.py b/metagpt/prompts/ml_engineer.py
index f2412c35b..05d8db8e9 100644
--- a/metagpt/prompts/ml_engineer.py
+++ b/metagpt/prompts/ml_engineer.py
@@ -174,11 +174,29 @@ Write complete code for 'Current Task'. And avoid duplicating code from 'Done Ta
 Specifically, {special_prompt}
 
 # Code Steps:
-Follow steps below when you writing code if it's convenient.
+Strictly follow steps below when you writing code if it's convenient.
 {code_steps}
 
+# Output Example:
+when current task is "train a lightgbm model on training data", and their are two steps in 'Code Steps', the code be like:
+```python
+# Step 1: check data type and convert to numeric
+obj_cols = train.select_dtypes(include='object').columns.tolist()
+
+for col in obj_cols:
+    encoder = LabelEncoder()
+    train[col] = encoder.fit_transform(train[col])
+    test[col] = test[col].apply(lambda x: x if x in encoder.classes_ else 'unknown')
+    test[col] = encoder.transform(test[col])
+
+# Step 2: train lightgbm model
+model = LGBMClassifier()
+model.fit(train, y_train)
+```end
+
 # Constraints:
 - Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
+- The output code should contain all steps implemented in 'Code Steps'.
 """
 
 TOOL_USAGE_PROMPT = """
@@ -202,7 +220,7 @@ Write complete code for 'Current Task'. And avoid duplicating code from 'Done Ta
 Specifically, {special_prompt}
 
 # Code Steps:
-Follow steps below when you writing code if it's convenient.
+Strictly follow steps below when you writing code if it's convenient.
 {code_steps}
 
 # Capabilities
@@ -214,8 +232,9 @@ Each Class tool is described in JSON format. When you call a tool, import the to
 {tool_catalog}
 
 # Output Example:
-when current task is "fill missing value and handle outliers", and their are training data and test data, the output code be like:
+when current task is "do data preprocess, like fill missing value, handle outliers, etc.", and their are two steps in 'Code Steps', the code be like:
 ```python
+# Step 1: fill missing value
 # Tools used: ['FillMissingValue']
 from metagpt.tools.functions.libs.data_preprocess import FillMissingValue
 
@@ -227,6 +246,7 @@ fill_missing_value.fit(train_processed)
 train_processed = fill_missing_value.transform(train_processed)
 test_processed = fill_missing_value.transform(test_processed)
 
+# Step 2: handle outliers
 for col in num_cols:
     low, high = train_processed[col].quantile([0.01, 0.99])
     train_processed[col] = train_processed[col].clip(low, high)
@@ -235,8 +255,9 @@ for col in num_cols:
 
 # Constraints:
 - Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
-- Prioritize using pre-defined tools for the same functionality.
+- Always prioritize using pre-defined tools for the same functionality.
 - Always copy the DataFrame before processing it and use the copy to process.
+- The output code should contain all steps implemented correctly in 'Code Steps'.
 """
 #- If 'Code Steps' contains step done in 'Done Tasks', such as reading data, don't repeat it.
 
@@ -266,7 +287,7 @@ The current task is about training a model, please ensure high performance:
 
 MODEL_EVALUATE_PROMPT = """
 The current task is about evaluating a model, please note the following:
-- Ensure that the evaluated data is same processed as the training data.
+- Ensure that the evaluated data is same processed as the training data. If not, remember use object in 'Done Tasks' to transform the data.
 - Use trained model from previous task result directly, do not mock or reload model yourself.
 """
 
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index c735eb983..6a2a9e2b0 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -32,13 +32,6 @@ STRUCTURAL_CONTEXT = """
 {tasks}
 ## Current Task
 {current_task}
-## Packages Installed
-scikit-learn
-pandas
-numpy
-lightgbm
-xgboost
-catboost
 """
 
 
@@ -212,7 +205,7 @@ class MLEngineer(Role):
             result, success = await self.execute_code.run(code)
             # truncated the result
             print(truncate(result))
-            
+
             self.working_memory.add(
                 Message(content=truncate(remove_escape_and_color_codes(result)), role="user", cause_by=ExecutePyCode)
             )

From 7e6e493499c41c91c56a19a2ebc7ecb329ab6f5f Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Wed, 13 Dec 2023 19:36:31 +0800
Subject: [PATCH 23/34] refine prompt

---
 metagpt/actions/debug_code.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metagpt/actions/debug_code.py b/metagpt/actions/debug_code.py
index 53ca2f54d..58d006a08 100644
--- a/metagpt/actions/debug_code.py
+++ b/metagpt/actions/debug_code.py
@@ -47,7 +47,7 @@ REFLECTION_PROMPT = """
                        [runtime Error]
                        {runtime_result}
 
-                        Analysis the error step by step, provide me improve method and code. Remember to follow [context] requirement.
+                        Analysis the error step by step, provide me improve method and code. Remember to follow [context] requirement. Don't forget write code for steps behind the error step.
                         [reflection on previous impl]:
                         xxx
 

From cfb577d6747ba7dca7cea92b7199494a66eb3dfb Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Wed, 13 Dec 2023 20:10:17 +0800
Subject: [PATCH 24/34] rollback config

---
 config/config.yaml | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 694251f17..17605307a 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -5,7 +5,7 @@
 ## The official OPENAI_API_BASE is https://api.openai.com/v1
 ## If the official OPENAI_API_BASE is not available, we recommend using the [openai-forward](https://github.com/beidongjiedeguang/openai-forward).
 ## Or, you can configure OPENAI_PROXY to access official OPENAI_API_BASE.
-#OPENAI_API_BASE: "https://api.openai.com/v1"
+OPENAI_API_BASE: "https://api.openai.com/v1"
 #OPENAI_PROXY: "http://127.0.0.1:8118"
 #OPENAI_API_KEY: "YOUR_API_KEY"   # set the value to sk-xxx if you host the openai interface for open llm model
 OPENAI_API_MODEL: "gpt-4"
@@ -24,13 +24,12 @@ RPM: 10
 
 #### if AZURE, check https://github.com/openai/openai-cookbook/blob/main/examples/azure/chat.ipynb
 #### You can use ENGINE or DEPLOYMENT mode
-OPENAI_API_TYPE: "azure"
-OPENAI_API_BASE: "https://deepwisdom.openai.azure.com/"
-OPENAI_API_KEY: "02ae6058d09849c691176befeae2107c"
-#OPENAI_API_VERSION: "2023-05-15"
-OPENAI_API_VERSION: "2023-07-01-preview"
-DEPLOYMENT_ID: "GPT-4"
-OPENAI_API_ENGINE: "gpt-4"
+#OPENAI_API_TYPE: "azure"
+#OPENAI_API_BASE: "YOUR_AZURE_ENDPOINT"
+#OPENAI_API_KEY: "YOUR_AZURE_API_KEY"
+#OPENAI_API_VERSION: "YOUR_AZURE_API_VERSION"
+#DEPLOYMENT_NAME: "YOUR_DEPLOYMENT_NAME"
+#DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID"
 
 #### if zhipuai from `https://open.bigmodel.cn`. You can set here or export API_KEY="YOUR_API_KEY"
 # ZHIPUAI_API_KEY: "YOUR_API_KEY"
@@ -88,7 +87,7 @@ SD_T2I_API: "/sdapi/v1/txt2img"
 MODEL_FOR_RESEARCHER_SUMMARY: gpt-3.5-turbo
 MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k
 
-### choose the engine for mermaid conversion, 
+### choose the engine for mermaid conversion,
 # default is nodejs, you can change it to playwright,pyppeteer or ink
 # MERMAID_ENGINE: nodejs
 

From 8b0b5eeb804402f6a5329b92cdcb6da9e387d59d Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Wed, 13 Dec 2023 20:14:10 +0800
Subject: [PATCH 25/34] fix conflict

---
 metagpt/actions/write_code_steps.py |  1 -
 metagpt/roles/ml_engineer.py        | 48 ++++++++++++-----------------
 2 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/metagpt/actions/write_code_steps.py b/metagpt/actions/write_code_steps.py
index 9e06bc91e..3c08adc19 100644
--- a/metagpt/actions/write_code_steps.py
+++ b/metagpt/actions/write_code_steps.py
@@ -120,6 +120,5 @@ class WriteCodeSteps(Action):
         context = STRUCTURAL_CONTEXT.format(
             user_requirement=user_requirement, tasks=tasks, codes=codes, current_task=current_task
         )
-        print(context)
         # print(context)
         return context
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 26dfdbc67..8ab3ac981 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -4,30 +4,26 @@ from datetime import datetime
 
 import fire
 
-from metagpt.roles import Role
-from metagpt.schema import Message, Plan
-from metagpt.memory import Memory
-from metagpt.logs import logger
 from metagpt.actions import Action
-from metagpt.actions.write_plan import WritePlan, update_plan_from_rsp, precheck_update_plan_from_rsp
-from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
-from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst
+from metagpt.actions.debug_code import DebugCode
 from metagpt.actions.execute_code import ExecutePyCode
-from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
-from metagpt.prompts.ml_engineer import STRUCTURAL_CONTEXT
+from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst
+from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
 from metagpt.actions.write_code_steps import WriteCodeSteps
 from metagpt.actions.write_plan import WritePlan
+from metagpt.actions.write_plan import update_plan_from_rsp, precheck_update_plan_from_rsp
 from metagpt.const import DATA_PATH, PROJECT_ROOT
 from metagpt.logs import logger
+from metagpt.memory import Memory
+from metagpt.prompts.ml_engineer import STRUCTURAL_CONTEXT
 from metagpt.prompts.ml_engineer import (
-    GEN_DATA_DESC_PROMPT,
     UPDATE_DATA_COLUMNS,
     PRINT_DATA_COLUMNS
 )
 from metagpt.roles import Role
+from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
 from metagpt.schema import Message, Plan
-from metagpt.utils.common import CodeParser, remove_comments, create_func_config
-from metagpt.actions.debug_code import DebugCode
+from metagpt.utils.common import remove_comments, create_func_config
 from metagpt.utils.save_code import save_code_file
 
 
@@ -103,9 +99,10 @@ class MLEngineer(Role):
                 self.plan.finish_current_task()
                 self.working_memory.clear()
 
-                success, new_code = await self._update_data_columns()
-                if success:
-                    task.code = task.code + "\n\n" + new_code
+                if self.use_tools:
+                    success, new_code = await self._update_data_columns()
+                    if success:
+                        task.code = task.code + "\n\n" + new_code
 
                 confirmed_and_more = (ReviewConst.CONTINUE_WORD[0] in review.lower()
                     and review.lower() not in ReviewConst.CONTINUE_WORD[0])  # "confirm, ... (more content, such as changing downstream tasks)"
@@ -134,9 +131,6 @@ class MLEngineer(Role):
         save_code_file(name=project_record, code_context=self.execute_code.nb, file_format="ipynb")
         return rsp
 
-        time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
-        self.execute_code.save_notebook(f"{DATA_PATH}/notebooks/ml_{time}.ipynb")
-
     async def _update_data_columns(self):
         rsp = await UpdateDataColumns().run(self.plan)
         is_update, code = rsp["is_update"], rsp["code"]
@@ -159,12 +153,6 @@ class MLEngineer(Role):
         success = False
         debug_context = []
 
-        finished_tasks = self.plan.get_finished_tasks()
-        code_context = [task.code for task in finished_tasks]
-        code_result = [task.result for task in finished_tasks]
-        code_context = "\n\n".join(code_context)
-        code_result = "\n\n".join(code_result)
-
         while not success and counter < max_retry:
             context = self.get_useful_memories()
 
@@ -272,16 +260,18 @@ class MLEngineer(Role):
         self.working_memory.add(Message(content=reflection, role="assistant"))
         self.working_memory.add(Message(content=Reflect.REWRITE_PLAN_INSTRUCTION, role="user"))
 
-    def get_useful_memories(self, task_exclude_field: set = None) -> List[Message]:
+    def get_useful_memories(self, task_exclude_field=None) -> List[Message]:
         """find useful memories only to reduce context length and improve performance"""
         # TODO dataset description , code steps
+        if task_exclude_field is None:
+            task_exclude_field = {'code_steps'}
         user_requirement = self.plan.goal
         data_desc = self.plan.context
         tasks = [task.dict(exclude=task_exclude_field) for task in self.plan.tasks]
-        for task in tasks:
-            # Shorten the context as we don't need code steps after we get the codes.
-            # This doesn't affect current_task below, which should hold the code steps
-            task.pop("code_steps")
+        # for task in tasks:
+        #     # Shorten the context as we don't need code steps after we get the codes.
+        #     # This doesn't affect current_task below, which should hold the code steps
+        #     task.pop("code_steps")
         tasks = json.dumps(tasks, indent=4, ensure_ascii=False)
         current_task = self.plan.current_task.json() if self.plan.current_task else {}
         context = STRUCTURAL_CONTEXT.format(

From 7744815c5ff8f61eb90ccee07555c9f7207182bd Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Wed, 13 Dec 2023 20:32:49 +0800
Subject: [PATCH 26/34] fix conflict

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 9b75fd200..2328de2a1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -45,6 +45,7 @@ wrapt==1.15.0
 websocket-client==0.58.0
 zhipuai==1.0.7
 rich==13.6.0
+nbclient==0.9.0
 nbformat==5.9.2
 ipython==8.17.2
 ipykernel==6.27.0

From edd6987a1c4738f27fb1936fa701441145b96869 Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Wed, 13 Dec 2023 20:41:32 +0800
Subject: [PATCH 27/34] drop old tool definition

---
 metagpt/tools/functions/__init__.py           |   3 -
 metagpt/tools/functions/libs/ml_model.py      | 196 ------------------
 metagpt/tools/functions/register/__init__.py  |   6 -
 metagpt/tools/functions/register/register.py  |  78 -------
 metagpt/tools/functions/schemas/base.py       | 100 ---------
 .../functions/schemas/data_preprocess.py      |  67 ------
 .../functions/schemas/feature_engineering.py  | 110 ----------
 metagpt/tools/functions/schemas/ml_model.py   |  55 -----
 8 files changed, 615 deletions(-)
 delete mode 100644 metagpt/tools/functions/libs/ml_model.py
 delete mode 100644 metagpt/tools/functions/register/__init__.py
 delete mode 100644 metagpt/tools/functions/register/register.py
 delete mode 100644 metagpt/tools/functions/schemas/base.py
 delete mode 100644 metagpt/tools/functions/schemas/data_preprocess.py
 delete mode 100644 metagpt/tools/functions/schemas/feature_engineering.py
 delete mode 100644 metagpt/tools/functions/schemas/ml_model.py

diff --git a/metagpt/tools/functions/__init__.py b/metagpt/tools/functions/__init__.py
index 30ee10827..a0a43f507 100644
--- a/metagpt/tools/functions/__init__.py
+++ b/metagpt/tools/functions/__init__.py
@@ -4,6 +4,3 @@
 # @Author  : lidanyang
 # @File    : __init__.py
 # @Desc    :
-from metagpt.tools.functions.register.register import registry
-import metagpt.tools.functions.libs.feature_engineering
-import metagpt.tools.functions.libs.data_preprocess
diff --git a/metagpt/tools/functions/libs/ml_model.py b/metagpt/tools/functions/libs/ml_model.py
deleted file mode 100644
index b669de2c1..000000000
--- a/metagpt/tools/functions/libs/ml_model.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-
-from sklearn.linear_model import LogisticRegression
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import GradientBoostingClassifier
-
-
-from sklearn.linear_model import LinearRegression
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.ensemble import GradientBoostingRegressor
-
-from metagpt.tools.functions import registry
-from metagpt.tools.functions.schemas.ml_model import *
-
-
-#########
-## 分类 ##
-#########
-
-
-@registry.register("classification_model", LogisticRegressionClassification)
-def logistic_regression_classification(df, label, test_size=0.2, penalty='l2', dual=False):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-
-    model = LogisticRegression(penalty=penalty, dual=dual)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict_proba(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-@registry.register("classification_model", RandomForestClassification)
-def random_forest_classification(df, label, test_size=0.2, n_estimators=100, criterion='gini'):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-    model = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict_proba(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-@registry.register("classification_model", GradientBoostingClassification)
-def gradient_boosting_classification(df, label, test_size=0.2, n_estimators=100, learning_rate=0.1):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-    model = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict_proba(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-
-#########
-## 回归 ##
-#########
-
-
-@registry.register("regression_model", LinearRegressionRegression)
-def linear_regression(df, label, test_size=0.2, ):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-
-    model = LinearRegression()
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-@registry.register("regression_model", RandomForestRegression)
-def random_forest_regression(df, label, test_size=0.2, n_estimators=100, criterion='squared_error'):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-    model = RandomForestRegressor(n_estimators=n_estimators, criterion=criterion)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-@registry.register("regression_model", GradientBoostingRegression)
-def gradient_boosting_regression(df, label, test_size=0.2, n_estimators=100, learning_rate=0.1):
-    nonnumeric_columns = [col for col in df if df[col].dtype == 'object']
-    for col in nonnumeric_columns:
-        df[col] = LabelEncoder().fit_transform(df[col])
-    df = df.fillna(0)
-
-    features = [col for col in df if col != label]
-    x, y = df[features], df[label]
-    tr_x, te_x, tr_y, te_y = train_test_split(x, y, test_size=test_size, random_state=1)
-    model = GradientBoostingRegressor(n_estimators=n_estimators, learning_rate=learning_rate)
-    model.fit(tr_x, tr_y, )
-    te_pred_prob = model.predict(te_x)
-
-    res = {
-        'te_pred_prob': te_pred_prob
-    }
-    return res
-
-
-if __name__ == '__main__':
-    def run():
-        from sklearn.datasets import load_iris
-        loader = load_iris(as_frame=True)
-        df = loader['data']
-        df['target'] = loader['target']
-
-        df[df.columns[0]] = df[df.columns[0]].astype(str)
-        df[df.columns[1]] = df[df.columns[1]].astype(int)
-        df['target'] = df['target'].astype(str)
-
-        print(df)
-        print('####'*5)
-        res = logistic_regression_classification(df, 'target', test_size=0.25, penalty='l2', dual=False)
-        print(res['te_pred_prob'])
-
-        print('####'*5)
-        res = random_forest_classification(df, 'target', test_size=0.25, n_estimators=100, criterion='gini')
-        print(res['te_pred_prob'])
-
-        print('####'*5)
-        res = gradient_boosting_classification(df, 'target', test_size=0.25, n_estimators=100, learning_rate=0.1)
-        print(res['te_pred_prob'])
-
-        from sklearn.datasets import make_regression
-        import pandas as pd
-        loader = make_regression()
-        df = pd.DataFrame(loader[0])
-        df['target'] = loader[1]
-
-        df[df.columns[0]] = df[df.columns[0]].astype(str)
-        df[df.columns[1]] = df[df.columns[1]].astype(int)
-        # df['target'] = df['target'].astype(str)
-
-        print(df)
-        print('####' * 5)
-        res = linear_regression(df, 'target', test_size=0.25, )
-        print(res['te_pred_prob'])
-
-        print('####' * 5)
-        res = random_forest_regression(df, 'target', test_size=0.25, n_estimators=100, criterion='squared_error')
-        print(res['te_pred_prob'])
-
-        print('####' * 5)
-        res = gradient_boosting_regression(df, 'target', test_size=0.25, n_estimators=100, learning_rate=0.1)
-        print(res['te_pred_prob'])
-    run()
\ No newline at end of file
diff --git a/metagpt/tools/functions/register/__init__.py b/metagpt/tools/functions/register/__init__.py
deleted file mode 100644
index c80872750..000000000
--- a/metagpt/tools/functions/register/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/16 16:37
-# @Author  : lidanyang
-# @File    : __init__.py
-# @Desc    :
diff --git a/metagpt/tools/functions/register/register.py b/metagpt/tools/functions/register/register.py
deleted file mode 100644
index 0731e31c0..000000000
--- a/metagpt/tools/functions/register/register.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/16 16:38
-# @Author  : lidanyang
-# @File    : register.py
-# @Desc    :
-import inspect
-from typing import Type, Optional, Callable, Dict, Union, List
-
-from metagpt.tools.functions.schemas.base import ToolSchema
-
-
-class FunctionRegistry:
-    def __init__(self):
-        self.functions: Dict[str, Dict[str, Dict]] = {}
-
-    @staticmethod
-    def _check_param_consistency(func_params, schema):
-        param_names = set(func_params.keys())
-        schema_names = set(schema["parameters"]["properties"].keys())
-
-        if param_names != schema_names:
-            raise ValueError("Function parameters do not match schema properties")
-
-    def register(self, module: str, tool_schema: Type[ToolSchema]) -> Callable:
-        def wrapper(func: Callable) -> Callable:
-            module_registry = self.functions.setdefault(module, {})
-
-            if func.__name__ in module_registry:
-                raise ValueError(f"Function {func.__name__} is already registered in {module}")
-
-            func_params = inspect.signature(func).parameters
-
-            schema = tool_schema.schema()
-            schema["name"] = func.__name__
-
-            self._check_param_consistency(func_params, schema)
-
-            module_registry[func.__name__] = {
-                "func": func,
-                "schema": schema,
-            }
-            return func
-
-        return wrapper
-
-    def get(self, module: str, name: str) -> Optional[Union[Callable, Dict]]:
-        """Get function by module and name"""
-        module_registry = self.functions.get(module, {})
-        return module_registry.get(name)
-
-    def get_by_name(self, name: str) -> Optional[Dict]:
-        """Get function by name"""
-        for module_registry in self.functions.values():
-            if name in module_registry:
-                return module_registry.get(name, {})
-
-    def get_all_by_module(self, module: str) -> Optional[Dict]:
-        """Get all functions by module"""
-        return self.functions.get(module, {})
-
-    def get_schema(self, module: str, name: str) -> Optional[Dict]:
-        """Get schema by module and name"""
-        module_registry = self.functions.get(module, {})
-        return module_registry.get(name, {}).get("schema")
-
-    def get_schemas(self, module: str, names: List[str]) -> List[Dict]:
-        """Get schemas by module and names"""
-        module_registry = self.functions.get(module, {})
-        return [module_registry.get(name, {}).get("schema") for name in names]
-
-    def get_all_schema_by_module(self, module: str) -> List[Dict]:
-        """Get all schemas by module"""
-        module_registry = self.functions.get(module, {})
-        return [v.get("schema") for v in module_registry.values()]
-
-
-registry = FunctionRegistry()
diff --git a/metagpt/tools/functions/schemas/base.py b/metagpt/tools/functions/schemas/base.py
deleted file mode 100644
index aef604c8d..000000000
--- a/metagpt/tools/functions/schemas/base.py
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/16 16:34
-# @Author  : lidanyang
-# @File    : base.py
-# @Desc    : Build base class to generate schema for tool
-from typing import Any, List, Optional, get_type_hints
-
-
-class NoDefault:
-    """
-    A class to represent a missing default value.
-
-    This is used to distinguish between a default value of None and a missing default value.
-    """
-    pass
-
-
-def tool_field(
-    description: str, default: Any = NoDefault(), enum: Optional[List[Any]] = None, **kwargs
-):
-    """
-    Create a field for a tool parameter.
-
-    Args:
-        description (str): A description of the field.
-        default (Any, optional): The default value for the field. Defaults to None.
-        enum (Optional[List[Any]], optional): A list of possible values for the field. Defaults to None.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        dict: A dictionary representing the field with provided attributes.
-    """
-    field_info = {
-        "description": description,
-        "default": default,
-        "enum": enum,
-    }
-    field_info.update(kwargs)
-    return field_info
-
-
-class ToolSchema:
-    @staticmethod
-    def format_type(type_hint):
-        """
-        Format a type hint into a string representation.
-
-        Args:
-            type_hint (type): The type hint to format.
-
-        Returns:
-            str: A string representation of the type hint.
-        """
-        if isinstance(type_hint, type):
-            # Handle built-in types separately
-            if type_hint.__module__ == "builtins":
-                return type_hint.__name__
-            else:
-                return f"{type_hint.__module__}.{type_hint.__name__}"
-        elif hasattr(type_hint, "__origin__") and hasattr(type_hint, "__args__"):
-            # Handle generic types (like List[int])
-            origin_type = ToolSchema.format_type(type_hint.__origin__)
-            args_type = ", ".join(
-                [ToolSchema.format_type(t) for t in type_hint.__args__]
-            )
-            return f"{origin_type}[{args_type}]"
-        else:
-            return str(type_hint)
-
-    @classmethod
-    def schema(cls):
-        """
-        Generate a schema dictionary for the class.
-
-        The schema includes the class name, description, and information about
-        each class parameter based on type hints and field definitions.
-
-        Returns:
-            dict: A dictionary representing the schema of the class.
-        """
-        schema = {
-            "name": cls.__name__,
-            "description": cls.__doc__,
-            "parameters": {"type": "object", "properties": {}, "required": []},
-        }
-        type_hints = get_type_hints(cls)
-        for attr, type_hint in type_hints.items():
-            value = getattr(cls, attr, None)
-            if isinstance(value, dict):
-                # Process each attribute that is defined using the field function
-                prop_info = {k: v for k, v in value.items() if v is not None or k == "default"}
-                if isinstance(prop_info["default"], NoDefault):
-                    del prop_info["default"]
-                prop_info["type"] = ToolSchema.format_type(type_hint)
-                schema["parameters"]["properties"][attr] = prop_info
-                # Check for required fields
-                if "default" not in prop_info:
-                    schema["parameters"]["required"].append(attr)
-        return schema
diff --git a/metagpt/tools/functions/schemas/data_preprocess.py b/metagpt/tools/functions/schemas/data_preprocess.py
deleted file mode 100644
index 16b97aeac..000000000
--- a/metagpt/tools/functions/schemas/data_preprocess.py
+++ /dev/null
@@ -1,67 +0,0 @@
-
-import pandas as pd
-
-from metagpt.tools.functions.schemas.base import tool_field, ToolSchema
-
-
-class FillMissingValue(ToolSchema):
-    """Completing missing values with simple strategies"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-    strategy: str = tool_field(
-        description="the imputation strategy",
-        default='mean',
-        enum=['mean', 'median', 'most_frequent', 'constant']
-    )
-    fill_value: int = tool_field(
-        description="fill_value is used to replace all occurrences of missing_values", default=None)
-
-
-class SplitBins(ToolSchema):
-    """Bin continuous data into intervals and return the bin identifier encoded as an integer value"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-    strategy: str = tool_field(description="Strategy used to define the widths of the bins", default='quantile')
-
-
-class MinMaxScale(ToolSchema):
-    """Transform features by scaling each feature to a range, witch is (0, 1)"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class StandardScale(ToolSchema):
-    """Standardize features by removing the mean and scaling to unit variance"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class LogTransform(ToolSchema):
-    """Performs a logarithmic transformation on the specified columns"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class MaxAbsScale(ToolSchema):
-    """Scale each feature by its maximum absolute value"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class RobustScale(ToolSchema):
-    """Scale features using statistics that are robust to outliers, the quantile_range is (25.0, 75.0)"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class OrdinalEncode(ToolSchema):
-    """Encode categorical features as an integer array"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    features: list = tool_field(description="columns to be processed")
-
-
-class OneHotEncoding(ToolSchema):
-    """Apply one-hot encoding to specified categorical columns, the original columns will be dropped."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    cols: list = tool_field(description="Categorical columns to be one-hot encoded and dropped.")
diff --git a/metagpt/tools/functions/schemas/feature_engineering.py b/metagpt/tools/functions/schemas/feature_engineering.py
deleted file mode 100644
index 5c89d9b16..000000000
--- a/metagpt/tools/functions/schemas/feature_engineering.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# @Time    : 2023/11/17 10:34
-# @Author  : lidanyang
-# @File    : feature_engineering.py
-# @Desc    : Schema for feature engineering functions
-from typing import List
-
-import pandas as pd
-
-from metagpt.tools.functions.schemas.base import ToolSchema, tool_field
-
-
-class PolynomialExpansion(ToolSchema):
-    """Add polynomial and interaction features from selected numeric columns, excluding the bias column."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    cols: list = tool_field(description="Columns for polynomial expansion.")
-    degree: int = tool_field(description="Degree of polynomial features.", default=2)
-
-
-class FrequencyEncoding(ToolSchema):
-    """Add value counts of categorical columns as new features."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    cols: list = tool_field(description="Categorical columns to be frequency encoded.")
-
-
-class TargetMeanEncoder(ToolSchema):
-    """Encodes a categorical column by the mean of the label column, and adds the result as a new feature."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    col: str = tool_field(description="Column to be mean encoded.")
-    label: str = tool_field(description="Predicted label column.")
-
-
-class KFoldTargetMeanEncoder(ToolSchema):
-    """Adds a new feature to the DataFrame by k-fold mean encoding of a categorical column using the label column."""
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    col: str = tool_field(description="Column to be k-fold mean encoded.")
-    label: str = tool_field(description="Predicted label column.")
-    n_splits: int = tool_field(description="Number of splits for K-fold.", default=5)
-    random_state: int = tool_field(description="Random seed.", default=2021)
-
-
-class CatCross(ToolSchema):
-    """Add pairwise crossed features and convert them to numerical features."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    cols: list = tool_field(description="Columns to be pairwise crossed.")
-    max_cat_num: int = tool_field(
-        description="Maximum unique categories per crossed feature.", default=100
-    )
-
-
-class GroupStat(ToolSchema):
-    """Aggregate specified column in a DataFrame grouped by another column, adding new features named '<agg_col>_<agg_func>_by_<group_col>'."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    group_col: str = tool_field(description="Column used for grouping.")
-    agg_col: str = tool_field(description="Column on which aggregation is performed.")
-    agg_funcs: list = tool_field(
-        description="""List of aggregation functions to apply, such as ['mean', 'std'].
-                    Each function must be supported by pandas."""
-    )
-
-
-class ExtractTimeComps(ToolSchema):
-    """Extract and add specific time components as new features from a designated time column."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    time_col: str = tool_field(
-        description="The name of the column containing time data."
-    )
-    time_comps: List[str] = tool_field(
-        description="""List of time components to extract.
-        Each component must be in ['year', 'month', 'day', 'hour', 'dayofweek', 'is_weekend']."""
-    )
-
-
-class FeShiftByTime(ToolSchema):
-    """Shift column values based on specified time intervals and add the resulting new features to the DataFrame. New features are named in the format of '<group_col>_<shift_col>_lag_<period>_<freq>'."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    time_col: str = tool_field(description="Column for time-based shifting.")
-    group_col: str = tool_field(description="Column for grouping before shifting.")
-    shift_col: str = tool_field(description="Column to shift.")
-    periods: list = tool_field(description="Time intervals for shifting.")
-    freq: str = tool_field(
-        description="Frequency unit for time intervals (e.g., 'D', 'M').",
-        enum=["D", "M", "Y", "W", "H"],
-    )
-
-
-class FeRollingByTime(ToolSchema):
-    """Calculate rolling statistics for a DataFrame column over time intervals."""
-
-    df: pd.DataFrame = tool_field(description="DataFrame to process.")
-    time_col: str = tool_field(description="Column for time-based rolling.")
-    group_col: str = tool_field(description="Column for grouping before rolling.")
-    rolling_col: str = tool_field(description="Column for rolling calculations.")
-    periods: list = tool_field(description="Window sizes for rolling.")
-    freq: str = tool_field(
-        description="Frequency unit for time windows (e.g., 'D', 'M').",
-        enum=["D", "M", "Y", "W", "H"],
-    )
-    agg_funcs: list = tool_field(
-        description="""List of aggregation functions for rolling, like ['mean', 'std'].
-        Each function must be in ['mean', 'std', 'min', 'max', 'median', 'sum', 'count']."""
-    )
diff --git a/metagpt/tools/functions/schemas/ml_model.py b/metagpt/tools/functions/schemas/ml_model.py
deleted file mode 100644
index 9268156af..000000000
--- a/metagpt/tools/functions/schemas/ml_model.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import pandas as pd
-
-from metagpt.tools.functions.schemas.base import tool_field, ToolSchema
-
-
-class LogisticRegressionClassification(ToolSchema):
-    """Logistic Regression (aka logit, MaxEnt) classifier"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    penalty: str = tool_field(description="Specify the norm of the penalty", default="l2")
-    dual: bool = tool_field(description="Dual (constrained) or primal (regularized) formulation", default="l2")
-
-
-class RandomForestClassification(ToolSchema):
-    """random forest is a meta estimator that fits a number of decision tree classifiers on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    n_estimators: int = tool_field(description="The number of trees in the forest", default=100)
-    criterion: str = tool_field(description="The function to measure the quality of a split", default="gini")
-
-
-class GradientBoostingClassification(ToolSchema):
-    """Gradient Boosting for classification.This algorithm builds an additive model in a forward stage-wise fashion"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    n_estimators: int = tool_field(description="The number of boosting stages to perform", default=100)
-    learning_rate: float = tool_field(description="Learning rate shrinks the contribution of each tree by learning_rate", default=0.1)
-
-
-class LinearRegressionRegression(ToolSchema):
-    """Ordinary least squares Linear Regression."""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-
-
-class RandomForestRegression(ToolSchema):
-    """random forest is a meta estimator that fits a number of decision tree on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    n_estimators: int = tool_field(description="The number of trees in the forest", default=100)
-    criterion: str = tool_field(description="The function to measure the quality of a split", default="squared_error")
-
-
-class GradientBoostingRegression(ToolSchema):
-    """Gradient Boosting for regression.This estimator builds an additive model in a forward stage-wise fashion"""
-    df: pd.DataFrame = tool_field(description="input dataframe")
-    label: str = tool_field(description="target name")
-    test_size: float = tool_field(description="The proportion of the test set to all the data", default=0.2)
-    n_estimators: int = tool_field(description="The number of boosting stages to perform", default=100)
-    learning_rate: float = tool_field(description="Learning rate shrinks the contribution of each tree by learning_rate", default=0.1)

From 2a3f23ec62ebca8329c2748179d731025a685d0a Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Thu, 14 Dec 2023 10:32:58 +0800
Subject: [PATCH 28/34] fix unittest

---
 .../actions/test_write_analysis_code.py       | 33 ++++++++-----------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/tests/metagpt/actions/test_write_analysis_code.py b/tests/metagpt/actions/test_write_analysis_code.py
index 661202115..1a568cdcd 100644
--- a/tests/metagpt/actions/test_write_analysis_code.py
+++ b/tests/metagpt/actions/test_write_analysis_code.py
@@ -31,22 +31,15 @@ async def test_tool_recommendation():
     step 1: 对数据集进行去重
     step 2: 对数据集进行缺失值处理
     """
-    available_tools = [
-        {
-            "name": "fill_missing_value",
-            "description": "Completing missing values with simple strategies",
-        },
-        {
-            "name": "split_bins",
-            "description": "Bin continuous data into intervals and return the bin identifier encoded as an integer value",
-        },
-    ]
+    available_tools = {
+        "fill_missing_value": "Completing missing values with simple strategies",
+        "split_bins": "Bin continuous data into intervals and return the bin identifier encoded as an integer value",
+    }
     write_code = WriteCodeWithTools()
     tools = await write_code._tool_recommendation(task, code_steps, available_tools)
 
-    assert len(tools) == 2
-    assert tools[0] == []
-    assert tools[1] == ["fill_missing_value"]
+    assert len(tools) == 1
+    assert tools[0] == ["fill_missing_value"]
 
 
 @pytest.mark.asyncio
@@ -57,7 +50,7 @@ async def test_write_code_with_tools():
         "1": Task(
                 task_id="1",
                 instruction="随机生成一个pandas DataFrame数据集",
-                task_type="unknown",
+                task_type="other",
                 dependent_task_ids=[],
                 code="""
                 import pandas as pd
@@ -75,6 +68,10 @@ async def test_write_code_with_tools():
                 instruction="对数据集进行数据清洗",
                 task_type="data_preprocess",
                 dependent_task_ids=["1"],
+                code_steps="""
+                {"Step 1": "对数据集进行去重",
+                "Step 2": "对数据集进行缺失值处理"}
+                """
             ),
     }
     plan = Plan(
@@ -83,13 +80,9 @@ async def test_write_code_with_tools():
         task_map=task_map,
         current_task_id="2",
     )
-    task_guide = """
-    step 1: 对数据集进行去重
-    step 2: 对数据集进行缺失值处理
-    """
-    data_desc = "None"
+    column_info = ""
 
-    code = await write_code.run(messages, plan, task_guide, data_desc)
+    code = await write_code.run(messages, plan, column_info)
     assert len(code) > 0
     print(code)
 

From d84e9cae2c8dfc5345edb253f59ca1f0901cacab Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Thu, 14 Dec 2023 10:34:15 +0800
Subject: [PATCH 29/34] fix conflict

---
 metagpt/actions/write_analysis_code.py |  6 ++----
 metagpt/roles/ml_engineer.py           | 12 ++++++------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py
index 2c45281f9..6970fb4f0 100644
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@@ -24,8 +24,8 @@ from metagpt.utils.common import create_func_config, remove_comments
 
 
 class BaseWriteAnalysisCode(Action):
-    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you."""  # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
-    REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
+    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.**"""  # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
+    # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
 
     def process_msg(self, prompt: Union[str, List[Dict], Message, List[Message]], system_msg: str = None):
         default_system_msg = system_msg or self.DEFAULT_SYSTEM_MSG
@@ -201,8 +201,6 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
                 module_name=module_name,
                 tool_catalog=tool_catalog,
             )
-
-
         else:
             prompt = GENERATE_CODE_PROMPT.format(
                 user_requirement=plan.goal,
diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 8f06a541c..0b76711f4 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -159,12 +159,12 @@ class MLEngineer(Role):
             # print("*" * 10)
             # breakpoint()
             if counter > 0:
-                improve_code = await DebugCode().run(plan=self.plan.current_task.instruction,
-                                                     # finished_code=code_context,
-                                                     # finished_code_result=code_result,
-                                                     code=code,
-                                                     runtime_result=self.working_memory.get(),
-                                                     context=debug_context)
+                improve_code = await DebugCode().run(
+                    plan=self.plan.current_task.instruction,
+                    code=code,
+                    runtime_result=self.working_memory.get(),
+                    context=debug_context
+                )
 
             if improve_code != "":
                 code = improve_code

From 44334c0c9aa6b8a0d6314d3b24623d9633ce7c2d Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Thu, 14 Dec 2023 10:59:42 +0800
Subject: [PATCH 30/34] drop old schema import

---
 metagpt/tools/functions/libs/data_preprocess.py     | 7 +++++--
 metagpt/tools/functions/libs/feature_engineering.py | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/metagpt/tools/functions/libs/data_preprocess.py b/metagpt/tools/functions/libs/data_preprocess.py
index fa70bf8fc..ec3580889 100644
--- a/metagpt/tools/functions/libs/data_preprocess.py
+++ b/metagpt/tools/functions/libs/data_preprocess.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import LabelEncoder
 from sklearn.preprocessing import MaxAbsScaler
@@ -9,7 +10,6 @@ from sklearn.preprocessing import RobustScaler
 from sklearn.preprocessing import StandardScaler
 
 from metagpt.tools.functions.libs.base import MLProcess
-from metagpt.tools.functions.schemas.data_preprocess import *
 
 
 class FillMissingValue(MLProcess):
@@ -141,7 +141,10 @@ def get_column_info(df: pd.DataFrame) -> dict:
     for i in df.columns:
         nan_freq = float("%.2g" % (df[i].isna().mean() * 100))
         n_unique = df[i].nunique()
-        data.append([i, df[i].dtype, nan_freq, n_unique])
+        data_type = str(df[i].dtype).replace("dtype('", "").replace("')", "")
+        if data_type == "O":
+            data_type = "object"
+        data.append([i, data_type, nan_freq, n_unique])
 
     samples = pd.DataFrame(
         data,
diff --git a/metagpt/tools/functions/libs/feature_engineering.py b/metagpt/tools/functions/libs/feature_engineering.py
index de54e4db0..1ec2b9675 100644
--- a/metagpt/tools/functions/libs/feature_engineering.py
+++ b/metagpt/tools/functions/libs/feature_engineering.py
@@ -7,6 +7,7 @@
 import itertools
 
 import numpy as np
+import pandas as pd
 from dateutil.relativedelta import relativedelta
 from joblib import Parallel, delayed
 from pandas.api.types import is_numeric_dtype
@@ -15,7 +16,6 @@ from sklearn.model_selection import KFold
 from sklearn.preprocessing import PolynomialFeatures, KBinsDiscretizer
 
 from metagpt.tools.functions.libs.base import MLProcess
-from metagpt.tools.functions.schemas.feature_engineering import *
 
 
 class PolynomialExpansion(MLProcess):

From 5940c8d908b12d8c99cc03305dec4fcf8bcc3dd8 Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Thu, 14 Dec 2023 12:56:01 +0800
Subject: [PATCH 31/34] remove old comments

---
 metagpt/roles/ml_engineer.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 0b76711f4..51faf1e0d 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -261,14 +261,12 @@ class MLEngineer(Role):
         """find useful memories only to reduce context length and improve performance"""
         # TODO dataset description , code steps
         if task_exclude_field is None:
+            # By default, drop code_steps to shorten the context: they are not needed once the code is written.
+            # This doesn't affect current_task below, which should still hold its code steps.
             task_exclude_field = {'code_steps'}
         user_requirement = self.plan.goal
         data_desc = self.plan.context
         tasks = [task.dict(exclude=task_exclude_field) for task in self.plan.tasks]
-        # for task in tasks:
-        #     # Shorten the context as we don't need code steps after we get the codes.
-        #     # This doesn't affect current_task below, which should hold the code steps
-        #     task.pop("code_steps")
         tasks = json.dumps(tasks, indent=4, ensure_ascii=False)
         current_task = self.plan.current_task.json() if self.plan.current_task else {}
         context = STRUCTURAL_CONTEXT.format(

From ef6e4a1b77a21cefeb165301dd1d47b5c273fdbb Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Thu, 14 Dec 2023 13:46:27 +0800
Subject: [PATCH 32/34] debug only when use_tools

---
 metagpt/roles/ml_engineer.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 51faf1e0d..3755e7bac 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -147,7 +147,6 @@ class MLEngineer(Role):
         )
 
         counter = 0
-        improve_code = ""
         success = False
         debug_context = []
 
@@ -158,17 +157,14 @@ class MLEngineer(Role):
             # print(context)
             # print("*" * 10)
             # breakpoint()
-            if counter > 0:
-                improve_code = await DebugCode().run(
+            if counter > 0 and self.use_tools:
+                code = await DebugCode().run(
                     plan=self.plan.current_task.instruction,
                     code=code,
                     runtime_result=self.working_memory.get(),
                     context=debug_context
                 )
-
-            if improve_code != "":
-                code = improve_code
-                logger.info(f"new code \n{improve_code}")
+                logger.info(f"new code \n{code}")
                 cause_by = DebugCode
             elif not self.use_tools or self.plan.current_task.task_type == "other":
                 logger.info("Write code with pure generation")

From 97f707784bd8558b3bbd138d9380af55bb85f9a4 Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Thu, 14 Dec 2023 13:56:23 +0800
Subject: [PATCH 33/34] reformat

---
 metagpt/actions/debug_code.py | 124 ++++++++++++++++++----------------
 1 file changed, 64 insertions(+), 60 deletions(-)

diff --git a/metagpt/actions/debug_code.py b/metagpt/actions/debug_code.py
index 58d006a08..3e1705d8e 100644
--- a/metagpt/actions/debug_code.py
+++ b/metagpt/actions/debug_code.py
@@ -1,57 +1,56 @@
 from typing import Dict, List, Union, Tuple, Optional, Any
 
-from metagpt.actions import Action
 from metagpt.logs import logger
 from metagpt.schema import Message, Plan
 from metagpt.utils.common import CodeParser, create_func_config
 from metagpt.actions.write_analysis_code import BaseWriteAnalysisCode
 
-DEBUG_REFLECTION_EXAMPLE = '''Example 1:
-                           [previous impl]:
-                           ```python
-                           def add(a: int, b: int) -> int:
-                               """
-                               Given integers a and b, return the total value of a and b.
-                               """
-                               return a - b
-                           ```
+DEBUG_REFLECTION_EXAMPLE = '''
+Example 1:
+[previous impl]:
+```python
+def add(a: int, b: int) -> int:
+   """
+   Given integers a and b, return the total value of a and b.
+   """
+   return a - b
+```
 
-                           [runtime Error]:
-                           Tested passed:
+[runtime Error]:
+Tested passed:
 
-                           Tests failed:
-                           assert add(1, 2) == 3 # output: -1
-                           assert add(1, 2) == 4 # output: -1
+Tests failed:
+assert add(1, 2) == 3 # output: -1
+assert add(1, 2) == 4 # output: -1
 
-                           [reflection on previous impl]:
-                           The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
+[reflection on previous impl]:
+The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
 
-                           [improved impl]:
-                           ```python
-                           def add(a: int, b: int) -> int:
-                               """
-                               Given integers a and b, return the total value of a and b.
-                               """
-                               return a + b
-                           ```
-                           '''
+[improved impl]:
+```python
+def add(a: int, b: int) -> int:
+   """
+   Given integers a and b, return the total value of a and b.
+   """
+   return a + b
+```
+'''
 
 REFLECTION_PROMPT = """
-                       Here is an example for you.
-                       {debug_example}
-                       [context]
-                       {context}
-                       
-                       [previous impl]
-                       {code}
-                       [runtime Error]
-                       {runtime_result}
+Here is an example for you.
+{debug_example}
+[context]
+{context}
 
-                        Analysis the error step by step, provide me improve method and code. Remember to follow [context] rerquirement. Don't forget write code for steps behind the error step.
-                        [reflection on previous impl]:
-                        xxx
+[previous impl]
+{code}
+[runtime Error]
+{runtime_result}
 
-                       """
+Analysis the error step by step, provide me improve method and code. Remember to follow [context] rerquirement. Don't forget write code for steps behind the error step.
+[reflection on previous impl]:
+xxx
+"""
 
 CODE_REFLECTION = {
     "name": "execute_reflection_code",
@@ -85,10 +84,10 @@ class DebugCode(BaseWriteAnalysisCode):
     name: str = "debugcode"
     context: Optional[str] = None
     llm: None
-    
+
     def __init__(self, **kwargs: Any):
         super().__init__(**kwargs)
-    
+
     async def run_reflection(
         self,
         # goal,
@@ -100,23 +99,26 @@ class DebugCode(BaseWriteAnalysisCode):
     ) -> dict:
         info = []
         # finished_code_and_result = finished_code + "\n [finished results]\n\n" + finished_code_result
-        reflection_prompt = REFLECTION_PROMPT.format(debug_example=DEBUG_REFLECTION_EXAMPLE,
-                                                     context=context,
-                                                     # goal=goal,
-                                                     # finished_code=finished_code_and_result,
-                                                     code=code,
-                                                     runtime_result=runtime_result
-                                                     )
+        reflection_prompt = REFLECTION_PROMPT.format(
+            debug_example=DEBUG_REFLECTION_EXAMPLE,
+            context=context,
+            # goal=goal,
+            # finished_code=finished_code_and_result,
+            code=code,
+            runtime_result=runtime_result,
+        )
         system_prompt = "You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation "
         info.append(Message(role="system", content=system_prompt))
         info.append(Message(role="user", content=reflection_prompt))
-        
+
         # msg = messages_to_str(info)
         # resp = await self.llm.aask(msg=msg)
-        resp = await self.llm.aask_code(messages=info, **create_func_config(CODE_REFLECTION))
+        resp = await self.llm.aask_code(
+            messages=info, **create_func_config(CODE_REFLECTION)
+        )
         logger.info(f"reflection is {resp}")
         return resp
-    
+
     # async def rewrite_code(self, reflection: str = "", context: List[Message] = None) -> str:
     #     """
     #     根据reflection重写代码
@@ -131,14 +133,16 @@ class DebugCode(BaseWriteAnalysisCode):
     #     resp = await self.llm.aask(msg=msg)
     #     improv_code = CodeParser.parse_code(block=None, text=resp)
     #     return improv_code
-    
-    async def run(self,
-                  context: List[Message] = None,
-                  plan: str = "",
-                  # finished_code: str = "",
-                  # finished_code_result: str = "",
-                  code: str = "",
-                  runtime_result: str = "") -> str:
+
+    async def run(
+        self,
+        context: List[Message] = None,
+        plan: str = "",
+        # finished_code: str = "",
+        # finished_code_result: str = "",
+        code: str = "",
+        runtime_result: str = "",
+    ) -> str:
         """
         根据当前运行代码和报错信息进行reflection和纠错
         """
@@ -152,5 +156,5 @@ class DebugCode(BaseWriteAnalysisCode):
         )
         # 根据reflection结果重写代码
         # improv_code = await self.rewrite_code(reflection, context=context)
-        improv_code = reflection['improved_impl']
+        improv_code = reflection["improved_impl"]
         return improv_code

From 2da141abbe43fa2c046a8f4bbdb0edc9325b03d3 Mon Sep 17 00:00:00 2001
From: lidanyang <lidanyang@fuzhi.ai>
Date: Thu, 14 Dec 2023 13:57:39 +0800
Subject: [PATCH 34/34] restore commented-out WebPage and read_docx imports

---
 metagpt/tools/web_browser_engine.py | 2 +-
 metagpt/utils/__init__.py           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/metagpt/tools/web_browser_engine.py b/metagpt/tools/web_browser_engine.py
index 7228ae9cf..453d87f31 100644
--- a/metagpt/tools/web_browser_engine.py
+++ b/metagpt/tools/web_browser_engine.py
@@ -7,7 +7,7 @@ from typing import Any, Callable, Coroutine, Literal, overload
 
 from metagpt.config import CONFIG
 from metagpt.tools import WebBrowserEngineType
-# from metagpt.utils.parse_html import WebPage
+from metagpt.utils.parse_html import WebPage
 
 
 class WebBrowserEngine:
diff --git a/metagpt/utils/__init__.py b/metagpt/utils/__init__.py
index 86cac50db..f13175cf8 100644
--- a/metagpt/utils/__init__.py
+++ b/metagpt/utils/__init__.py
@@ -6,7 +6,7 @@
 @File    : __init__.py
 """
 
-# from metagpt.utils.read_document import read_docx
+from metagpt.utils.read_document import read_docx
 from metagpt.utils.singleton import Singleton
 from metagpt.utils.token_counter import (
     TOKEN_COSTS,
@@ -16,7 +16,7 @@ from metagpt.utils.token_counter import (
 
 
 __all__ = [
-    # "read_docx",
+    "read_docx",
     "Singleton",
     "TOKEN_COSTS",
     "count_message_tokens",