Merge branch 'dev' of https://gitlab.deepwisdomai.com/agents/data_agents_opt into dev

2026-05-15 11:02:36 +02:00 · 2023-12-12 16:29:06 +08:00 · 2023-12-12 16:29:06 +08:00 · fc8c0d9e95
commit fc8c0d9e95
parent 0231cfdcc7 b4679b3511
12 changed files with 642 additions and 110 deletions
--- a/config/config.yaml
+++ b/config/config.yaml
@ -94,4 +94,7 @@ MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k
 ### browser path for pyppeteer engine, support Chrome, Chromium,MS Edge
 #PYPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable"

-PROMPT_FORMAT: json #json or markdown
+PROMPT_FORMAT: json #json or markdown
+
+# KAGGLE_USERNAME: ""
+# KAGGLE_KEY: ""
--- a/kaggle_team.py
+++ b/kaggle_team.py
@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import asyncio
+
+import fire
+
+from metagpt.roles.kaggle_manager import KaggleManager
+from metagpt.roles.ml_engineer import MLEngineer
+from metagpt.team import Team
+
+async def main(
+    investment: float = 5.0,
+    n_round: int = 10,
+    auto_run: bool = False,
+):
+    """Run a KaggleManager + MLEngineer team on the hard-coded Titanic setup.
+
+    The competition name, data description and requirement are fixed below;
+    only the budget, the number of rounds and the review mode are configurable.
+
+    Args:
+        investment: Amount handed to ``Team.invest``.
+        n_round: Number of rounds handed to ``Team.run``.
+        auto_run: Forwarded to ``MLEngineer``.
+    """
+    competition = "titanic"
+    data_desc = "Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like."
+    # Alternative requirements kept for reference:
+    #   "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format"
+    #   "generate a random prediction, replace the Survived column of gender_submission.csv, and save the prediction to a new submission file"
+    requirement = "Score as high as possible for the provided dataset, save the test prediction to a csv with two columns PassengerId and Survived"
+
+    team = Team()
+    roles = [
+        KaggleManager(competition=competition, data_desc=data_desc),
+        MLEngineer(goal=requirement, auto_run=auto_run),
+    ]
+    team.hire(roles)
+
+    team.invest(investment)
+    team.start_project(requirement)
+    await team.run(n_round=n_round)
+
+if __name__ == '__main__':
+    fire.Fire(main)
--- a/metagpt/actions/execute_code.py
+++ b/metagpt/actions/execute_code.py
@ -8,6 +8,7 @@ from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Dict, List, Tuple, Union
 import traceback
+import re

 import nbformat
 from nbclient import NotebookClient
@ -171,11 +172,32 @@ class ExecutePyCode(ExecuteCode, Action):
                # TODO: add max_tries for run code.
                cell_index = len(self.nb.cells) - 1
                await self.nb_client.async_execute_cell(self.nb.cells[-1], cell_index)
-                return self.parse_outputs(self.nb.cells[-1].outputs), True
+                outputs = self.parse_outputs(self.nb.cells[-1].outputs)
+                success = True
            except Exception as e:
-                # FIXME: CellExecutionError is hard to read. for example `1\0` raise ZeroDivisionError:
-                #  CellExecutionError('An error occurred while executing the following cell:\n------------------\nz=1/0\n------------------\n\n\n\x1b[0;31m---------------------------------------------------------------------------\x1b[0m\n\x1b[0;31mZeroDivisionError\x1b[0m                         Traceback (most recent call last)\nCell \x1b[0;32mIn[1], line 1\x1b[0m\n\x1b[0;32m----> 1\x1b[0m z\x1b[38;5;241m=\x1b[39m\x1b[38;5;241;43m1\x1b[39;49m\x1b[38;5;241;43m/\x1b[39;49m\x1b[38;5;241;43m0\x1b[39;49m\n\n\x1b[0;31mZeroDivisionError\x1b[0m: division by zero\n')
-                return traceback.format_exc(), False
+                outputs = traceback.format_exc()
+                success = False
+            return truncate(remove_escape_and_color_codes(outputs)), success
        else:
            # TODO: markdown
            raise NotImplementedError(f"Not support this code type : {language}, Only support code!")
+
+
+def truncate(result: str, keep_len: int = 2000) -> str:
+    """Keep only the tail of ``result`` and prefix it with a truncation notice.
+
+    The notice is always prepended, also when nothing had to be cut, so the
+    function is idempotent: feeding its own output back in returns the same text.
+
+    Args:
+        result: Text to shorten (e.g. captured notebook output).
+        keep_len: Maximum number of trailing characters of ``result`` to keep.
+
+    Returns:
+        The notice line followed by at most ``keep_len`` trailing characters.
+    """
+    desc = f"Truncated to show only the last {keep_len} characters\n"
+    # Strip a notice left by an earlier call so it is neither duplicated nor
+    # counted against keep_len. (The slice must drop the prefix, not keep the tail.)
+    while result.startswith(desc):
+        result = result[len(desc):]
+
+    if len(result) > keep_len:
+        # result[-0:] would be the whole string, so treat keep_len <= 0 explicitly.
+        result = result[-keep_len:] if keep_len > 0 else ""
+
+    return desc + result
+
+
+def remove_escape_and_color_codes(input_str):
+    """Return ``input_str`` with ANSI ``ESC[...m`` / ``ESC[...K`` sequences removed."""
+    # Use a regular expression to strip escape characters and color codes.
+    return re.sub(r'\x1b\[[0-9;]*[mK]', '', input_str)
--- a/metagpt/actions/ml_da_action.py
+++ b/metagpt/actions/ml_da_action.py
@ -0,0 +1,116 @@
+import json
+from typing import Dict, List, Union
+
+from metagpt.actions import Action
+from metagpt.schema import Message, Plan
+from metagpt.utils.common import CodeParser
+from metagpt.logs import logger
+
+
+class ReviewConst:
+    """Keywords and prompt texts used when asking a human to review a task or code."""
+
+    # Which kind of review is being requested.
+    TASK_REVIEW_TRIGGER = "task"
+    CODE_REVIEW_TRIGGER = "code"
+
+    # Words recognised in the reviewer's reply.
+    CONTINUE_WORD = ["confirm", "continue", "c", "yes", "y"]
+    CHANGE_WORD = ["change"]
+    EXIT_WORD = ["exit"]
+
+    # Instruction texts shown to the reviewer; built from the keywords above.
+    TASK_REVIEW_INSTRUCTION = (
+        "If you want to change, add, delete a task or merge tasks in the plan, say '{} task task_id or current task, ... (things to change)' ".format(CHANGE_WORD[0])
+        + "If you confirm the output from the current task and wish to continue, type: {}".format(CONTINUE_WORD[0])
+    )
+    CODE_REVIEW_INSTRUCTION = (
+        "If you want the codes to be rewritten, say '{} ... (your change advice)' ".format(CHANGE_WORD[0])
+        + "If you want to leave it as is, type: {} or {}".format(CONTINUE_WORD[0], CONTINUE_WORD[1])
+    )
+    EXIT_INSTRUCTION = "If you want to terminate the process, type: {}".format(EXIT_WORD[0])
+
+
+class AskReview(Action):
+    """Action that logs the current plan and asks a human reviewer for feedback on stdin."""
+
+    async def run(
+        self, context: List[Message], plan: Plan = None, trigger: str = "task"
+    ):
+        """Prompt the reviewer and classify the reply.
+
+        Args:
+            context: Recent messages; only the last one is inspected, to name
+                the action whose output is under review.
+            plan: Plan whose tasks are logged before prompting. May be None,
+                in which case the plan overview is skipped.
+            trigger: ``ReviewConst.TASK_REVIEW_TRIGGER`` selects the task-review
+                instruction; any other value selects the code-review instruction.
+
+        Returns:
+            ``(rsp, confirmed)``: the raw reply and whether it counts as a confirmation.
+
+        Raises:
+            SystemExit: If the reply is one of ``ReviewConst.EXIT_WORD``.
+        """
+        if plan is not None:
+            logger.info("Current overall plan:")
+            logger.info(
+                "\n".join(
+                    [
+                        f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}"
+                        for task in plan.tasks
+                    ]
+                )
+            )
+
+        logger.info("most recent context:")
+        # Guard against an empty context as well as a message without cause_by.
+        latest_action = context[-1].cause_by.__name__ if context and context[-1].cause_by else ""
+        review_instruction = (
+            ReviewConst.TASK_REVIEW_INSTRUCTION
+            if trigger == ReviewConst.TASK_REVIEW_TRIGGER
+            else ReviewConst.CODE_REVIEW_INSTRUCTION
+        )
+        prompt = (
+            f"This is a <{trigger}> review. Please review output from {latest_action}\n"
+            f"{review_instruction}\n"
+            f"{ReviewConst.EXIT_INSTRUCTION}\n"
+            "Please type your review below:\n"
+        )
+
+        rsp = input(prompt)
+        # Compare on a normalised copy so stray whitespace ("exit ", " y") still matches;
+        # the raw reply is what gets returned.
+        normalized = rsp.strip().lower()
+
+        if normalized in ReviewConst.EXIT_WORD:
+            # Same effect as the site-provided exit(), without depending on the site module.
+            raise SystemExit
+
+        # Confirmation can be one of "confirm", "continue", "c", "yes", "y" exactly, or sentences containing "confirm".
+        # One could say "confirm this task, but change the next task to ..."
+        confirmed = normalized in ReviewConst.CONTINUE_WORD or ReviewConst.CONTINUE_WORD[0] in normalized
+
+        return rsp, confirmed
+
+
+class SummarizeAnalysis(Action):
+    """Action that asks the LLM for a short summary of a finished plan."""
+
+    PROMPT_TEMPLATE = """
+    # Context
+    {context}
+    # Summary
+    Output a 30-word summary on analysis tool and modeling algorithms you have used, and the corresponding result. Make sure to announce the complete path to your test prediction file. Your summary:
+    """
+
+    def __init__(self, name: str = "", context=None, llm=None) -> None:
+        # __init__ returns nothing; the previous ``-> str`` annotation was wrong.
+        super().__init__(name, context, llm)
+
+    async def run(self, conmpleted_plan: Plan) -> str:
+        """Summarise the plan's tasks.
+
+        Args:
+            conmpleted_plan: Plan whose tasks (serialised with ``task.dict()``)
+                form the prompt context. The misspelled parameter name is kept
+                for backward compatibility with keyword callers.
+
+        Returns:
+            The LLM's summary text.
+        """
+        tasks = json.dumps(
+            [task.dict() for task in conmpleted_plan.tasks],
+            indent=4,
+            ensure_ascii=False,
+        )  # all tasks finished, return all task outputs
+        prompt = self.PROMPT_TEMPLATE.format(context=tasks)
+        summary = await self._aask(prompt)
+        return summary
+
+
+class Reflect(Action):
+    """Action that asks the LLM to reflect on previous attempts and returns its advice."""
+
+    PROMPT_TEMPLATE = """
+    # Context
+    __context__
+    # Latest User Requirement
+    __user_requirement__
+    # Summary
+    Above is all your attempts to tackle the user requirement. You plan, act, submit your output, and get the result and feedback.
+    Output a json following the format:
+    ```json
+    {
+        "summary": str = "summarize each of your previous trial in a triple of (your methods, the corresponding result, potential improvement), list them out",
+        "takeaways": str = "carefully find key takeaways from your summarization",
+        "reflection": str = "give specific instruction to improve your next trial in a step-by-step thinking process",
+    }
+    ```
+    """
+    REWRITE_PLAN_INSTRUCTION = """Take this reflection for rewriting plan, modify the current plan in place, make reference to your specific instruction, think about you should
+    change which task, add or delete what tasks in the plan. Only make necessary changes, keep reusable tasks unchanged, output the COMPLETE new plan starting from the first task. Your plan should have no more than 5 tasks."""
+
+    async def run(self, context: str, user_requirement: str = "") -> str:
+        """Return the ``"reflection"`` field of the LLM's JSON answer.
+
+        Args:
+            context: Text describing the previous attempts.
+            user_requirement: Latest requirement; a generic competition goal is
+                used when it is empty.
+        """
+        if not user_requirement:
+            user_requirement = "Score as high as possible in a data modeling competition"
+        # Placeholders are substituted with str.replace because the template
+        # contains literal braces, which str.format would try to interpret.
+        prompt = self.PROMPT_TEMPLATE.replace("__context__", context)
+        prompt = prompt.replace("__user_requirement__", user_requirement)
+        raw_rsp = await self._aask(prompt)
+        parsed = json.loads(CodeParser.parse_code(block=None, text=raw_rsp))
+        return parsed["reflection"]
--- a/metagpt/actions/write_plan.py
+++ b/metagpt/actions/write_plan.py
@ -4,12 +4,14 @@
@Author  :   orange-crow
@File    :   plan.py
 """
-from typing import List, Dict
+from typing import List, Dict, Tuple
 import json
+from copy import deepcopy
+import traceback

 from metagpt.actions import Action
 from metagpt.prompts.ml_engineer import ASSIGN_TASK_TYPE_PROMPT, ASSIGN_TASK_TYPE
-from metagpt.schema import Message, Task
+from metagpt.schema import Message, Task, Plan
 from metagpt.utils.common import CodeParser, create_func_config


@ -19,7 +21,7 @@ class WritePlan(Action):
    __context__
    # Task:
    Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to __max_tasks__ tasks.
-    If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes.
+    If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.
    Output a list of jsons following the format:
    ```json
    [
@ -67,8 +69,30 @@ class WritePlan(Action):
            rsp = await self.assign_task_type(json.loads(rsp))
        return rsp

-    @staticmethod
-    def rsp_to_tasks(rsp: str) -> List[Task]:
-        rsp = json.loads(rsp)
-        tasks = [Task(**task_config) for task_config in rsp]
-        return tasks
+def rsp_to_tasks(rsp: str) -> List[Task]:
+    """Parse a JSON list of task configs into ``Task`` objects, one per entry."""
+    return [Task(**task_config) for task_config in json.loads(rsp)]
+
+def update_plan_from_rsp(rsp: str, current_plan: Plan):
+    """Apply the tasks encoded in ``rsp`` to ``current_plan`` in place.
+
+    A response holding exactly one task replaces the task with the same id if
+    the plan has one, otherwise the task is appended. Any other number of
+    tasks is passed to ``Plan.add_tasks``.
+    """
+    tasks = rsp_to_tasks(rsp)
+    if len(tasks) != 1:
+        # add tasks in general
+        current_plan.add_tasks(tasks)
+        return
+
+    # handle a single task: replace an existing one or append a new one
+    only_task = tasks[0]
+    if current_plan.has_task_id(only_task.task_id):
+        current_plan.replace_task(only_task)
+    else:
+        current_plan.append_task(only_task)
+
+def precheck_update_plan_from_rsp(rsp: str, current_plan: Plan) -> Tuple[bool, str]:
+    """Dry-run ``update_plan_from_rsp`` on a deep copy of the plan.
+
+    Args:
+        rsp: JSON plan response to validate.
+        current_plan: Plan to validate against; it is not modified.
+
+    Returns:
+        ``(True, "")`` if the update succeeds on the copy, otherwise
+        ``(False, message)`` with the error text.
+    """
+    temp_plan = deepcopy(current_plan)
+    try:
+        update_plan_from_rsp(rsp, temp_plan)
+    except Exception as e:  # any failure means the response is unusable as a plan update
+        # Return the message as text so the result matches the declared Tuple[bool, str].
+        return False, str(e)
+    return True, ""
--- a/metagpt/config.py
+++ b/metagpt/config.py
@ -95,6 +95,9 @@ class Config(metaclass=Singleton):

        self.prompt_format = self._get("PROMPT_FORMAT", "markdown")

+        self.kaggle_username = self._get("KAGGLE_USERNAME", "")
+        self.kaggle_key = self._get("KAGGLE_KEY", "")
+
    def _init_with_config_files_and_env(self, configs: dict, yaml_file):
        """Load from config/key.yaml, config/config.yaml, and env in decreasing order of priority"""
        configs.update(os.environ)
--- a/metagpt/prompts/ml_engineer.py
+++ b/metagpt/prompts/ml_engineer.py
@ -202,3 +202,14 @@ ML_MODULE_MAP = {
    "classification_model": "metagpt.tools.functions.libs.machine_learning.ml_model",
    "regression_model": "metagpt.tools.functions.libs.machine_learning.ml_model",
 }
+
+# Context template with four named placeholders: user_requirement, data_desc,
+# tasks and current_task.
+STRUCTURAL_CONTEXT = """
+## User Requirement
+{user_requirement}
+## Data Description
+{data_desc}
+## Current Plan
+{tasks}
+## Current Task
+{current_task}
+"""
--- a/metagpt/roles/kaggle_manager.py
+++ b/metagpt/roles/kaggle_manager.py
@ -0,0 +1,153 @@
+from typing import Dict, List, Union, Tuple
+import json
+import subprocess
+import os
+
+import fire
+import pandas as pd
+
+from metagpt.config import CONFIG
+from metagpt.const import WORKSPACE_ROOT
+from metagpt.roles import Role
+from metagpt.actions import Action, BossRequirement
+from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis
+from metagpt.schema import Message, Task, Plan
+from metagpt.logs import logger
+from metagpt.utils.common import CodeParser
+
+
+# Export the Kaggle credentials from the project config as environment
+# variables at import time. NOTE(review): presumably these are read by the
+# `kaggle` CLI invoked through run_command below — confirm.
+os.environ["KAGGLE_USERNAME"] = CONFIG.kaggle_username
+os.environ["KAGGLE_KEY"] = CONFIG.kaggle_key
+
+def run_command(cmd):
+    """Run ``cmd`` through the shell, echo it and its output, and return stdout.
+
+    Args:
+        cmd: Shell command line. It is executed with ``shell=True`` because
+            callers rely on shell features such as redirection and globbing,
+            so it must not contain untrusted, unquoted text.
+
+    Returns:
+        The captured standard output as text.
+
+    Raises:
+        SystemExit: If the command exits with a non-zero status. The status is
+            propagated as the exit code instead of ending the process with 0.
+    """
+    print(cmd)
+    output = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+    if output.returncode != 0:
+        print("Error output:", output.stderr)
+        raise SystemExit(output.returncode)
+    print(output.stdout)
+    return output.stdout
+
+class DownloadData(Action):
+    """Action that downloads a Kaggle competition's data into the workspace."""
+
+    async def run(self, competition, data_desc="") -> str:
+        """Download and unpack the competition data, then describe what was fetched.
+
+        Args:
+            competition: Competition name passed to the ``kaggle`` CLI; also the
+                name of the data folder under ``WORKSPACE_ROOT``.
+            data_desc: Free-text description of the data, echoed in the result.
+
+        Returns:
+            Text giving the data location, the file listing and ``data_desc``.
+        """
+        data_path = WORKSPACE_ROOT / competition
+
+        output = run_command(f"kaggle competitions list --search {competition}")
+        # Captured stdout normally ends with a newline, so an exact equality
+        # check against the message would never match; test for containment.
+        assert "No competitions found" not in output, "You must provide the correct competition name"
+
+        run_command(f"kaggle competitions download {competition} --path {WORKSPACE_ROOT}")
+
+        # Only unpack when the data folder does not exist yet.
+        if not os.path.exists(data_path):
+            # FIXME: not safe — the glob unpacks every zip found in WORKSPACE_ROOT
+            run_command(f"unzip -o {WORKSPACE_ROOT / '*.zip'} -d {data_path}")
+
+        file_list = run_command(f"ls {data_path}")
+
+        rsp = f"""
+        Location:
+        Data downloaded at {data_path} folder, including {file_list}
+        Data Description:
+        {data_desc}
+        """
+        return rsp
+
+class SubmitResult(Action):
+    """Action that submits a prediction file to Kaggle and reports score and rank."""
+
+    PROMPT_TEMPLATE = """
+    # Summary
+    __summary__
+    # Your task
+    Extract the file path for test set prediction from the summary above, output a json following the format:
+    ```json
+    {"file_path": str = "the file path, for example, /path/to/the/prediction/file/xxx.csv, /path/to/the/prediction/file/xxx.xlsx"}
+    ```
+    """
+
+    def __init__(self, name: str = "", context=None, llm=None) -> None:
+        super().__init__(name, context, llm)
+
+    async def _parse_submit_file_path(self, context) -> str:
+        """Ask the LLM to extract the prediction file path from ``context``."""
+        # str.replace is used because the template contains literal braces.
+        prompt = self.PROMPT_TEMPLATE.replace("__summary__", context)
+        rsp = await self._aask(prompt)
+        rsp = CodeParser.parse_code(block=None, text=rsp)
+        file_path = json.loads(rsp)["file_path"]
+        return file_path
+
+    async def run(self, competition, submit_message="") -> str:
+        """Submit the prediction file named in ``submit_message`` and summarise the outcome.
+
+        Args:
+            competition: Competition name passed to the ``kaggle`` CLI.
+            submit_message: Analysis summary; the file path is extracted from
+                it and it is also used as the submission message.
+
+        Returns:
+            Text with the latest submissions, the current score, the best
+            score and the best rank.
+        """
+        import shlex  # local import: shlex is not among this module's top-level imports
+
+        submit_file_path = await self._parse_submit_file_path(submit_message)
+
+        data_path = WORKSPACE_ROOT / competition
+        # The message is wrapped in single quotes below, so drop any it contains.
+        submit_message = submit_message.replace("'", "")
+        # The path comes from LLM output: expand "~" explicitly, then quote it so
+        # spaces or shell metacharacters cannot break or extend the command.
+        quoted_path = shlex.quote(os.path.expanduser(str(submit_file_path)))
+
+        run_command(f"kaggle competitions submit {competition} -f {quoted_path} -m '{submit_message}'")
+        run_command(f"kaggle competitions leaderboard --show --csv {competition} > {data_path / 'leaderboard.csv'}")
+        run_command(f"kaggle competitions submissions --csv {competition} > {data_path / 'submission.csv'}")
+
+        leaderboard = pd.read_csv(data_path / 'leaderboard.csv')
+        submission = pd.read_csv(data_path / 'submission.csv')
+        print(submission)  # submission.to_json(orient="records")
+
+        submission_score = submission.loc[0, "publicScore"]
+        best_score = max(submission["publicScore"])  # might be min
+
+        # Row positions are 0-based; NOTE(review): assumes the leaderboard CSV
+        # is ordered best-first so that position + 1 is the rank — confirm.
+        matches = leaderboard.index[leaderboard["score"] == best_score]
+        if len(matches) > 0:
+            rank = int(matches[0]) + 1
+            # Scale before rounding to avoid artifacts such as 12.340000000000002.
+            rank_pct = round(rank / len(leaderboard) * 100, 2)
+            rank_desc = f"{rank} (top {rank_pct}%)"
+        else:
+            # The best score may be absent from the downloaded leaderboard rows.
+            rank_desc = "unknown (best score not found on the downloaded leaderboard)"
+
+        submission_summary = f"""
+        # All histories:
+        {submission.head(5).to_string()}
+        # Current
+        Current submission score: {submission_score}, best score: {best_score}, best rank: {rank_desc}
+        """
+        logger.info(submission_summary)
+        return submission_summary
+
+
+class KaggleManager(Role):
+    """Role that downloads competition data and submits predictions to Kaggle.
+
+    It watches ``BossRequirement`` and ``SummarizeAnalysis`` messages and owns
+    two actions: ``DownloadData`` (state 0) and ``SubmitResult`` (state 1).
+    """
+
+    def __init__(
+        self, name="ABC", profile="KaggleManager", goal="", competition="titanic", data_desc=""
+    ):
+        super().__init__(name=name, profile=profile, goal=goal)
+        self._init_actions([DownloadData, SubmitResult])
+        self._watch([BossRequirement, SummarizeAnalysis])
+        # data_desc is currently passed in; later it could be scraped from the web by another Role
+        self.data_desc = data_desc
+        self.competition = competition
+
+    async def _think(self):
+        """Choose the next action from the cause of the most recent memory."""
+        trigger = self.get_memories()[-1].cause_by
+        if trigger == SummarizeAnalysis:
+            # SubmitResult: take the prediction announced by MLEngineer and submit it to Kaggle
+            self._set_state(1)
+        elif trigger == BossRequirement:
+            # DownloadData: the human named the competition, fetch its datasets
+            self._set_state(0)
+
+    async def _act(self):
+        """Run the selected action and wrap its output in a user ``Message``."""
+        todo = self._rc.todo
+        logger.info(f"{self._setting}: ready to {self._rc.todo}")
+
+        if isinstance(todo, SubmitResult):
+            # use the analysis summary from MLEngineer as the submission message
+            latest_summary = self.get_memories()[-1].content
+            rsp = await todo.run(competition=self.competition, submit_message=latest_summary)
+        elif isinstance(todo, DownloadData):
+            rsp = await todo.run(self.competition, self.data_desc)
+
+        return Message(content=rsp, role="user", cause_by=type(todo))
+
+if __name__ == "__main__":
+    # Manual debugging entry point: feeds a canned analysis summary to the role
+    # so that only the submission path is exercised.
+    competition = "titanic"
+    data_desc = "Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like."
+    requirement = "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format"
+
+    summary = "I used Python with pandas for data preprocessing, sklearn's RandomForestClassifier for modeling, and achieved 82.12% accuracy on validation. Predictions saved at '/Users/gary/Desktop/data_agents_opt/workspace/titanic/gender_submission.csv'."
+
+    async def main(requirement: str = requirement):
+        manager = KaggleManager(competition=competition, data_desc=data_desc)
+        # To exercise the download path instead:
+        # await manager.run(Message(content="", cause_by=BossRequirement))
+        await manager.run(Message(content=summary, cause_by=SummarizeAnalysis))
+
+    fire.Fire(main)
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@ -8,78 +8,49 @@ import re
 from metagpt.roles import Role
 from metagpt.actions import Action
 from metagpt.schema import Message, Task, Plan
+from metagpt.memory import Memory
 from metagpt.logs import logger
-from metagpt.actions.write_plan import WritePlan
+from metagpt.actions.write_plan import WritePlan, update_plan_from_rsp, precheck_update_plan_from_rsp
 from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
+from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst
 from metagpt.actions.execute_code import ExecutePyCode
+from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
+from metagpt.prompts.ml_engineer import STRUCTURAL_CONTEXT
 from metagpt.actions.write_code_steps import WriteCodeSteps

-STRUCTURAL_CONTEXT = """
-## User Requirement
-{user_requirement}
-## Current Plan
-{tasks}
-## Current Task
-{current_task}
-"""
-
-
-def truncate(result: str, keep_len: int = 1000) -> str:
-    desc = "Truncated to show only the last 1000 characters\n"
-    if result.startswith(desc):
-        result = result[-len(desc) :]
-
-    if len(result) > keep_len:
-        result = result[-keep_len:]
-
-    if not result.startswith(desc):
-        return desc + result
-    return desc
-
-
-def remove_escape_and_color_codes(input_str):
-    # 使用正则表达式去除转义字符和颜色代码
-    pattern = re.compile(r'\x1b\[[0-9;]*[mK]')
-    result = pattern.sub('', input_str)
-    return result
-
-
-class AskReview(Action):
-    async def run(self, context: List[Message], plan: Plan = None):
-        logger.info("Current overall plan:")
-        logger.info(
-            "\n".join([f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}" for task in plan.tasks])
-        )
-
-        logger.info("most recent context:")
-        latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
-        prompt = f"\nPlease review output from {latest_action}:\n" \
-            "If you want to change a task in the plan, say 'change task task_id, ... (things to change)'\n" \
-            "If you confirm the output and wish to continue with the current process, type CONFIRM\n" \
-            "If you want to terminate the process, type exit:\n"
-        rsp = input(prompt)
-
-        if rsp.lower() in ("exit"):
-            exit()
-
-        confirmed = rsp.lower() in ("confirm", "yes", "y")
-
-        return rsp, confirmed
-
-
 class MLEngineer(Role):
    def __init__(
        self, name="ABC", profile="MLEngineer", goal="", auto_run: bool = False
    ):
+        # Sets plan_and_act react mode, watches DownloadData / SubmitResult
+        # messages, and starts with an empty plan for `goal`.
        super().__init__(name=name, profile=profile, goal=goal)
        self._set_react_mode(react_mode="plan_and_act")
+        self._watch([DownloadData, SubmitResult])
+
        self.plan = Plan(goal=goal)
        self.use_tools = False
        self.use_code_steps = True
        self.execute_code = ExecutePyCode()
        self.auto_run = auto_run

+        # memory for working on each task, discarded each time a task is done
+        self.working_memory = Memory()
+
    async def _plan_and_act(self):
+
+        ### Actions in a multi-agent multi-turn setting ###
+        memories = self.get_memories()
+        if memories:
+            latest_event = memories[-1].cause_by
+            if latest_event == DownloadData:
+                self.plan.context = memories[-1].content
+            elif latest_event == SubmitResult:
+                # self reflect on previous plan outcomes and think about how to improve the plan, add to working  memory
+                await self._reflect()
+
+                # get feedback for improvement from human, add to working memory
+                await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
+
+        ### Common Procedure in both single- and multi-agent setting ###
        # create initial plan and update until confirmation
        await self._update_plan()

@ -91,9 +62,9 @@ class MLEngineer(Role):
            code, result, success, code_steps = await self._write_and_exec_code()

            # ask for acceptance, users can other refuse and change tasks in the plan
-            task_result_confirmed = await self._ask_review()
+            review, task_result_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)

-            if success and task_result_confirmed:
+            if task_result_confirmed:
                # tick off this task and record progress
                task.code = code
                task.result = result
@ -101,9 +72,29 @@ class MLEngineer(Role):
                self.plan.finish_current_task()
                self.working_memory.clear()

+                confirmed_and_more = (ReviewConst.CONTINUE_WORD[0] in review.lower()
+                    and review.lower() not in ReviewConst.CONTINUE_WORD[0])  # "confirm, ... (more content, such as changing downstream tasks)"
+                if confirmed_and_more:
+                    self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
+                    await self._update_plan(review)
+            
+            elif "redo" in review:
+                # Ask the Role to redo this task with help of review feedback,
+                # useful when the code run is successful but the procedure or result is not what we want
+                continue
+
            else:
                # update plan according to user's feedback and to take on changed tasks
-                await self._update_plan()
+                await self._update_plan(review)
+
+        completed_plan_memory = self.get_useful_memories()  # completed plan as a outcome
+        self._rc.memory.add(completed_plan_memory[0])  # add to persistent memory
+
+        summary = await SummarizeAnalysis().run(self.plan)
+        rsp = Message(content=summary, cause_by=SummarizeAnalysis)
+        self._rc.memory.add(rsp)
+
+        return rsp

    async def _write_and_exec_code(self, max_retry: int = 3):
        code_steps = (
@ -139,34 +130,35 @@ class MLEngineer(Role):
            )

            result, success = await self.execute_code.run(code)
-            # truncated the result
-            _keep_result_len = 2000
-            truncate_result = truncate(remove_escape_and_color_codes(result), keep_len=_keep_result_len)
-            print(truncate_result)
-            # print(result)
+            print(result)
            self.working_memory.add(
-                Message(content=truncate_result, role="user", cause_by=ExecutePyCode)
+                Message(content=result, role="user", cause_by=ExecutePyCode)
            )

            if "!pip" in code:
                success = False
-            # if not success:
-            #     await self._ask_review()

            counter += 1

+            if not success and counter >= max_retry:
+                logger.info("coding failed!")
+                review, _ = await self._ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER)
+                if ReviewConst.CHANGE_WORD in review:
+                    counter = 0  # redo the task again with help of human suggestions
+
        return code, result, success, code_steps

-    async def _ask_review(self):
-        if not self.auto_run:
+    async def _ask_review(self, auto_run: bool = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER):
+        """Ask a human for a review unless running automatically.
+
+        Args:
+            auto_run: Per-call override of ``self.auto_run``; ``None`` means
+                "use the role's setting". An explicit ``False`` forces a review.
+            trigger: Review kind forwarded to ``AskReview``.
+
+        Returns:
+            ``(review, confirmed)``; ``("", True)`` when no review is requested.
+        """
+        # `auto_run or self.auto_run` would discard an explicit False.
+        auto_run = self.auto_run if auto_run is None else auto_run
+        if not auto_run:
            context = self.get_useful_memories()
-            review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan)
+            review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan, trigger=trigger)
            if not confirmed:
                self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
-            return confirmed
-        return True
+            return review, confirmed
+        return "", True

-    async def _update_plan(self, max_tasks: int = 3):
+    async def _update_plan(self, review: str = "", max_tasks: int = 3, max_retries: int = 3):
        plan_confirmed = False
        while not plan_confirmed:
            context = self.get_useful_memories()
@ -176,30 +168,50 @@ class MLEngineer(Role):
            self.working_memory.add(
                Message(content=rsp, role="assistant", cause_by=WritePlan)
            )
-            plan_confirmed = await self._ask_review()

-        tasks = WritePlan.rsp_to_tasks(rsp)
-        self.plan.add_tasks(tasks)
+            # precheck plan before asking reviews
+            is_plan_valid, error = precheck_update_plan_from_rsp(rsp, self.plan)
+            if not is_plan_valid and max_retries > 0:
+                error_msg = f"The generated plan is not valid with error: {error}, try regenerating, remember to generate either the whole plan or the single changed task only"
+                logger.warning(error_msg)
+                self.working_memory.add(Message(content=error_msg, role="assistant", cause_by=WritePlan))
+                max_retries -= 1
+                continue
+
+            _, plan_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
+
+        update_plan_from_rsp(rsp, self.plan)
+
        self.working_memory.clear()
+    
+    async def _reflect(self):
+        """Reflect on the role's memories and queue the result for re-planning.
+
+        The reflection is added to working memory as an assistant message,
+        followed by the rewrite-plan instruction as a user message.
+        """
+        history = "\n".join(str(msg) for msg in self.get_memories())
+        reflection = await Reflect().run(context=history)
+        for content, role in (
+            (reflection, "assistant"),
+            (Reflect.REWRITE_PLAN_INSTRUCTION, "user"),
+        ):
+            self.working_memory.add(Message(content=content, role=role))

    def get_useful_memories(self) -> List[Message]:
        """find useful memories only to reduce context length and improve performance"""
        # TODO dataset description , code steps
        user_requirement = self.plan.goal
+        # the plan's context field fills the data_desc placeholder of the template
+        data_desc = self.plan.context
        tasks = json.dumps(
            [task.dict() for task in self.plan.tasks], indent=4, ensure_ascii=False
        )
        current_task = self.plan.current_task.json() if self.plan.current_task else {}
        context = STRUCTURAL_CONTEXT.format(
-            user_requirement=user_requirement, tasks=tasks, current_task=current_task
+            user_requirement=user_requirement, data_desc=data_desc, tasks=tasks, current_task=current_task
        )
        context_msg = [Message(content=context, role="user")]

-        return context_msg + self.working_memory.get()
-
-    @property
-    def working_memory(self):
-        return self._rc.memory
+        # structured context message first, then the working-memory messages
+        return context_msg + self.get_working_memories()
+    
+    def get_working_memories(self) -> List[Message]:
+        """Return the messages currently held in ``self.working_memory``."""
+        return self.working_memory.get()


 if __name__ == "__main__":
--- a/metagpt/schema.py
+++ b/metagpt/schema.py
@ -86,6 +86,7 @@ class Task(BaseModel):

 class Plan(BaseModel):
    goal: str
+    context: str = ""
    tasks: list[Task] = []
    task_map: dict[str, Task] = {}
    current_task_id = ""
@ -149,14 +150,81 @@ class Plan(BaseModel):
            self.tasks = final_tasks
        
        # Update current_task_id to the first unfinished task in the merged list
-        for task in self.tasks:
-            if not task.is_finished:
-                self.current_task_id = task.task_id
-                break
+        self._update_current_task()

        # Update the task map for quick access to tasks by ID
        self.task_map = {task.task_id: task for task in self.tasks}
+    
+    def reset_task(self, task_id: str):
+        """
+        Wipe the code and result of one task and flag it as not finished.

+        A task_id that is not in ``task_map`` is ignored. ``current_task_id`` is not touched here.
+
+        Args:
+            task_id (str): ID of the task to reset.
+
+        Returns:
+            None
+        """
+        if task_id not in self.task_map:
+            return
+
+        target = self.task_map[task_id]
+        target.is_finished = False
+        target.code = ""
+        target.result = ""
+
+    def replace_task(self, new_task: Task):
+        """
+        Replace an existing task with the new input task based on task_id, and reset all tasks depending on it.
+
+        Dependents are reset transitively: a task that depends on a reset task is reset as well, since its
+        code and result were produced from outputs that are no longer valid. Afterwards ``current_task_id``
+        is refreshed so that it points at the first unfinished task of the updated sequence.
+
+        Args:
+            new_task (Task): The new task that will replace an existing one. If its task_id is not part of
+                the plan, nothing happens.
+
+        Returns:
+            None
+        """
+        if new_task.task_id not in self.task_map:
+            return
+
+        # Replace the task in the task map and the task list
+        self.task_map[new_task.task_id] = new_task
+        for i, task in enumerate(self.tasks):
+            if task.task_id == new_task.task_id:
+                self.tasks[i] = new_task
+                break
+
+        # Reset direct and indirect dependents; stale_ids ensures each task is reset once,
+        # which also keeps the walk finite if the dependency data happens to contain a cycle
+        stale_ids = {new_task.task_id}
+        pending = [new_task.task_id]
+        while pending:
+            upstream_id = pending.pop()
+            for task in self.tasks:
+                if task.task_id not in stale_ids and upstream_id in task.dependent_task_ids:
+                    stale_ids.add(task.task_id)
+                    self.reset_task(task.task_id)
+                    pending.append(task.task_id)
+
+        # The replaced or reset tasks may sit before the previous current task
+        self._update_current_task()
+
+    def append_task(self, new_task: Task):
+        """
+        Add a new task at the tail of the existing task sequence.
+
+        Args:
+            new_task (Task): The task to add. Its task_id must not be in the plan yet and every ID in its
+                dependent_task_ids must already be in the plan.
+
+        Returns:
+            None
+
+        Raises:
+            AssertionError: If the task_id is already planned or a dependency is unknown.
+        """
+        already_planned = self.has_task_id(new_task.task_id)
+        assert not already_planned, "Task already in current plan, use replace_task instead"
+
+        deps_known = all(self.has_task_id(dep_id) for dep_id in new_task.dependent_task_ids)
+        assert deps_known, "New task has unknown dependencies"
+
+        # No existing task depends on the newcomer, so the tail of the sorted sequence is a valid spot
+        self.task_map[new_task.task_id] = new_task
+        self.tasks.append(new_task)
+        self._update_current_task()
+
+    def has_task_id(self, task_id: str) -> bool:
+        """Tell whether ``task_id`` is a key of ``task_map``."""
+        known_ids = self.task_map
+        return task_id in known_ids
+
+    def _update_current_task(self):
+        """Point ``current_task_id`` at the first unfinished task in ``tasks``."""
+        unfinished_ids = (task.task_id for task in self.tasks if not task.is_finished)
+        # "" is stored when no unfinished task is left
+        self.current_task_id = next(unfinished_ids, "")
+    
    @property
    def current_task(self) -> Task:
        """Find current task to execute
@ -170,10 +238,8 @@ class Plan(BaseModel):
        """Finish current task, set Task.is_finished=True, set current task to next task
        """
        if self.current_task_id:
-            current_task = self.current_task
-            current_task.is_finished = True
-            next_task_index = self.tasks.index(current_task) + 1
-            self.current_task_id = self.tasks[next_task_index].task_id if next_task_index < len(self.tasks) else None
+            self.current_task.is_finished = True
+            self._update_current_task()  # set to next task

    def get_finished_tasks(self) -> list[Task]:
        """return all finished tasks in correct linearized order
--- a/tests/metagpt/actions/test_write_plan.py
+++ b/tests/metagpt/actions/test_write_plan.py
@ -1,13 +1,15 @@
 import pytest

-from metagpt.actions.write_plan import WritePlan
+from metagpt.actions.write_plan import WritePlan, precheck_update_plan_from_rsp, Plan, Task

+def test_precheck_update_plan_from_rsp():
+    """The precheck should succeed on a task-list response without touching the plan, and fail on a bad one."""
+    plan = Plan(goal="")
+    plan.add_tasks([Task(task_id="1")])
+    rsp = '[{"task_id": "2"}]'
+    success, _ = precheck_update_plan_from_rsp(rsp, plan)
+    assert success
+    assert len(plan.tasks) == 1 and plan.tasks[0].task_id == "1"  # precheck should not change the original one

-@pytest.mark.asyncio
-async def test_plan():
-    p = WritePlan()
-    task_desc = """Here’s some background information on Cyclistic, a bike-sharing company designing a marketing strategy aimed at converting casual riders into annual members: So far, Cyclistic’s marketing strategy has relied on building general awareness and engaging a wide range of consumers. group. One way to help achieve these goals is the flexibility of its pricing plans: one-way passes, full-day passes, and annual memberships. Customers who purchase a one-way or full-day pass are known as recreational riders. Customers purchasing an annual membership are Cyclistic members. I will provide you with a data sheet that records user behavior: '/Users/vicis/Downloads/202103-divvy-tripdata.csv"""
-    rsp = await p.run(task_desc, role="data analyst")
-    assert len(rsp.content) > 0
-    assert rsp.sent_from == "WritePlan"
-    print(rsp)
+    # 'wrong' is not a task list; the precheck is expected to report failure for it
+    invalid_rsp = 'wrong'
+    success, _ = precheck_update_plan_from_rsp(invalid_rsp, plan)
+    assert not success
--- a/tests/metagpt/test_schema.py
+++ b/tests/metagpt/test_schema.py
@ -5,6 +5,7 @@
@Author  : alexanderwu
@File    : test_schema.py
 """
+import pytest
 from metagpt.schema import AIMessage, Message, SystemMessage, UserMessage
 from metagpt.schema import Task, Plan

@ -104,3 +105,82 @@ class TestPlan:
        finished_tasks = plan.get_finished_tasks()
        assert len(finished_tasks) == 1
        assert finished_tasks[0].task_id == "1"
+
+    def test_reset_task_existing(self):
+        """Resetting a finished task must clear its code and result and mark it unfinished."""
+        plan = Plan(goal="")
+        # the field is ``is_finished``; a ``finished=`` kwarg would be dropped and leave the task unfinished
+        task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", is_finished=True)
+        plan.add_tasks([task])
+        assert plan.task_map["1"].is_finished  # precondition: the task really starts out finished
+        plan.reset_task("1")
+        reset_task = plan.task_map["1"]
+        assert reset_task.code == ""
+        assert reset_task.result == ""
+        assert not reset_task.is_finished
+
+    def test_reset_task_non_existing(self):
+        """Resetting an unknown task id must neither add a task nor alter the existing one."""
+        plan = Plan(goal="")
+        # the field is ``is_finished``; a ``finished=`` kwarg would be dropped and leave the task unfinished
+        task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", is_finished=True)
+        plan.add_tasks([task])
+        plan.reset_task("2")  # Task with ID 2 does not exist
+        assert "1" in plan.task_map
+        assert "2" not in plan.task_map
+        # the existing task must be left exactly as it was
+        untouched = plan.task_map["1"]
+        assert untouched.is_finished
+        assert untouched.code == "print('Hello')"
+        assert untouched.result == "Hello"
+
+    def test_replace_task_with_dependents(self):
+        """Replacing a task must swap it in and reset the finished tasks that depend on it."""
+        plan = Plan(goal="")
+        # ``is_finished`` is the real field name; the dependent task carries code and a result so
+        # that the reset assertions below cannot pass vacuously
+        tasks = [Task(task_id="1", instruction="First Task", is_finished=True),
+                 Task(task_id="2", instruction="Second Task", dependent_task_ids=["1"],
+                      code="print('second')", result="second", is_finished=True)]
+        plan.add_tasks(tasks)
+        assert plan.task_map["2"].is_finished  # precondition: the dependent starts out finished
+        new_task = Task(task_id="1", instruction="Updated First Task")
+        plan.replace_task(new_task)
+        assert plan.task_map["1"].instruction == "Updated First Task"
+        assert not plan.task_map["2"].is_finished  # Dependent task should be reset
+        assert plan.task_map["2"].code == ""
+        assert plan.task_map["2"].result == ""
+
+    def test_replace_task_non_existing(self):
+        """Replacing with a task whose id is absent from the plan must not add it."""
+        plan = Plan(goal="")
+        plan.add_tasks([Task(task_id="1", instruction="First Task")])
+        unknown_task = Task(task_id="2", instruction="New Task")
+        plan.replace_task(unknown_task)  # the plan holds no task with ID 2
+        known_ids = plan.task_map
+        assert "1" in known_ids
+        assert "2" not in known_ids
+    
+    def test_append_task_with_valid_dependencies(self):
+        """A task depending on an already planned task is appended last and registered in the map."""
+        plan = Plan(goal="Test")
+        plan.add_tasks([Task(task_id="1")])
+        follow_up = Task(task_id="2", dependent_task_ids=["1"])
+        plan.append_task(follow_up)
+        last_task = plan.tasks[-1]
+        assert last_task.task_id == "2"
+        assert plan.task_map["2"] == follow_up
+
+    def test_append_task_with_invalid_dependencies(self):
+        """Appending a task whose dependency is not in the plan must raise AssertionError."""
+        plan = Plan(goal="Test")
+        orphan = Task(task_id="2", dependent_task_ids=["3"])
+        with pytest.raises(AssertionError):
+            plan.append_task(orphan)
+    
+    def test_append_task_without_dependencies(self):
+        """Appending an independent task grows the plan and keeps the unfinished first task current."""
+        plan = Plan(goal="Test")
+        plan.add_tasks([Task(task_id="1")])
+
+        plan.append_task(Task(task_id="2"))
+
+        task_count = len(plan.tasks)
+        assert task_count == 2
+        assert plan.current_task_id == "1"
+
+    def test_append_task_updates_current_task(self):
+        """With only a finished task in the plan, an appended task becomes the current one."""
+        plan = Plan(goal="Test", tasks=[Task(task_id="1", is_finished=True)])
+        pending_task = Task(task_id="2")
+        plan.append_task(pending_task)
+        assert plan.current_task_id == "2"
+
+    def test_update_current_task(self):
+        """_update_current_task must select the first task that is not finished."""
+        done = Task(task_id="1", is_finished=True)
+        todo = Task(task_id="2")
+        plan = Plan(goal="Test", tasks=[done, todo])
+        plan._update_current_task()
+        assert plan.current_task_id == "2"