From 68635ff4aaac7af6abcf324a95baeb28cbd38cc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 15:43:13 +0800
Subject: [PATCH 01/12] add typing-extensions-4.8.0 for nbclient

---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index c72260c04..1d1bc95a1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -50,4 +50,5 @@ nbclient==0.9.0
 nbformat==5.9.2
 ipython==8.17.2
 ipykernel==6.27.0
-scikit_learn==1.3.2
\ No newline at end of file
+scikit_learn==1.3.2
+typing-extensions==4.8.0
\ No newline at end of file

From b28111ab3476f6ce51beffa60819ab82f1fc28b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 16:05:56 +0800
Subject: [PATCH 02/12] fix: "image/png" not in output["data"].

---
 metagpt/actions/execute_code.py            |   9 +-
 tests/metagpt/actions/test_execute_code.py | 114 +++++++++++++--------
 2 files changed, 76 insertions(+), 47 deletions(-)

diff --git a/metagpt/actions/execute_code.py b/metagpt/actions/execute_code.py
index 7b16d559a..981aa894c 100644
--- a/metagpt/actions/execute_code.py
+++ b/metagpt/actions/execute_code.py
@@ -17,6 +17,7 @@ from rich.syntax import Syntax
 
 from metagpt.actions import Action
 from metagpt.schema import Message
+from metagpt.logs import logger
 
 
 class ExecuteCode(ABC):
@@ -90,11 +91,14 @@ class ExecutePyCode(ExecuteCode, Action):
         if not outputs:
             return parsed_output
 
-        for output in outputs:
+        for i, output in enumerate(outputs):
             if output["output_type"] == "stream":
                 parsed_output += output["text"]
             elif output["output_type"] == "display_data":
-                self.show_bytes_figure(output["data"]["image/png"], self.interaction)
+                if "image/png" in output["data"]:
+                    self.show_bytes_figure(output["data"]["image/png"], self.interaction)
+                else:
+                    logger.info(f"{i}th output['data'] from nbclient outputs dont have image/png, continue next output ...")
             elif output["output_type"] == "execute_result":
                 parsed_output += output["data"]["text/plain"]
         return parsed_output
@@ -136,7 +140,6 @@ class ExecutePyCode(ExecuteCode, Action):
 
         if isinstance(code, str):
             return code, language
-
         if isinstance(code, dict):
             assert "code" in code
             if "language" not in code:
diff --git a/tests/metagpt/actions/test_execute_code.py b/tests/metagpt/actions/test_execute_code.py
index 88c5adf18..8894f2cb9 100644
--- a/tests/metagpt/actions/test_execute_code.py
+++ b/tests/metagpt/actions/test_execute_code.py
@@ -1,57 +1,83 @@
 import pytest
 
-from metagpt.actions import ExecutePyCode
+from metagpt.actions.execute_code import ExecutePyCode
 from metagpt.schema import Message
 
 
-@pytest.mark.asyncio
-async def test_code_running():
-    pi = ExecutePyCode()
-    output = await pi.run("print('hello world!')")
-    assert output.state == "done"
-    output = await pi.run({"code": "print('hello world!')", "language": "python"})
-    assert output.state == "done"
-    code_msg = Message("print('hello world!')")
-    output = await pi.run(code_msg)
-    assert output.state == "done"
+# @pytest.mark.asyncio
+# async def test_code_running():
+#     pi = ExecutePyCode()
+#     output = await pi.run("print('hello world!')")
+#     assert output[1] is True
+#     output = await pi.run({"code": "print('hello world!')", "language": "python"})
+#     assert output[1] is True
+#     code_msg = Message("print('hello world!')")
+#     output = await pi.run(code_msg)
+#     assert output[1] is True
+
+
+# @pytest.mark.asyncio
+# async def test_split_code_running():
+#     pi = ExecutePyCode()
+#     output = await pi.run("x=1\ny=2")
+#     output = await pi.run("z=x+y")
+#     output = await pi.run("assert z==3")
+#     assert output[1] is True
+
+
+# @pytest.mark.asyncio
+# async def test_execute_error():
+#     pi = ExecutePyCode()
+#     output = await pi.run("z=1/0")
+#     assert output[1] is False
+
+
+# @pytest.mark.asyncio
+# async def test_plotting_code():
+#     pi = ExecutePyCode()
+#     code = """
+#     import numpy as np
+#     import matplotlib.pyplot as plt
+
+#     # 生成随机数据
+#     random_data = np.random.randn(1000)  # 生成1000个符合标准正态分布的随机数
+
+#     # 绘制直方图
+#     plt.hist(random_data, bins=30, density=True, alpha=0.7, color='blue', edgecolor='black')
+
+#     # 添加标题和标签
+#     plt.title('Histogram of Random Data')
+#     plt.xlabel('Value')
+#     plt.ylabel('Frequency')
+
+#     # 显示图形
+#     plt.show()
+#     """
+#     output = await pi.run(code)
+#     assert output[1] is True
 
 
 @pytest.mark.asyncio
-async def test_split_code_running():
-    pi = ExecutePyCode()
-    output = await pi.run("x=1\ny=2")
-    output = await pi.run("z=x+y")
-    output = await pi.run("assert z==3")
-    assert output.state == "done"
-
-
-@pytest.mark.asyncio
-async def test_execute_error():
-    pi = ExecutePyCode()
-    output = await pi.run("z=1/0")
-    assert output.state == "error"
-
-
-@pytest.mark.asyncio
-async def test_plotting_code():
-    pi = ExecutePyCode()
+async def test_plotting_bug():
     code = """
-    import numpy as np
     import matplotlib.pyplot as plt
-
-    # 生成随机数据
-    random_data = np.random.randn(1000)  # 生成1000个符合标准正态分布的随机数
-
-    # 绘制直方图
-    plt.hist(random_data, bins=30, density=True, alpha=0.7, color='blue', edgecolor='black')
-
-    # 添加标题和标签
-    plt.title('Histogram of Random Data')
-    plt.xlabel('Value')
-    plt.ylabel('Frequency')
-
-    # 显示图形
+    import seaborn as sns
+    import pandas as pd
+    from sklearn.datasets import load_iris
+    # Load the Iris dataset
+    iris_data = load_iris()
+    # Convert the loaded Iris dataset into a DataFrame for easier manipulation
+    iris_df = pd.DataFrame(iris_data['data'], columns=iris_data['feature_names'])
+    # Add a column for the target
+    iris_df['species'] = pd.Categorical.from_codes(iris_data['target'], iris_data['target_names'])
+    # Set the style of seaborn
+    sns.set(style='whitegrid')
+    # Create a pairplot of the iris dataset
+    plt.figure(figsize=(10, 8))
+    pairplot = sns.pairplot(iris_df, hue='species')
+    # Show the plot
     plt.show()
     """
+    pi = ExecutePyCode()
     output = await pi.run(code)
-    assert output.state == "done"
+    assert output[1] is True

From 8aa096a33469fb5c3730d5c9b413772c1c7f2f8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 16:07:12 +0800
Subject: [PATCH 03/12] fix: remove escape and color codes for output of
 nbclient.

---
 metagpt/roles/ml_engineer.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 1e4367372..5120a9011 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -3,6 +3,7 @@ import json
 import subprocess
 
 import fire
+import re
 
 from metagpt.roles import Role
 from metagpt.actions import Action
@@ -35,6 +36,13 @@ def truncate(result: str, keep_len: int = 1000) -> str:
     return desc
 
 
+def remove_escape_and_color_codes(input_str):
+    # Use a regular expression to strip ANSI escape sequences and color codes
+    pattern = re.compile(r'\x1b\[[0-9;]*[mK]')
+    result = pattern.sub('', input_str)
+    return result
+
+
 class AskReview(Action):
     async def run(self, context: List[Message], plan: Plan = None):
         logger.info("Current overall plan:")
@@ -137,8 +145,9 @@ class MLEngineer(Role):
             # truncated the result
             print(truncate(result))
             # print(result)
+            _result = truncate(remove_escape_and_color_codes(result))
             self.working_memory.add(
-                Message(content=result, role="user", cause_by=ExecutePyCode)
+                Message(content=_result, role="user", cause_by=ExecutePyCode)
             )
 
             if "!pip" in code:

From 2dd754d97740243602edec17a4611bbaa8a0c0dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 16:36:35 +0800
Subject: [PATCH 04/12] fix: reuse variables.

---
 metagpt/actions/write_analysis_code.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py
index 66e2137fe..ee4555ee1 100644
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@@ -40,8 +40,8 @@ class BaseWriteAnalysisCode(Action):
 
 class WriteCodeByGenerate(BaseWriteAnalysisCode):
     """Write code fully by generation"""
-    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: Use !pip install in a standalone block to install missing packages.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
-    REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
+    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Reuse variables in other code directly. Use !pip install in a standalone block to install missing packages.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
+    # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
 
     def __init__(self, name: str = "", context=None, llm=None) -> str:
         super().__init__(name, context, llm)
@@ -89,7 +89,7 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
         system_msg: str = None,
         **kwargs,
     ) -> str:
-        context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user"))
+        # context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user"))
         prompt = self.process_msg(context, system_msg)
         code_content = await self.llm.aask_code(prompt, **kwargs)
         return code_content["code"]

From 9d49caa8cc8566aeee5a8f8a7ad0c22d1271dae6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 17:09:43 +0800
Subject: [PATCH 05/12] test: set temperature=0.0

---
 tests/metagpt/actions/test_write_analysis_code.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/metagpt/actions/test_write_analysis_code.py b/tests/metagpt/actions/test_write_analysis_code.py
index 80d9438af..d4bccb552 100644
--- a/tests/metagpt/actions/test_write_analysis_code.py
+++ b/tests/metagpt/actions/test_write_analysis_code.py
@@ -159,7 +159,7 @@ async def test_write_code_reuse_code_long():
         Message(content=structural_context, role="user"),
     ]
     trials_num = 5
-    trials = [WriteCodeByGenerate().run(context=context) for _ in range(trials_num)]
+    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
     trial_results = await asyncio.gather(*trials)
     print(*trial_results, sep="\n\n***\n\n")
     success = ["load_iris" not in result and "iris_data" in result \

From 870ece45b23dbdd27fb9407b8127865a21279d8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 17:10:36 +0800
Subject: [PATCH 06/12] fix: set temperature=0.0 for WriteCodeByGenerate.

---
 metagpt/roles/ml_engineer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index 5120a9011..f5bb559e1 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -128,7 +128,7 @@ class MLEngineer(Role):
             if not self.use_tools or self.plan.current_task.task_type == "":
                 # code = "print('abc')"
                 code = await WriteCodeByGenerate().run(
-                    context=context, plan=self.plan, task_guide=task_guide
+                    context=context, plan=self.plan, task_guide=task_guide, temperature=0.0
                 )
                 cause_by = WriteCodeByGenerate
             else:

From c2dba151fbe139291d8fd185aea87e15a04a093a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 17:42:55 +0800
Subject: [PATCH 07/12] add unit test : write_code_reuse_code_long_for_wine.

---
 .../actions/test_write_analysis_code.py       | 274 +++++++++++-------
 1 file changed, 173 insertions(+), 101 deletions(-)

diff --git a/tests/metagpt/actions/test_write_analysis_code.py b/tests/metagpt/actions/test_write_analysis_code.py
index d4bccb552..1a727a9e4 100644
--- a/tests/metagpt/actions/test_write_analysis_code.py
+++ b/tests/metagpt/actions/test_write_analysis_code.py
@@ -6,110 +6,110 @@ from metagpt.actions.execute_code import ExecutePyCode
 from metagpt.schema import Message
 from metagpt.logs import logger
 
-@pytest.mark.asyncio
-async def test_write_code_by_list_plan():
-    write_code = WriteCodeByGenerate()
-    execute_code = ExecutePyCode()
-    messages = []
-    plan = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "求均值"]
-    for task in plan:
-        print(f"\n任务: {task}\n\n")
-        messages.append(Message(task, role='assistant'))
-        code = await write_code.run(messages)
-        messages.append(Message(code, role='assistant'))
-        assert len(code) > 0
-        output = await execute_code.run(code)
-        print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
-        messages.append(output[0])
+# @pytest.mark.asyncio
+# async def test_write_code_by_list_plan():
+#     write_code = WriteCodeByGenerate()
+#     execute_code = ExecutePyCode()
+#     messages = []
+#     plan = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "求均值"]
+#     for task in plan:
+#         print(f"\n任务: {task}\n\n")
+#         messages.append(Message(task, role='assistant'))
+#         code = await write_code.run(messages)
+#         messages.append(Message(code, role='assistant'))
+#         assert len(code) > 0
+#         output = await execute_code.run(code)
+#         print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
+#         messages.append(output[0])
 
-@pytest.mark.asyncio
-async def test_write_code_to_correct_error():
+# @pytest.mark.asyncio
+# async def test_write_code_to_correct_error():
 
-    structural_context = """
-    ## User Requirement
-    read a dataset test.csv and print its head
-    ## Current Plan
-    [
-        {
-            "task_id": "1",
-            "dependent_task_ids": [],
-            "instruction": "import pandas and load the dataset from 'test.csv'.",
-            "task_type": "",
-            "code": "",
-            "result": "",
-            "is_finished": false
-        },
-        {
-            "task_id": "2",
-            "dependent_task_ids": [
-                "1"
-            ],
-            "instruction": "Print the head of the dataset to display the first few rows.",
-            "task_type": "",
-            "code": "",
-            "result": "",
-            "is_finished": false
-        }
-    ]
-    ## Current Task
-    {"task_id": "1", "dependent_task_ids": [], "instruction": "import pandas and load the dataset from 'test.csv'.", "task_type": "", "code": "", "result": "", "is_finished": false}
-    """
-    wrong_code = """import pandas as pd\ndata = pd.read_excel('test.csv')\ndata"""  # use read_excel to read a csv
-    error = """
-    Traceback (most recent call last):
-        File "<stdin>", line 2, in <module>
-        File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 478, in read_excel
-            io = ExcelFile(io, storage_options=storage_options, engine=engine)
-        File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1500, in __init__
-            raise ValueError(
-        ValueError: Excel file format cannot be determined, you must specify an engine manually.
-    """
-    context = [
-        Message(content=structural_context, role="user"),
-        Message(content=wrong_code, role="assistant"),
-        Message(content=error, role="user"),
-    ]
-    new_code = await WriteCodeByGenerate().run(context=context)
-    print(new_code)
-    assert "read_csv" in new_code # should correct read_excel to read_csv
+#     structural_context = """
+#     ## User Requirement
+#     read a dataset test.csv and print its head
+#     ## Current Plan
+#     [
+#         {
+#             "task_id": "1",
+#             "dependent_task_ids": [],
+#             "instruction": "import pandas and load the dataset from 'test.csv'.",
+#             "task_type": "",
+#             "code": "",
+#             "result": "",
+#             "is_finished": false
+#         },
+#         {
+#             "task_id": "2",
+#             "dependent_task_ids": [
+#                 "1"
+#             ],
+#             "instruction": "Print the head of the dataset to display the first few rows.",
+#             "task_type": "",
+#             "code": "",
+#             "result": "",
+#             "is_finished": false
+#         }
+#     ]
+#     ## Current Task
+#     {"task_id": "1", "dependent_task_ids": [], "instruction": "import pandas and load the dataset from 'test.csv'.", "task_type": "", "code": "", "result": "", "is_finished": false}
+#     """
+#     wrong_code = """import pandas as pd\ndata = pd.read_excel('test.csv')\ndata"""  # use read_excel to read a csv
+#     error = """
+#     Traceback (most recent call last):
+#         File "<stdin>", line 2, in <module>
+#         File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 478, in read_excel
+#             io = ExcelFile(io, storage_options=storage_options, engine=engine)
+#         File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1500, in __init__
+#             raise ValueError(
+#         ValueError: Excel file format cannot be determined, you must specify an engine manually.
+#     """
+#     context = [
+#         Message(content=structural_context, role="user"),
+#         Message(content=wrong_code, role="assistant"),
+#         Message(content=error, role="user"),
+#     ]
+#     new_code = await WriteCodeByGenerate().run(context=context)
+#     print(new_code)
+#     assert "read_csv" in new_code # should correct read_excel to read_csv
 
-@pytest.mark.asyncio
-async def test_write_code_reuse_code_simple():
-    structural_context = """
-    ## User Requirement
-    read a dataset test.csv and print its head
-    ## Current Plan
-    [
-        {
-            "task_id": "1",
-            "dependent_task_ids": [],
-            "instruction": "import pandas and load the dataset from 'test.csv'.",
-            "task_type": "",
-            "code": "import pandas as pd\ndata = pd.read_csv('test.csv')",
-            "result": "",
-            "is_finished": true
-        },
-        {
-            "task_id": "2",
-            "dependent_task_ids": [
-                "1"
-            ],
-            "instruction": "Print the head of the dataset to display the first few rows.",
-            "task_type": "",
-            "code": "",
-            "result": "",
-            "is_finished": false
-        }
-    ]
-    ## Current Task
-    {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Print the head of the dataset to display the first few rows.", "task_type": "", "code": "", "result": "", "is_finished": false}
-    """
-    context = [
-        Message(content=structural_context, role="user"),
-    ]
-    code = await WriteCodeByGenerate().run(context=context)
-    print(code)
-    assert "pandas" not in code and "read_csv" not in code # should reuse import and read statement from previous one
+# @pytest.mark.asyncio
+# async def test_write_code_reuse_code_simple():
+#     structural_context = """
+#     ## User Requirement
+#     read a dataset test.csv and print its head
+#     ## Current Plan
+#     [
+#         {
+#             "task_id": "1",
+#             "dependent_task_ids": [],
+#             "instruction": "import pandas and load the dataset from 'test.csv'.",
+#             "task_type": "",
+#             "code": "import pandas as pd\ndata = pd.read_csv('test.csv')",
+#             "result": "",
+#             "is_finished": true
+#         },
+#         {
+#             "task_id": "2",
+#             "dependent_task_ids": [
+#                 "1"
+#             ],
+#             "instruction": "Print the head of the dataset to display the first few rows.",
+#             "task_type": "",
+#             "code": "",
+#             "result": "",
+#             "is_finished": false
+#         }
+#     ]
+#     ## Current Task
+#     {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Print the head of the dataset to display the first few rows.", "task_type": "", "code": "", "result": "", "is_finished": false}
+#     """
+#     context = [
+#         Message(content=structural_context, role="user"),
+#     ]
+#     code = await WriteCodeByGenerate().run(context=context)
+#     print(code)
+#     assert "pandas" not in code and "read_csv" not in code # should reuse import and read statement from previous one
 
 @pytest.mark.asyncio
 async def test_write_code_reuse_code_long():
@@ -167,3 +167,75 @@ async def test_write_code_reuse_code_long():
     success_rate = sum(success) / trials_num
     logger.info(f"success rate: {success_rate :.2f}")
     assert success_rate >= 0.8
+
+
+@pytest.mark.asyncio
+async def test_write_code_reuse_code_long_for_wine():
+    """test code reuse for long context"""
+
+    structural_context = """
+    ## User Requirement
+    Run data analysis on sklearn Wine recognition dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy
+    ## Current Plan
+    [
+        {
+            "task_id": "1",
+            "dependent_task_ids": [],
+            "instruction": "Load the sklearn Wine recognition dataset and perform exploratory data analysis.",
+            "task_type": "",
+            "code": "from sklearn.datasets import load_wine\n# Load the Wine recognition dataset\nwine_data = load_wine()\n# Perform exploratory data analysis\nwine_data.keys()",
+            "result": "Truncated to show only the last 1000 characters\ndict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])",
+            "is_finished": true
+        },
+        {
+            "task_id": "2",
+            "dependent_task_ids": ["1"],
+            "instruction": "Create a plot to visualize some aspect of the wine dataset.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "3",
+            "dependent_task_ids": ["1"],
+            "instruction": "Split the dataset into training and validation sets with a 20% validation size.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "4",
+            "dependent_task_ids": ["3"],
+            "instruction": "Train a model on the training set to predict wine class.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "5",
+            "dependent_task_ids": ["4"],
+            "instruction": "Evaluate the model on the validation set and report the accuracy.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        }
+    ]
+    ## Current Task
+    {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Create a plot to visualize some aspect of the Wine dataset.", "task_type": "", "code": "", "result": "", "is_finished": false}
+    """
+    context = [
+        Message(content=structural_context, role="user"),
+    ]
+    trials_num = 5
+    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
+    trial_results = await asyncio.gather(*trials)
+    print(*trial_results, sep="\n\n***\n\n")
+    success = ["load_wine" not in result\
+        for result in trial_results]  # should reuse iris_data from previous tasks
+    success_rate = sum(success) / trials_num
+    logger.info(f"success rate: {success_rate :.2f}")
+    assert success_rate >= 0.8

From 25c536f3e10f9f08584c07b23ceca16dab85dc0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 17:44:22 +0800
Subject: [PATCH 08/12] fix: reuse variables in code.

---
 metagpt/actions/write_analysis_code.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py
index ee4555ee1..2b56d6fc1 100644
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@@ -40,7 +40,7 @@ class BaseWriteAnalysisCode(Action):
 
 class WriteCodeByGenerate(BaseWriteAnalysisCode):
     """Write code fully by generation"""
-    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Reuse variables in other code directly. Use !pip install in a standalone block to install missing packages.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
+    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
     # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
 
     def __init__(self, name: str = "", context=None, llm=None) -> str:

From 87acf9b4535f6269a1869d0e054e7c713c04b82d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 17:48:24 +0800
Subject: [PATCH 09/12] chore

---
 tests/metagpt/actions/test_write_analysis_code.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/metagpt/actions/test_write_analysis_code.py b/tests/metagpt/actions/test_write_analysis_code.py
index 1a727a9e4..e0c3c5230 100644
--- a/tests/metagpt/actions/test_write_analysis_code.py
+++ b/tests/metagpt/actions/test_write_analysis_code.py
@@ -234,7 +234,7 @@ async def test_write_code_reuse_code_long_for_wine():
     trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
     trial_results = await asyncio.gather(*trials)
     print(*trial_results, sep="\n\n***\n\n")
-    success = ["load_wine" not in result\
+    success = ["load_wine" not in result and "wine_data" in result\
         for result in trial_results]  # should reuse iris_data from previous tasks
     success_rate = sum(success) / trials_num
     logger.info(f"success rate: {success_rate :.2f}")

From 897d1bf0d0c737d465679a38352659485d80e570 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 17:49:38 +0800
Subject: [PATCH 10/12] chore

---
 .../actions/test_write_analysis_code.py       | 202 +++++++++---------
 1 file changed, 101 insertions(+), 101 deletions(-)

diff --git a/tests/metagpt/actions/test_write_analysis_code.py b/tests/metagpt/actions/test_write_analysis_code.py
index e0c3c5230..211c6ba13 100644
--- a/tests/metagpt/actions/test_write_analysis_code.py
+++ b/tests/metagpt/actions/test_write_analysis_code.py
@@ -6,110 +6,110 @@ from metagpt.actions.execute_code import ExecutePyCode
 from metagpt.schema import Message
 from metagpt.logs import logger
 
-# @pytest.mark.asyncio
-# async def test_write_code_by_list_plan():
-#     write_code = WriteCodeByGenerate()
-#     execute_code = ExecutePyCode()
-#     messages = []
-#     plan = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "求均值"]
-#     for task in plan:
-#         print(f"\n任务: {task}\n\n")
-#         messages.append(Message(task, role='assistant'))
-#         code = await write_code.run(messages)
-#         messages.append(Message(code, role='assistant'))
-#         assert len(code) > 0
-#         output = await execute_code.run(code)
-#         print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
-#         messages.append(output[0])
+@pytest.mark.asyncio
+async def test_write_code_by_list_plan():
+    write_code = WriteCodeByGenerate()
+    execute_code = ExecutePyCode()
+    messages = []
+    plan = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "求均值"]
+    for task in plan:
+        print(f"\n任务: {task}\n\n")
+        messages.append(Message(task, role='assistant'))
+        code = await write_code.run(messages)
+        messages.append(Message(code, role='assistant'))
+        assert len(code) > 0
+        output = await execute_code.run(code)
+        print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
+        messages.append(output[0])
 
-# @pytest.mark.asyncio
-# async def test_write_code_to_correct_error():
+@pytest.mark.asyncio
+async def test_write_code_to_correct_error():
 
-#     structural_context = """
-#     ## User Requirement
-#     read a dataset test.csv and print its head
-#     ## Current Plan
-#     [
-#         {
-#             "task_id": "1",
-#             "dependent_task_ids": [],
-#             "instruction": "import pandas and load the dataset from 'test.csv'.",
-#             "task_type": "",
-#             "code": "",
-#             "result": "",
-#             "is_finished": false
-#         },
-#         {
-#             "task_id": "2",
-#             "dependent_task_ids": [
-#                 "1"
-#             ],
-#             "instruction": "Print the head of the dataset to display the first few rows.",
-#             "task_type": "",
-#             "code": "",
-#             "result": "",
-#             "is_finished": false
-#         }
-#     ]
-#     ## Current Task
-#     {"task_id": "1", "dependent_task_ids": [], "instruction": "import pandas and load the dataset from 'test.csv'.", "task_type": "", "code": "", "result": "", "is_finished": false}
-#     """
-#     wrong_code = """import pandas as pd\ndata = pd.read_excel('test.csv')\ndata"""  # use read_excel to read a csv
-#     error = """
-#     Traceback (most recent call last):
-#         File "<stdin>", line 2, in <module>
-#         File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 478, in read_excel
-#             io = ExcelFile(io, storage_options=storage_options, engine=engine)
-#         File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1500, in __init__
-#             raise ValueError(
-#         ValueError: Excel file format cannot be determined, you must specify an engine manually.
-#     """
-#     context = [
-#         Message(content=structural_context, role="user"),
-#         Message(content=wrong_code, role="assistant"),
-#         Message(content=error, role="user"),
-#     ]
-#     new_code = await WriteCodeByGenerate().run(context=context)
-#     print(new_code)
-#     assert "read_csv" in new_code # should correct read_excel to read_csv
+    structural_context = """
+    ## User Requirement
+    read a dataset test.csv and print its head
+    ## Current Plan
+    [
+        {
+            "task_id": "1",
+            "dependent_task_ids": [],
+            "instruction": "import pandas and load the dataset from 'test.csv'.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "2",
+            "dependent_task_ids": [
+                "1"
+            ],
+            "instruction": "Print the head of the dataset to display the first few rows.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        }
+    ]
+    ## Current Task
+    {"task_id": "1", "dependent_task_ids": [], "instruction": "import pandas and load the dataset from 'test.csv'.", "task_type": "", "code": "", "result": "", "is_finished": false}
+    """
+    wrong_code = """import pandas as pd\ndata = pd.read_excel('test.csv')\ndata"""  # use read_excel to read a csv
+    error = """
+    Traceback (most recent call last):
+        File "<stdin>", line 2, in <module>
+        File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 478, in read_excel
+            io = ExcelFile(io, storage_options=storage_options, engine=engine)
+        File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1500, in __init__
+            raise ValueError(
+        ValueError: Excel file format cannot be determined, you must specify an engine manually.
+    """
+    context = [
+        Message(content=structural_context, role="user"),
+        Message(content=wrong_code, role="assistant"),
+        Message(content=error, role="user"),
+    ]
+    new_code = await WriteCodeByGenerate().run(context=context)
+    print(new_code)
+    assert "read_csv" in new_code # should correct read_excel to read_csv
 
-# @pytest.mark.asyncio
-# async def test_write_code_reuse_code_simple():
-#     structural_context = """
-#     ## User Requirement
-#     read a dataset test.csv and print its head
-#     ## Current Plan
-#     [
-#         {
-#             "task_id": "1",
-#             "dependent_task_ids": [],
-#             "instruction": "import pandas and load the dataset from 'test.csv'.",
-#             "task_type": "",
-#             "code": "import pandas as pd\ndata = pd.read_csv('test.csv')",
-#             "result": "",
-#             "is_finished": true
-#         },
-#         {
-#             "task_id": "2",
-#             "dependent_task_ids": [
-#                 "1"
-#             ],
-#             "instruction": "Print the head of the dataset to display the first few rows.",
-#             "task_type": "",
-#             "code": "",
-#             "result": "",
-#             "is_finished": false
-#         }
-#     ]
-#     ## Current Task
-#     {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Print the head of the dataset to display the first few rows.", "task_type": "", "code": "", "result": "", "is_finished": false}
-#     """
-#     context = [
-#         Message(content=structural_context, role="user"),
-#     ]
-#     code = await WriteCodeByGenerate().run(context=context)
-#     print(code)
-#     assert "pandas" not in code and "read_csv" not in code # should reuse import and read statement from previous one
+@pytest.mark.asyncio
+async def test_write_code_reuse_code_simple():
+    structural_context = """
+    ## User Requirement
+    read a dataset test.csv and print its head
+    ## Current Plan
+    [
+        {
+            "task_id": "1",
+            "dependent_task_ids": [],
+            "instruction": "import pandas and load the dataset from 'test.csv'.",
+            "task_type": "",
+            "code": "import pandas as pd\ndata = pd.read_csv('test.csv')",
+            "result": "",
+            "is_finished": true
+        },
+        {
+            "task_id": "2",
+            "dependent_task_ids": [
+                "1"
+            ],
+            "instruction": "Print the head of the dataset to display the first few rows.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        }
+    ]
+    ## Current Task
+    {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Print the head of the dataset to display the first few rows.", "task_type": "", "code": "", "result": "", "is_finished": false}
+    """
+    context = [
+        Message(content=structural_context, role="user"),
+    ]
+    code = await WriteCodeByGenerate().run(context=context)
+    print(code)
+    assert "pandas" not in code and "read_csv" not in code # should reuse import and read statement from previous one
 
 @pytest.mark.asyncio
 async def test_write_code_reuse_code_long():

From f440ff69d04768da1f8183cb4386d36bd9886456 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Thu, 30 Nov 2023 18:04:55 +0800
Subject: [PATCH 11/12] test: re-enable commented-out tests in test_execute_code.py

---
 tests/metagpt/actions/test_execute_code.py | 82 +++++++++++-----------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/tests/metagpt/actions/test_execute_code.py b/tests/metagpt/actions/test_execute_code.py
index 8894f2cb9..73b5886dc 100644
--- a/tests/metagpt/actions/test_execute_code.py
+++ b/tests/metagpt/actions/test_execute_code.py
@@ -4,57 +4,57 @@ from metagpt.actions.execute_code import ExecutePyCode
 from metagpt.schema import Message
 
 
-# @pytest.mark.asyncio
-# async def test_code_running():
-#     pi = ExecutePyCode()
-#     output = await pi.run("print('hello world!')")
-#     assert output[1] is True
-#     output = await pi.run({"code": "print('hello world!')", "language": "python"})
-#     assert output[1] is True
-#     code_msg = Message("print('hello world!')")
-#     output = await pi.run(code_msg)
-#     assert output[1] is True
+@pytest.mark.asyncio
+async def test_code_running():
+    pi = ExecutePyCode()
+    output = await pi.run("print('hello world!')")
+    assert output[1] is True
+    output = await pi.run({"code": "print('hello world!')", "language": "python"})
+    assert output[1] is True
+    code_msg = Message("print('hello world!')")
+    output = await pi.run(code_msg)
+    assert output[1] is True
 
 
-# @pytest.mark.asyncio
-# async def test_split_code_running():
-#     pi = ExecutePyCode()
-#     output = await pi.run("x=1\ny=2")
-#     output = await pi.run("z=x+y")
-#     output = await pi.run("assert z==3")
-#     assert output[1] is True
+@pytest.mark.asyncio
+async def test_split_code_running():
+    pi = ExecutePyCode()
+    output = await pi.run("x=1\ny=2")
+    output = await pi.run("z=x+y")
+    output = await pi.run("assert z==3")
+    assert output[1] is True
 
 
-# @pytest.mark.asyncio
-# async def test_execute_error():
-#     pi = ExecutePyCode()
-#     output = await pi.run("z=1/0")
-#     assert output[1] is False
+@pytest.mark.asyncio
+async def test_execute_error():
+    pi = ExecutePyCode()
+    output = await pi.run("z=1/0")
+    assert output[1] is False
 
 
-# @pytest.mark.asyncio
-# async def test_plotting_code():
-#     pi = ExecutePyCode()
-#     code = """
-#     import numpy as np
-#     import matplotlib.pyplot as plt
+@pytest.mark.asyncio
+async def test_plotting_code():
+    pi = ExecutePyCode()
+    code = """
+    import numpy as np
+    import matplotlib.pyplot as plt
 
-#     # 生成随机数据
-#     random_data = np.random.randn(1000)  # 生成1000个符合标准正态分布的随机数
+    # 生成随机数据
+    random_data = np.random.randn(1000)  # 生成1000个符合标准正态分布的随机数
 
-#     # 绘制直方图
-#     plt.hist(random_data, bins=30, density=True, alpha=0.7, color='blue', edgecolor='black')
+    # 绘制直方图
+    plt.hist(random_data, bins=30, density=True, alpha=0.7, color='blue', edgecolor='black')
 
-#     # 添加标题和标签
-#     plt.title('Histogram of Random Data')
-#     plt.xlabel('Value')
-#     plt.ylabel('Frequency')
+    # 添加标题和标签
+    plt.title('Histogram of Random Data')
+    plt.xlabel('Value')
+    plt.ylabel('Frequency')
 
-#     # 显示图形
-#     plt.show()
-#     """
-#     output = await pi.run(code)
-#     assert output[1] is True
+    # 显示图形
+    plt.show()
+    """
+    output = await pi.run(code)
+    assert output[1] is True
 
 
 @pytest.mark.asyncio

From 59af6d96921fadbba22c57ea171ab0725d8e5b0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= <liubangbang@fuzhi.ai>
Date: Fri, 1 Dec 2023 15:21:40 +0800
Subject: [PATCH 12/12] chore: remove _result.

---
 metagpt/roles/ml_engineer.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py
index f5bb559e1..ae346579b 100644
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@@ -145,9 +145,8 @@ class MLEngineer(Role):
             # truncated the result
             print(truncate(result))
             # print(result)
-            _result = truncate(remove_escape_and_color_codes(result))
             self.working_memory.add(
-                Message(content=_result, role="user", cause_by=ExecutePyCode)
+                Message(content=truncate(remove_escape_and_color_codes(result)), role="user", cause_by=ExecutePyCode)
             )
 
             if "!pip" in code: