Merge branch 'dev_reusecode' into 'dev'

Dev reusecode See merge request agents/data_agents_opt!12
2026-06-11 15:15:18 +02:00 · 2023-12-01 07:54:29 +00:00 · 2023-12-01 07:54:29 +00:00 · e36476056e
commit e36476056e
parent a9c8c6b73f 59af6d9692
6 changed files with 127 additions and 17 deletions
--- a/metagpt/actions/execute_code.py
+++ b/metagpt/actions/execute_code.py
@ -17,6 +17,7 @@ from rich.syntax import Syntax

 from metagpt.actions import Action
 from metagpt.schema import Message
+from metagpt.logs import logger


 class ExecuteCode(ABC):
@ -90,11 +91,14 @@ class ExecutePyCode(ExecuteCode, Action):
        if not outputs:
            return parsed_output

-        for output in outputs:
+        for i, output in enumerate(outputs):
            if output["output_type"] == "stream":
                parsed_output += output["text"]
            elif output["output_type"] == "display_data":
-                self.show_bytes_figure(output["data"]["image/png"], self.interaction)
+                if "image/png" in output["data"]:
+                    self.show_bytes_figure(output["data"]["image/png"], self.interaction)
+                else:
+                    logger.info(f"{i}th output['data'] from nbclient outputs dont have image/png, continue next output ...")
            elif output["output_type"] == "execute_result":
                parsed_output += output["data"]["text/plain"]
        return parsed_output
@ -136,7 +140,6 @@ class ExecutePyCode(ExecuteCode, Action):

        if isinstance(code, str):
            return code, language
-
        if isinstance(code, dict):
            assert "code" in code
            if "language" not in code:
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@ -40,8 +40,8 @@ class BaseWriteAnalysisCode(Action):

 class WriteCodeByGenerate(BaseWriteAnalysisCode):
    """Write code fully by generation"""
-    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: Use !pip install in a standalone block to install missing packages.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
-    REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
+    DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
+    # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""

    def __init__(self, name: str = "", context=None, llm=None) -> str:
        super().__init__(name, context, llm)
@ -89,7 +89,7 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
        system_msg: str = None,
        **kwargs,
    ) -> str:
-        context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user"))
+        # context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user"))
        prompt = self.process_msg(context, system_msg)
        code_content = await self.llm.aask_code(prompt, **kwargs)
        return code_content["code"]
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@ -3,6 +3,7 @@ import json
 import subprocess

 import fire
+import re

 from metagpt.roles import Role
 from metagpt.actions import Action
@ -35,6 +36,13 @@ def truncate(result: str, keep_len: int = 1000) -> str:
    return desc


+def remove_escape_and_color_codes(input_str):
+    # Use a regular expression to strip escape characters and color codes
+    pattern = re.compile(r'\x1b\[[0-9;]*[mK]')
+    result = pattern.sub('', input_str)
+    return result
+
+
 class AskReview(Action):
    async def run(self, context: List[Message], plan: Plan = None):
        logger.info("Current overall plan:")
@ -120,7 +128,7 @@ class MLEngineer(Role):
            if not self.use_tools or self.plan.current_task.task_type == "":
                # code = "print('abc')"
                code = await WriteCodeByGenerate().run(
-                    context=context, plan=self.plan, task_guide=task_guide
+                    context=context, plan=self.plan, task_guide=task_guide, temperature=0.0
                )
                cause_by = WriteCodeByGenerate
            else:
@ -138,7 +146,7 @@ class MLEngineer(Role):
            print(truncate(result))
            # print(result)
            self.working_memory.add(
-                Message(content=result, role="user", cause_by=ExecutePyCode)
+                Message(content=truncate(remove_escape_and_color_codes(result)), role="user", cause_by=ExecutePyCode)
            )

            if "!pip" in code:
--- a/requirements.txt
+++ b/requirements.txt
@ -50,4 +50,5 @@ nbclient==0.9.0
 nbformat==5.9.2
 ipython==8.17.2
 ipykernel==6.27.0
-scikit_learn==1.3.2
+scikit_learn==1.3.2
+typing-extensions==4.8.0
--- a/tests/metagpt/actions/test_execute_code.py
+++ b/tests/metagpt/actions/test_execute_code.py
@ -1,6 +1,6 @@
 import pytest

-from metagpt.actions import ExecutePyCode
+from metagpt.actions.execute_code import ExecutePyCode
 from metagpt.schema import Message


@ -8,12 +8,12 @@ from metagpt.schema import Message
 async def test_code_running():
    pi = ExecutePyCode()
    output = await pi.run("print('hello world!')")
-    assert output.state == "done"
+    assert output[1] is True
    output = await pi.run({"code": "print('hello world!')", "language": "python"})
-    assert output.state == "done"
+    assert output[1] is True
    code_msg = Message("print('hello world!')")
    output = await pi.run(code_msg)
-    assert output.state == "done"
+    assert output[1] is True


@pytest.mark.asyncio
@ -22,14 +22,14 @@ async def test_split_code_running():
    output = await pi.run("x=1\ny=2")
    output = await pi.run("z=x+y")
    output = await pi.run("assert z==3")
-    assert output.state == "done"
+    assert output[1] is True


@pytest.mark.asyncio
 async def test_execute_error():
    pi = ExecutePyCode()
    output = await pi.run("z=1/0")
-    assert output.state == "error"
+    assert output[1] is False


@pytest.mark.asyncio
@ -54,4 +54,30 @@ async def test_plotting_code():
    plt.show()
    """
    output = await pi.run(code)
-    assert output.state == "done"
+    assert output[1] is True
+
+
+@pytest.mark.asyncio
+async def test_plotting_bug():
+    code = """
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+    import pandas as pd
+    from sklearn.datasets import load_iris
+    # Load the Iris dataset
+    iris_data = load_iris()
+    # Convert the loaded Iris dataset into a DataFrame for easier manipulation
+    iris_df = pd.DataFrame(iris_data['data'], columns=iris_data['feature_names'])
+    # Add a column for the target
+    iris_df['species'] = pd.Categorical.from_codes(iris_data['target'], iris_data['target_names'])
+    # Set the style of seaborn
+    sns.set(style='whitegrid')
+    # Create a pairplot of the iris dataset
+    plt.figure(figsize=(10, 8))
+    pairplot = sns.pairplot(iris_df, hue='species')
+    # Show the plot
+    plt.show()
+    """
+    pi = ExecutePyCode()
+    output = await pi.run(code)
+    assert output[1] is True
--- a/tests/metagpt/actions/test_write_analysis_code.py
+++ b/tests/metagpt/actions/test_write_analysis_code.py
@ -159,7 +159,7 @@ async def test_write_code_reuse_code_long():
        Message(content=structural_context, role="user"),
    ]
    trials_num = 5
-    trials = [WriteCodeByGenerate().run(context=context) for _ in range(trials_num)]
+    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
    trial_results = await asyncio.gather(*trials)
    print(*trial_results, sep="\n\n***\n\n")
    success = ["load_iris" not in result and "iris_data" in result \
@ -167,3 +167,75 @@ async def test_write_code_reuse_code_long():
    success_rate = sum(success) / trials_num
    logger.info(f"success rate: {success_rate :.2f}")
    assert success_rate >= 0.8
+
+
+@pytest.mark.asyncio
+async def test_write_code_reuse_code_long_for_wine():
+    """test code reuse for long context"""
+
+    structural_context = """
+    ## User Requirement
+    Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy
+    ## Current Plan
+    [
+        {
+            "task_id": "1",
+            "dependent_task_ids": [],
+            "instruction": "Load the sklearn Wine recognition dataset and perform exploratory data analysis."
+            "task_type": "",
+            "code": "from sklearn.datasets import load_wine\n# Load the Wine recognition dataset\nwine_data = load_wine()\n# Perform exploratory data analysis\nwine_data.keys()",
+            "result": "Truncated to show only the last 1000 characters\ndict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])",
+            "is_finished": true
+        },
+        {
+            "task_id": "2",
+            "dependent_task_ids": ["1"],
+            "instruction": "Create a plot to visualize some aspect of the wine dataset."
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "3",
+            "dependent_task_ids": ["1"],
+            "instruction": "Split the dataset into training and validation sets with a 20% validation size.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "4",
+            "dependent_task_ids": ["3"],
+            "instruction": "Train a model on the training set to predict wine class.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "5",
+            "dependent_task_ids": ["4"],
+            "instruction": "Evaluate the model on the validation set and report the accuracy.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        }
+    ]
+    ## Current Task
+    {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Create a plot to visualize some aspect of the Wine dataset.", "task_type": "", "code": "", "result": "", "is_finished": false}
+    """
+    context = [
+        Message(content=structural_context, role="user"),
+    ]
+    trials_num = 5
+    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
+    trial_results = await asyncio.gather(*trials)
+    print(*trial_results, sep="\n\n***\n\n")
+    success = ["load_wine" not in result and "wine_data" in result\
+        for result in trial_results]  # should reuse wine_data from previous tasks
+    success_rate = sum(success) / trials_num
+    logger.info(f"success rate: {success_rate :.2f}")
+    assert success_rate >= 0.8