Merge branch 'dev' into kaggle_team

2026-06-20 15:38:09 +02:00 · 2023-12-04 14:43:00 +08:00 · 2023-12-04 14:43:00 +08:00 · f7989b0ce0
commit f7989b0ce0
parent 8d7657f347 20a918bf39
8 changed files with 286 additions and 41 deletions
--- a/tests/metagpt/actions/test_execute_code.py
+++ b/tests/metagpt/actions/test_execute_code.py
@ -1,6 +1,6 @@
 import pytest

-from metagpt.actions import ExecutePyCode
+from metagpt.actions.execute_code import ExecutePyCode
 from metagpt.schema import Message


@ -8,12 +8,12 @@ from metagpt.schema import Message
 async def test_code_running():
    pi = ExecutePyCode()
    output = await pi.run("print('hello world!')")
-    assert output.state == "done"
+    assert output[1] is True
    output = await pi.run({"code": "print('hello world!')", "language": "python"})
-    assert output.state == "done"
+    assert output[1] is True
    code_msg = Message("print('hello world!')")
    output = await pi.run(code_msg)
-    assert output.state == "done"
+    assert output[1] is True


@pytest.mark.asyncio
@ -22,14 +22,14 @@ async def test_split_code_running():
    output = await pi.run("x=1\ny=2")
    output = await pi.run("z=x+y")
    output = await pi.run("assert z==3")
-    assert output.state == "done"
+    assert output[1] is True


@pytest.mark.asyncio
 async def test_execute_error():
    pi = ExecutePyCode()
    output = await pi.run("z=1/0")
-    assert output.state == "error"
+    assert output[1] is False


@pytest.mark.asyncio
@ -54,4 +54,30 @@ async def test_plotting_code():
    plt.show()
    """
    output = await pi.run(code)
-    assert output.state == "done"
+    assert output[1] is True
+
+
+@pytest.mark.asyncio
+async def test_plotting_bug():  # runs a seaborn pairplot snippet through ExecutePyCode
+    code = """
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+    import pandas as pd
+    from sklearn.datasets import load_iris
+    # Load the Iris dataset
+    iris_data = load_iris()
+    # Convert the loaded Iris dataset into a DataFrame for easier manipulation
+    iris_df = pd.DataFrame(iris_data['data'], columns=iris_data['feature_names'])
+    # Add a column for the target
+    iris_df['species'] = pd.Categorical.from_codes(iris_data['target'], iris_data['target_names'])
+    # Set the style of seaborn
+    sns.set(style='whitegrid')
+    # Create a pairplot of the iris dataset
+    plt.figure(figsize=(10, 8))
+    pairplot = sns.pairplot(iris_df, hue='species')
+    # Show the plot
+    plt.show()
+    """
+    pi = ExecutePyCode()
+    output = await pi.run(code)  # the snippet is passed as a plain string, as in test_code_running
+    assert output[1] is True  # index 1 is asserted True here and False in test_execute_error
--- a/tests/metagpt/actions/test_write_analysis_code.py
+++ b/tests/metagpt/actions/test_write_analysis_code.py
@ -1,11 +1,12 @@
 import asyncio
 import pytest

-from metagpt.actions.write_analysis_code import WriteCodeByGenerate
+from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
 from metagpt.actions.execute_code import ExecutePyCode
-from metagpt.schema import Message
+from metagpt.schema import Message, Plan, Task
 from metagpt.logs import logger

+
@pytest.mark.asyncio
 async def test_write_code_by_list_plan():
    write_code = WriteCodeByGenerate()
@ -22,6 +23,77 @@ async def test_write_code_by_list_plan():
        print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
        messages.append(output[0])

+
+@pytest.mark.asyncio
+async def test_tool_recommendation():  # checks _tool_recommendation against two candidate tools
+    task = "对已经读取的数据集进行数据清洗"  # "Clean the dataset that has already been loaded"
+    code_steps = """
+    step 1: 对数据集进行去重
+    step 2: 对数据集进行缺失值处理
+    """  # step 1: de-duplicate the dataset; step 2: handle missing values in the dataset
+    available_tools = [
+        {
+            "name": "fill_missing_value",
+            "description": "Completing missing values with simple strategies",
+        },
+        {
+            "name": "split_bins",
+            "description": "Bin continuous data into intervals and return the bin identifier encoded as an integer value",
+        },
+    ]
+    write_code = WriteCodeWithTools()
+    tools = await write_code._tool_recommendation(task, code_steps, available_tools)
+
+    assert len(tools) == 2  # presumably one recommendation list per step in code_steps — confirm
+    assert tools[0] == []  # first entry: no tool expected
+    assert tools[1] == ["fill_missing_value"]  # second entry: only the missing-value tool expected
+
+
+@pytest.mark.asyncio
+async def test_write_code_with_tools():  # builds a two-task Plan and asks WriteCodeWithTools for task "2"
+    write_code = WriteCodeWithTools()
+    messages = []
+    task_map = {
+        "1": Task(
+                task_id="1",
+                instruction="随机生成一个pandas DataFrame数据集",  # "Randomly generate a pandas DataFrame dataset"
+                task_type="unknown",
+                dependent_task_ids=[],
+                code="""
+                import pandas as pd
+                df = pd.DataFrame({
+                    'a': [1, 2, 3, 4, 5],
+                    'b': [1.1, 2.2, 3.3, 4.4, float('nan')],
+                    'c': ['aa', 'bb', 'cc', 'dd', 'ee'],
+                    'd': [1, 2, 3, 4, 5]
+                })
+                """,
+                is_finished=True,  # the snippet above must be runnable as written, hence no undefined names in it
+            ),
+        "2": Task(
+                task_id="2",
+                instruction="对数据集进行数据清洗",  # "Clean the dataset"
+                task_type="data_preprocess",
+                dependent_task_ids=["1"],
+            ),
+    }
+    plan = Plan(
+        goal="构造数据集并进行数据清洗",  # "Construct a dataset and clean it"
+        tasks=list(task_map.values()),
+        task_map=task_map,
+        current_task_id="2",
+    )
+    task_guide = """
+    step 1: 对数据集进行去重
+    step 2: 对数据集进行缺失值处理
+    """  # step 1: de-duplicate the dataset; step 2: handle missing values in the dataset
+    data_desc = "None"
+
+    code = await write_code.run(messages, plan, task_guide, data_desc)
+    assert len(code) > 0  # only checks that something non-empty was returned
+    print(code)
+
+
@pytest.mark.asyncio
 async def test_write_code_to_correct_error():

@ -159,7 +231,7 @@ async def test_write_code_reuse_code_long():
        Message(content=structural_context, role="user"),
    ]
    trials_num = 5
-    trials = [WriteCodeByGenerate().run(context=context) for _ in range(trials_num)]
+    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
    trial_results = await asyncio.gather(*trials)
    print(*trial_results, sep="\n\n***\n\n")
    success = ["load_iris" not in result and "iris_data" in result \
@ -167,3 +239,75 @@ async def test_write_code_reuse_code_long():
    success_rate = sum(success) / trials_num
    logger.info(f"success rate: {success_rate :.2f}")
    assert success_rate >= 0.8
+
+
+@pytest.mark.asyncio
+async def test_write_code_reuse_code_long_for_wine():
+    """Test that code generated from a long plan context reuses wine_data instead of reloading it."""
+
+    structural_context = """
+    ## User Requirement
+    Run data analysis on sklearn Wine recognition dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy
+    ## Current Plan
+    [
+        {
+            "task_id": "1",
+            "dependent_task_ids": [],
+            "instruction": "Load the sklearn Wine recognition dataset and perform exploratory data analysis.",
+            "task_type": "",
+            "code": "from sklearn.datasets import load_wine\n# Load the Wine recognition dataset\nwine_data = load_wine()\n# Perform exploratory data analysis\nwine_data.keys()",
+            "result": "Truncated to show only the last 1000 characters\ndict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])",
+            "is_finished": true
+        },
+        {
+            "task_id": "2",
+            "dependent_task_ids": ["1"],
+            "instruction": "Create a plot to visualize some aspect of the wine dataset.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "3",
+            "dependent_task_ids": ["1"],
+            "instruction": "Split the dataset into training and validation sets with a 20% validation size.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "4",
+            "dependent_task_ids": ["3"],
+            "instruction": "Train a model on the training set to predict wine class.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        },
+        {
+            "task_id": "5",
+            "dependent_task_ids": ["4"],
+            "instruction": "Evaluate the model on the validation set and report the accuracy.",
+            "task_type": "",
+            "code": "",
+            "result": "",
+            "is_finished": false
+        }
+    ]
+    ## Current Task
+    {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Create a plot to visualize some aspect of the Wine dataset.", "task_type": "", "code": "", "result": "", "is_finished": false}
+    """
+    context = [
+        Message(content=structural_context, role="user"),
+    ]
+    trials_num = 5  # several independent generations are sampled and a success rate is computed over them
+    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
+    trial_results = await asyncio.gather(*trials)
+    print(*trial_results, sep="\n\n***\n\n")
+    success = ["load_wine" not in result and "wine_data" in result
+        for result in trial_results]  # should reuse wine_data from previous tasks
+    success_rate = sum(success) / trials_num
+    logger.info(f"success rate: {success_rate :.2f}")
+    assert success_rate >= 0.8  # at least 4 of the 5 trials must reuse wine_data without calling load_wine again