mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-20 15:38:09 +02:00
Merge branch 'dev' into kaggle_team
This commit is contained in:
commit
f7989b0ce0
8 changed files with 286 additions and 41 deletions
|
|
@ -1,6 +1,6 @@
|
|||
import pytest
|
||||
|
||||
from metagpt.actions import ExecutePyCode
|
||||
from metagpt.actions.execute_code import ExecutePyCode
|
||||
from metagpt.schema import Message
|
||||
|
||||
|
||||
|
|
@ -8,12 +8,12 @@ from metagpt.schema import Message
|
|||
async def test_code_running():
    """Run one snippet through ``ExecutePyCode.run`` in three input forms.

    The same ``print`` statement is submitted as a plain string, as a
    ``{"code": ..., "language": ...}`` dict and wrapped in a ``Message``.
    After each call the second element of the returned value must be
    ``True`` (presumably the success flag -- confirm against
    ``ExecutePyCode.run``).
    """
    pi = ExecutePyCode()

    # Plain source string.
    output = await pi.run("print('hello world!')")
    assert output[1] is True

    # Dict carrying the code together with its language.
    output = await pi.run({"code": "print('hello world!')", "language": "python"})
    assert output[1] is True

    # Message whose content is the code.
    code_msg = Message("print('hello world!')")
    output = await pi.run(code_msg)
    assert output[1] is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -22,14 +22,14 @@ async def test_split_code_running():
|
|||
output = await pi.run("x=1\ny=2")
|
||||
output = await pi.run("z=x+y")
|
||||
output = await pi.run("assert z==3")
|
||||
assert output.state == "done"
|
||||
assert output[1] is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_error():
    """Check that a snippet raising at runtime is reported as a failure.

    ``z=1/0`` divides by zero; the second element of the value returned by
    ``ExecutePyCode.run`` must then be ``False``.
    """
    pi = ExecutePyCode()
    output = await pi.run("z=1/0")
    assert output[1] is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -54,4 +54,30 @@ async def test_plotting_code():
|
|||
plt.show()
|
||||
"""
|
||||
output = await pi.run(code)
|
||||
assert output.state == "done"
|
||||
assert output[1] is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_plotting_bug():
    """Execute a seaborn pairplot snippet and expect it to succeed.

    The snippet loads the Iris dataset, turns it into a DataFrame with a
    categorical ``species`` column, draws a pairplot and calls ``plt.show()``.
    The run passes when the second element of the returned value is ``True``.
    """
    executor = ExecutePyCode()
    snippet = """
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.datasets import load_iris
# Load the Iris dataset
iris_data = load_iris()
# Convert the loaded Iris dataset into a DataFrame for easier manipulation
iris_df = pd.DataFrame(iris_data['data'], columns=iris_data['feature_names'])
# Add a column for the target
iris_df['species'] = pd.Categorical.from_codes(iris_data['target'], iris_data['target_names'])
# Set the style of seaborn
sns.set(style='whitegrid')
# Create a pairplot of the iris dataset
plt.figure(figsize=(10, 8))
pairplot = sns.pairplot(iris_df, hue='species')
# Show the plot
plt.show()
"""
    result = await executor.run(snippet)
    assert result[1] is True
|
||||
|
|
|
|||
|
|
@ -1,11 +1,12 @@
|
|||
import asyncio
|
||||
import pytest
|
||||
|
||||
from metagpt.actions.write_analysis_code import WriteCodeByGenerate
|
||||
from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
|
||||
from metagpt.actions.execute_code import ExecutePyCode
|
||||
from metagpt.schema import Message
|
||||
from metagpt.schema import Message, Plan, Task
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_write_code_by_list_plan():
|
||||
write_code = WriteCodeByGenerate()
|
||||
|
|
@ -22,6 +23,77 @@ async def test_write_code_by_list_plan():
|
|||
print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
|
||||
messages.append(output[0])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_tool_recommendation():
    """Check the per-step tool lists produced by ``_tool_recommendation``.

    The task is a data-cleaning task with two steps (de-duplication, then
    missing-value handling) and two candidate tools. The test expects one
    list per step: an empty one for step 1 and ``["fill_missing_value"]``
    for step 2.
    """
    fill_missing_value_tool = {
        "name": "fill_missing_value",
        "description": "Completing missing values with simple strategies",
    }
    split_bins_tool = {
        "name": "split_bins",
        "description": "Bin continuous data into intervals and return the bin identifier encoded as an integer value",
    }
    available_tools = [fill_missing_value_tool, split_bins_tool]

    task = "对已经读取的数据集进行数据清洗"
    code_steps = """
step 1: 对数据集进行去重
step 2: 对数据集进行缺失值处理
"""

    recommender = WriteCodeWithTools()
    tools = await recommender._tool_recommendation(task, code_steps, available_tools)

    assert len(tools) == 2
    # Neither candidate tool de-duplicates, so step 1 gets nothing.
    assert tools[0] == []
    assert tools[1] == ["fill_missing_value"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_write_code_with_tools():
    """Ask ``WriteCodeWithTools`` to write code for a data-cleaning task.

    The plan holds two tasks: task "1" is already finished and carries the
    code that built a small DataFrame; task "2" (type ``data_preprocess``)
    depends on it and is the current task. The test only checks that
    ``run`` returns something non-empty, then prints it for inspection.
    """
    write_code = WriteCodeWithTools()
    messages = []

    # Code recorded for the finished task. It uses ``np.nan``, so numpy must
    # be imported for the recorded context to be runnable as written.
    finished_task_code = """
import numpy as np
import pandas as pd
df = pd.DataFrame({
'a': [1, 2, 3, 4, 5],
'b': [1.1, 2.2, 3.3, 4.4, np.nan],
'c': ['aa', 'bb', 'cc', 'dd', 'ee'],
'd': [1, 2, 3, 4, 5]
})
"""
    task_map = {
        "1": Task(
            task_id="1",
            instruction="随机生成一个pandas DataFrame数据集",
            task_type="unknown",
            dependent_task_ids=[],
            code=finished_task_code,
            is_finished=True,
        ),
        "2": Task(
            task_id="2",
            instruction="对数据集进行数据清洗",
            task_type="data_preprocess",
            dependent_task_ids=["1"],
        ),
    }
    plan = Plan(
        goal="构造数据集并进行数据清洗",
        tasks=list(task_map.values()),
        task_map=task_map,
        current_task_id="2",
    )
    task_guide = """
step 1: 对数据集进行去重
step 2: 对数据集进行缺失值处理
"""
    data_desc = "None"

    code = await write_code.run(messages, plan, task_guide, data_desc)
    assert len(code) > 0
    print(code)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_write_code_to_correct_error():
|
||||
|
||||
|
|
@ -159,7 +231,7 @@ async def test_write_code_reuse_code_long():
|
|||
Message(content=structural_context, role="user"),
|
||||
]
|
||||
trials_num = 5
|
||||
trials = [WriteCodeByGenerate().run(context=context) for _ in range(trials_num)]
|
||||
trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
|
||||
trial_results = await asyncio.gather(*trials)
|
||||
print(*trial_results, sep="\n\n***\n\n")
|
||||
success = ["load_iris" not in result and "iris_data" in result \
|
||||
|
|
@ -167,3 +239,75 @@ async def test_write_code_reuse_code_long():
|
|||
success_rate = sum(success) / trials_num
|
||||
logger.info(f"success rate: {success_rate :.2f}")
|
||||
assert success_rate >= 0.8
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_write_code_reuse_code_long_for_wine():
    """Check that generated code reuses ``wine_data`` from a long context.

    The context describes a five-task plan on the sklearn Wine dataset in
    which task 1 has already loaded the data into ``wine_data``. Code is
    generated for task 2 five times concurrently; a trial succeeds when the
    result mentions ``wine_data`` without calling ``load_wine`` again. At
    least 80% of the trials must succeed.
    """
    # The requirement and the plan must name the same dataset, and the plan
    # must be valid JSON, otherwise the prompt itself is contradictory.
    structural_context = """
## User Requirement
Run data analysis on sklearn Wine recognition dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy
## Current Plan
[
    {
        "task_id": "1",
        "dependent_task_ids": [],
        "instruction": "Load the sklearn Wine recognition dataset and perform exploratory data analysis.",
        "task_type": "",
        "code": "from sklearn.datasets import load_wine\n# Load the Wine recognition dataset\nwine_data = load_wine()\n# Perform exploratory data analysis\nwine_data.keys()",
        "result": "Truncated to show only the last 1000 characters\ndict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])",
        "is_finished": true
    },
    {
        "task_id": "2",
        "dependent_task_ids": ["1"],
        "instruction": "Create a plot to visualize some aspect of the wine dataset.",
        "task_type": "",
        "code": "",
        "result": "",
        "is_finished": false
    },
    {
        "task_id": "3",
        "dependent_task_ids": ["1"],
        "instruction": "Split the dataset into training and validation sets with a 20% validation size.",
        "task_type": "",
        "code": "",
        "result": "",
        "is_finished": false
    },
    {
        "task_id": "4",
        "dependent_task_ids": ["3"],
        "instruction": "Train a model on the training set to predict wine class.",
        "task_type": "",
        "code": "",
        "result": "",
        "is_finished": false
    },
    {
        "task_id": "5",
        "dependent_task_ids": ["4"],
        "instruction": "Evaluate the model on the validation set and report the accuracy.",
        "task_type": "",
        "code": "",
        "result": "",
        "is_finished": false
    }
]
## Current Task
{"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Create a plot to visualize some aspect of the Wine dataset.", "task_type": "", "code": "", "result": "", "is_finished": false}
"""
    context = [
        Message(content=structural_context, role="user"),
    ]

    trials_num = 5
    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
    trial_results = await asyncio.gather(*trials)
    print(*trial_results, sep="\n\n***\n\n")

    # A trial passes when it reuses wine_data from the finished task instead
    # of loading the dataset again.
    success = [
        "load_wine" not in result and "wine_data" in result
        for result in trial_results
    ]
    success_rate = sum(success) / trials_num
    logger.info(f"success rate: {success_rate:.2f}")
    assert success_rate >= 0.8
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue