mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-15 11:02:36 +02:00
Add unit test: write_code_reuse_code_long_for_wine.
This commit is contained in:
parent
870ece45b2
commit
c2dba151fb
1 changed file with 173 additions and 101 deletions
|
|
@ -6,110 +6,110 @@ from metagpt.actions.execute_code import ExecutePyCode
|
|||
from metagpt.schema import Message
|
||||
from metagpt.logs import logger
|
||||
|
||||
@pytest.mark.asyncio
async def test_write_code_by_list_plan():
    """Generate and execute code for every step of a small plan, one step at a time.

    A single message history is shared across the steps: for each step the task
    text, the generated code and the first element of the execution output are
    appended, so the next generation call receives everything produced so far.
    """
    # Plan steps (Chinese): generate a random pandas DataFrame time series,
    # plot a histogram of that series, compute the mean.
    steps = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "求均值"]
    generator, executor = WriteCodeByGenerate(), ExecutePyCode()
    history = []

    for step in steps:
        print(f"\n任务: {step}\n\n")
        history.append(Message(step, role='assistant'))

        generated = await generator.run(history)
        history.append(Message(generated, role='assistant'))
        # The generator must return something non-empty for every step.
        assert len(generated) > 0

        result = await executor.run(generated)
        print(f"\n[Output]: 任务{step}的执行结果是: \n{result}\n")
        # Only the first element of the execution output is fed back into the history.
        history.append(result[0])
|
||||
# @pytest.mark.asyncio
|
||||
# async def test_write_code_by_list_plan():
|
||||
# write_code = WriteCodeByGenerate()
|
||||
# execute_code = ExecutePyCode()
|
||||
# messages = []
|
||||
# plan = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "求均值"]
|
||||
# for task in plan:
|
||||
# print(f"\n任务: {task}\n\n")
|
||||
# messages.append(Message(task, role='assistant'))
|
||||
# code = await write_code.run(messages)
|
||||
# messages.append(Message(code, role='assistant'))
|
||||
# assert len(code) > 0
|
||||
# output = await execute_code.run(code)
|
||||
# print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
|
||||
# messages.append(output[0])
|
||||
|
||||
@pytest.mark.asyncio
async def test_write_code_to_correct_error():
    """Check that code failing with a ``read_excel`` error is regenerated using ``read_csv``.

    The conversation handed to the generator consists of the plan prompt, the
    faulty code written for task 1, and the traceback produced by that code.
    """
    plan_prompt = """
    ## User Requirement
    read a dataset test.csv and print its head
    ## Current Plan
    [
        {
            "task_id": "1",
            "dependent_task_ids": [],
            "instruction": "import pandas and load the dataset from 'test.csv'.",
            "task_type": "",
            "code": "",
            "result": "",
            "is_finished": false
        },
        {
            "task_id": "2",
            "dependent_task_ids": [
                "1"
            ],
            "instruction": "Print the head of the dataset to display the first few rows.",
            "task_type": "",
            "code": "",
            "result": "",
            "is_finished": false
        }
    ]
    ## Current Task
    {"task_id": "1", "dependent_task_ids": [], "instruction": "import pandas and load the dataset from 'test.csv'.", "task_type": "", "code": "", "result": "", "is_finished": false}
    """
    # Deliberately wrong: read_excel is used to read a csv file.
    buggy_code = """import pandas as pd\ndata = pd.read_excel('test.csv')\ndata"""
    traceback_text = """
    Traceback (most recent call last):
        File "<stdin>", line 2, in <module>
        File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 478, in read_excel
            io = ExcelFile(io, storage_options=storage_options, engine=engine)
        File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1500, in __init__
            raise ValueError(
    ValueError: Excel file format cannot be determined, you must specify an engine manually.
    """
    conversation = [
        Message(content=plan_prompt, role="user"),
        Message(content=buggy_code, role="assistant"),
        Message(content=traceback_text, role="user"),
    ]

    fixed_code = await WriteCodeByGenerate().run(context=conversation)
    print(fixed_code)
    # The regenerated code is expected to switch from read_excel to read_csv.
    assert "read_csv" in fixed_code


# @pytest.mark.asyncio
# async def test_write_code_to_correct_error():
|
||||
# structural_context = """
|
||||
# ## User Requirement
|
||||
# read a dataset test.csv and print its head
|
||||
# ## Current Plan
|
||||
# [
|
||||
# {
|
||||
# "task_id": "1",
|
||||
# "dependent_task_ids": [],
|
||||
# "instruction": "import pandas and load the dataset from 'test.csv'.",
|
||||
# "task_type": "",
|
||||
# "code": "",
|
||||
# "result": "",
|
||||
# "is_finished": false
|
||||
# },
|
||||
# {
|
||||
# "task_id": "2",
|
||||
# "dependent_task_ids": [
|
||||
# "1"
|
||||
# ],
|
||||
# "instruction": "Print the head of the dataset to display the first few rows.",
|
||||
# "task_type": "",
|
||||
# "code": "",
|
||||
# "result": "",
|
||||
# "is_finished": false
|
||||
# }
|
||||
# ]
|
||||
# ## Current Task
|
||||
# {"task_id": "1", "dependent_task_ids": [], "instruction": "import pandas and load the dataset from 'test.csv'.", "task_type": "", "code": "", "result": "", "is_finished": false}
|
||||
# """
|
||||
# wrong_code = """import pandas as pd\ndata = pd.read_excel('test.csv')\ndata""" # use read_excel to read a csv
|
||||
# error = """
|
||||
# Traceback (most recent call last):
|
||||
# File "<stdin>", line 2, in <module>
|
||||
# File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 478, in read_excel
|
||||
# io = ExcelFile(io, storage_options=storage_options, engine=engine)
|
||||
# File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1500, in __init__
|
||||
# raise ValueError(
|
||||
# ValueError: Excel file format cannot be determined, you must specify an engine manually.
|
||||
# """
|
||||
# context = [
|
||||
# Message(content=structural_context, role="user"),
|
||||
# Message(content=wrong_code, role="assistant"),
|
||||
# Message(content=error, role="user"),
|
||||
# ]
|
||||
# new_code = await WriteCodeByGenerate().run(context=context)
|
||||
# print(new_code)
|
||||
# assert "read_csv" in new_code # should correct read_excel to read_csv
|
||||
|
||||
@pytest.mark.asyncio
async def test_write_code_reuse_code_simple():
    """Check that code for task 2 builds on task 1 instead of repeating it.

    Task 1 in the plan is already finished and carries the pandas import and
    the ``read_csv`` call, so the code generated for task 2 should contain
    neither of them.
    """
    plan_prompt = """
    ## User Requirement
    read a dataset test.csv and print its head
    ## Current Plan
    [
        {
            "task_id": "1",
            "dependent_task_ids": [],
            "instruction": "import pandas and load the dataset from 'test.csv'.",
            "task_type": "",
            "code": "import pandas as pd\ndata = pd.read_csv('test.csv')",
            "result": "",
            "is_finished": true
        },
        {
            "task_id": "2",
            "dependent_task_ids": [
                "1"
            ],
            "instruction": "Print the head of the dataset to display the first few rows.",
            "task_type": "",
            "code": "",
            "result": "",
            "is_finished": false
        }
    ]
    ## Current Task
    {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Print the head of the dataset to display the first few rows.", "task_type": "", "code": "", "result": "", "is_finished": false}
    """
    prompt_message = Message(content=plan_prompt, role="user")

    generated = await WriteCodeByGenerate().run(context=[prompt_message])
    print(generated)

    # Neither the import nor the read statement from task 1 may be repeated.
    repeats_previous_task = "pandas" in generated or "read_csv" in generated
    assert not repeats_previous_task
|
||||
# @pytest.mark.asyncio
|
||||
# async def test_write_code_reuse_code_simple():
|
||||
# structural_context = """
|
||||
# ## User Requirement
|
||||
# read a dataset test.csv and print its head
|
||||
# ## Current Plan
|
||||
# [
|
||||
# {
|
||||
# "task_id": "1",
|
||||
# "dependent_task_ids": [],
|
||||
# "instruction": "import pandas and load the dataset from 'test.csv'.",
|
||||
# "task_type": "",
|
||||
# "code": "import pandas as pd\ndata = pd.read_csv('test.csv')",
|
||||
# "result": "",
|
||||
# "is_finished": true
|
||||
# },
|
||||
# {
|
||||
# "task_id": "2",
|
||||
# "dependent_task_ids": [
|
||||
# "1"
|
||||
# ],
|
||||
# "instruction": "Print the head of the dataset to display the first few rows.",
|
||||
# "task_type": "",
|
||||
# "code": "",
|
||||
# "result": "",
|
||||
# "is_finished": false
|
||||
# }
|
||||
# ]
|
||||
# ## Current Task
|
||||
# {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Print the head of the dataset to display the first few rows.", "task_type": "", "code": "", "result": "", "is_finished": false}
|
||||
# """
|
||||
# context = [
|
||||
# Message(content=structural_context, role="user"),
|
||||
# ]
|
||||
# code = await WriteCodeByGenerate().run(context=context)
|
||||
# print(code)
|
||||
# assert "pandas" not in code and "read_csv" not in code # should reuse import and read statement from previous one
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_write_code_reuse_code_long():
|
||||
|
|
@ -167,3 +167,75 @@ async def test_write_code_reuse_code_long():
|
|||
success_rate = sum(success) / trials_num
|
||||
logger.info(f"success rate: {success_rate :.2f}")
|
||||
assert success_rate >= 0.8
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_write_code_reuse_code_long_for_wine():
    """Check that generated code reuses ``wine_data`` from a finished task in a long plan.

    Task 1 of the plan is already finished and has loaded the sklearn Wine
    dataset into ``wine_data``. The code generated for task 2 (plotting) should
    build on that variable rather than call ``load_wine`` again. Generation is
    run ``trials_num`` times concurrently, and the test passes when at least
    80% of the trials avoid reloading the dataset.
    """
    # The requirement, every plan entry and the current task all refer to the
    # Wine dataset, and the plan is well-formed JSON, so the prompt is
    # self-consistent.
    structural_context = """
    ## User Requirement
    Run data analysis on sklearn Wine recognition dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy
    ## Current Plan
    [
        {
            "task_id": "1",
            "dependent_task_ids": [],
            "instruction": "Load the sklearn Wine recognition dataset and perform exploratory data analysis.",
            "task_type": "",
            "code": "from sklearn.datasets import load_wine\n# Load the Wine recognition dataset\nwine_data = load_wine()\n# Perform exploratory data analysis\nwine_data.keys()",
            "result": "Truncated to show only the last 1000 characters\ndict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])",
            "is_finished": true
        },
        {
            "task_id": "2",
            "dependent_task_ids": ["1"],
            "instruction": "Create a plot to visualize some aspect of the wine dataset.",
            "task_type": "",
            "code": "",
            "result": "",
            "is_finished": false
        },
        {
            "task_id": "3",
            "dependent_task_ids": ["1"],
            "instruction": "Split the dataset into training and validation sets with a 20% validation size.",
            "task_type": "",
            "code": "",
            "result": "",
            "is_finished": false
        },
        {
            "task_id": "4",
            "dependent_task_ids": ["3"],
            "instruction": "Train a model on the training set to predict wine class.",
            "task_type": "",
            "code": "",
            "result": "",
            "is_finished": false
        },
        {
            "task_id": "5",
            "dependent_task_ids": ["4"],
            "instruction": "Evaluate the model on the validation set and report the accuracy.",
            "task_type": "",
            "code": "",
            "result": "",
            "is_finished": false
        }
    ]
    ## Current Task
    {"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Create a plot to visualize some aspect of the wine dataset.", "task_type": "", "code": "", "result": "", "is_finished": false}
    """
    context = [
        Message(content=structural_context, role="user"),
    ]

    trials_num = 5
    # Start all generations first, then await them together so they run concurrently.
    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
    trial_results = await asyncio.gather(*trials)
    print(*trial_results, sep="\n\n***\n\n")

    # A trial succeeds when the generated code does not call load_wine again,
    # i.e. it reuses wine_data from the finished task 1.
    success = ["load_wine" not in result for result in trial_results]
    success_rate = sum(success) / trials_num
    logger.info(f"success rate: {success_rate:.2f}")
    assert success_rate >= 0.8
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue