Merge pull request #736 from garylin2099/code_intepreter

Integrate CodeInterpreter
geekan 2024-02-05 20:46:37 +08:00 committed by GitHub
commit 23c27627ce
61 changed files with 5076 additions and 84 deletions

@@ -0,0 +1,12 @@
import pytest
from metagpt.actions.ci.ask_review import AskReview
@pytest.mark.asyncio
async def test_ask_review(mocker):
mock_review_input = "confirm"
mocker.patch("builtins.input", return_value=mock_review_input)
rsp, confirmed = await AskReview().run()
assert rsp == mock_review_input
assert confirmed
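For reference, a minimal sketch of the interface this test exercises — run() reads a human reply and reports whether it counts as confirmation (illustrative only; the real AskReview action may accept extra context and recognize more keywords):

class AskReviewSketch:
    async def run(self, context=None):
        # Read the reviewer's reply from stdin (patched to "confirm" in the test above)
        rsp = input("Please review the latest result: ").strip()
        # Treat an explicit "confirm" (or a simple yes) as approval
        confirmed = rsp.lower() in ("confirm", "yes", "y")
        return rsp, confirmed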

@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
# @Date : 1/11/2024 8:51 PM
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import pytest
from metagpt.actions.ci.debug_code import DebugCode
from metagpt.schema import Message
ErrorStr = """Tested passed:
Tests failed:
assert sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5] # output: [1, 2, 4, 3, 5]
"""
CODE = """
def sort_array(arr):
# Helper function to count the number of ones in the binary representation
def count_ones(n):
return bin(n).count('1')
# Sort the array using a custom key function
# The key function returns a tuple (number of ones, value) for each element
# This ensures that if two elements have the same number of ones, they are sorted by their value
sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x))
return sorted_arr
```
"""
DebugContext = '''Solve the problem in Python:
def sort_array(arr):
"""
In this Kata, you have to sort an array of non-negative integers according to
number of ones in their binary representation in ascending order.
For similar number of ones, sort based on decimal value.
It must be implemented like this:
>>> sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5]
>>> sort_array([-2, -3, -4, -5, -6]) == [-6, -5, -4, -3, -2]
>>> sort_array([1, 0, 2, 3, 4]) [0, 1, 2, 3, 4]
"""
'''
@pytest.mark.asyncio
async def test_debug_code():
debug_context = Message(content=DebugContext)
new_code = await DebugCode().run(context=debug_context, code=CODE, runtime_result=ErrorStr)
assert "def sort_array(arr)" in new_code["code"]

@@ -0,0 +1,116 @@
import pytest
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode, truncate
@pytest.mark.asyncio
async def test_code_running():
executor = ExecuteNbCode()
output, is_success = await executor.run("print('hello world!')")
assert is_success
@pytest.mark.asyncio
async def test_split_code_running():
executor = ExecuteNbCode()
_ = await executor.run("x=1\ny=2")
_ = await executor.run("z=x+y")
output, is_success = await executor.run("assert z==3")
assert is_success
@pytest.mark.asyncio
async def test_execute_error():
executor = ExecuteNbCode()
output, is_success = await executor.run("z=1/0")
assert not is_success
PLOT_CODE = """
import numpy as np
import matplotlib.pyplot as plt
# Generate random data
random_data = np.random.randn(1000)  # generate 1000 samples from a standard normal distribution
# Plot a histogram
plt.hist(random_data, bins=30, density=True, alpha=0.7, color='blue', edgecolor='black')
# Add a title and axis labels
plt.title('Histogram of Random Data')
plt.xlabel('Value')
plt.ylabel('Frequency')
# Show the figure
plt.show()
plt.close()
"""
@pytest.mark.asyncio
async def test_plotting_code():
executor = ExecuteNbCode()
output, is_success = await executor.run(PLOT_CODE)
assert is_success
def test_truncate():
# code execution succeeded
output, is_success = truncate("hello world", 5, True)
assert "Truncated to show only first 5 characters\nhello" in output
assert is_success
# code execution failed
output, is_success = truncate("hello world", 5, False)
assert "Truncated to show only last 5 characters\nworld" in output
assert not is_success
# async code (coroutine not awaited)
output, is_success = truncate("<coroutine object", 5, True)
assert not is_success
assert "await" in output
@pytest.mark.asyncio
async def test_run_with_timeout():
executor = ExecuteNbCode(timeout=1)
code = "import time; time.sleep(2)"
message, success = await executor.run(code)
assert not success
assert message.startswith("Cell execution timed out")
@pytest.mark.asyncio
async def test_run_code_text():
executor = ExecuteNbCode()
message, success = await executor.run(code='print("This is a code!")', language="python")
assert success
assert message == "This is a code!\n"
message, success = await executor.run(code="# This is a code!", language="markdown")
assert success
assert message == "# This is a code!"
mix_text = "# Title!\n ```python\n print('This is a code!')```"
message, success = await executor.run(code=mix_text, language="markdown")
assert success
assert message == mix_text
@pytest.mark.asyncio
async def test_terminate():
executor = ExecuteNbCode()
await executor.run(code='print("This is a code!")', language="python")
is_kernel_alive = await executor.nb_client.km.is_alive()
assert is_kernel_alive
await executor.terminate()
import time
time.sleep(2)
assert executor.nb_client.km is None
@pytest.mark.asyncio
async def test_reset():
executor = ExecuteNbCode()
await executor.run(code='print("This is a code!")', language="python")
is_kernel_alive = await executor.nb_client.km.is_alive()
assert is_kernel_alive
await executor.reset()
assert executor.nb_client.km is None

@@ -0,0 +1,46 @@
import pytest
from metagpt.actions.ci.ml_action import WriteCodeWithToolsML
from metagpt.schema import Plan, Task
@pytest.mark.asyncio
async def test_write_code_with_tools():
write_code_ml = WriteCodeWithToolsML()
task_map = {
"1": Task(
task_id="1",
instruction="随机生成一个pandas DataFrame数据集",
task_type="other",
dependent_task_ids=[],
code="""
import pandas as pd
df = pd.DataFrame({
'a': [1, 2, 3, 4, 5],
'b': [1.1, 2.2, 3.3, 4.4, np.nan],
'c': ['aa', 'bb', 'cc', 'dd', 'ee'],
'd': [1, 2, 3, 4, 5]
})
""",
is_finished=True,
),
"2": Task(
task_id="2",
instruction="对数据集进行数据清洗",
task_type="data_preprocess",
dependent_task_ids=["1"],
),
}
plan = Plan(
goal="构造数据集并进行数据清洗",
tasks=list(task_map.values()),
task_map=task_map,
current_task_id="2",
)
column_info = ""
_, code_with_ml = await write_code_ml.run([], plan, column_info)
code_with_ml = code_with_ml["code"]
assert len(code_with_ml) > 0
print(code_with_ml)

@@ -0,0 +1,324 @@
import asyncio
import pytest
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
from metagpt.actions.ci.write_analysis_code import (
WriteCodeWithoutTools,
WriteCodeWithTools,
)
from metagpt.logs import logger
from metagpt.schema import Message, Plan, Task
from metagpt.strategy.planner import STRUCTURAL_CONTEXT
@pytest.mark.skip
@pytest.mark.asyncio
async def test_write_code_by_list_plan():
write_code = WriteCodeWithoutTools()
execute_code = ExecuteNbCode()
messages = []
plan = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "回顾已完成的任务", "求均值", "总结"]
for task in plan:
print(f"\n任务: {task}\n\n")
messages.append(Message(task, role="assistant"))
code = await write_code.run(messages)
if task.startswith(("回顾", "总结")):
assert code["language"] == "markdown"
else:
assert code["language"] == "python"
messages.append(Message(code["code"], role="assistant"))
assert len(code) > 0
output, _ = await execute_code.run(**code)
print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
messages.append(output)
@pytest.mark.asyncio
async def test_tool_recommendation():
task = "clean and preprocess the data"
available_tools = {
"FillMissingValue": "Filling missing values",
"SplitBins": "Bin continuous data into intervals and return the bin identifier encoded as an integer value",
}
write_code = WriteCodeWithTools()
tools = await write_code._recommend_tool(task, available_tools)
assert len(tools) == 1
assert "FillMissingValue" in tools
@pytest.mark.asyncio
async def test_write_code_with_tools():
write_code = WriteCodeWithTools()
requirement = "构造数据集并进行数据清洗"
task_map = {
"1": Task(
task_id="1",
instruction="随机生成一个pandas DataFrame数据集",
task_type="other",
dependent_task_ids=[],
code="""
import pandas as pd
df = pd.DataFrame({
'a': [1, 2, 3, 4, 5],
'b': [1.1, 2.2, 3.3, 4.4, np.nan],
'c': ['aa', 'bb', 'cc', 'dd', 'ee'],
'd': [1, 2, 3, 4, 5]
})
""",
is_finished=True,
),
"2": Task(
task_id="2",
instruction="对数据集进行数据清洗",
task_type="data_preprocess",
dependent_task_ids=["1"],
),
}
plan = Plan(
goal="构造数据集并进行数据清洗",
tasks=list(task_map.values()),
task_map=task_map,
current_task_id="2",
)
context = STRUCTURAL_CONTEXT.format(
user_requirement=requirement,
context=plan.context,
tasks=list(task_map.values()),
current_task=plan.current_task.model_dump_json(),
)
context_msg = [Message(content=context, role="user")]
code = await write_code.run(context_msg, plan)
code = code["code"]
assert len(code) > 0
print(code)
@pytest.mark.asyncio
async def test_write_code_to_correct_error():
structural_context = """
## User Requirement
read a dataset test.csv and print its head
## Current Plan
[
{
"task_id": "1",
"dependent_task_ids": [],
"instruction": "import pandas and load the dataset from 'test.csv'.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "2",
"dependent_task_ids": [
"1"
],
"instruction": "Print the head of the dataset to display the first few rows.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
}
]
## Current Task
{"task_id": "1", "dependent_task_ids": [], "instruction": "import pandas and load the dataset from 'test.csv'.", "task_type": "", "code": "", "result": "", "is_finished": false}
"""
wrong_code = """import pandas as pd\ndata = pd.read_excel('test.csv')\ndata""" # use read_excel to read a csv
error = """
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 478, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "/Users/gary/miniconda3/envs/py39_scratch/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1500, in __init__
raise ValueError(
ValueError: Excel file format cannot be determined, you must specify an engine manually.
"""
context = [
Message(content=structural_context, role="user"),
Message(content=wrong_code, role="assistant"),
Message(content=error, role="user"),
]
new_code = await WriteCodeWithoutTools().run(context=context)
new_code = new_code["code"]
print(new_code)
assert "read_csv" in new_code # should correct read_excel to read_csv
@pytest.mark.asyncio
async def test_write_code_reuse_code_simple():
structural_context = """
## User Requirement
read a dataset test.csv and print its head
## Current Plan
[
{
"task_id": "1",
"dependent_task_ids": [],
"instruction": "import pandas and load the dataset from 'test.csv'.",
"task_type": "",
"code": "import pandas as pd\ndata = pd.read_csv('test.csv')",
"result": "",
"is_finished": true
},
{
"task_id": "2",
"dependent_task_ids": [
"1"
],
"instruction": "Print the head of the dataset to display the first few rows.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
}
]
## Current Task
{"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Print the head of the dataset to display the first few rows.", "task_type": "", "code": "", "result": "", "is_finished": false}
"""
context = [
Message(content=structural_context, role="user"),
]
code = await WriteCodeWithoutTools().run(context=context)
code = code["code"]
print(code)
assert "pandas" not in code and "read_csv" not in code # should reuse import and read statement from previous one
@pytest.mark.skip
@pytest.mark.asyncio
async def test_write_code_reuse_code_long():
"""test code reuse for long context"""
structural_context = """
## User Requirement
Run data analysis on sklearn Iris dataset, include a plot
## Current Plan
[
{
"task_id": "1",
"dependent_task_ids": [],
"instruction": "Load the Iris dataset from sklearn.",
"task_type": "",
"code": "from sklearn.datasets import load_iris\niris_data = load_iris()\niris_data['data'][0:5], iris_data['target'][0:5]",
"result": "(array([[5.1, 3.5, 1.4, 0.2],\n [4.9, 3. , 1.4, 0.2],\n [4.7, 3.2, 1.3, 0.2],\n [4.6, 3.1, 1.5, 0.2],\n [5. , 3.6, 1.4, 0.2]]),\n array([0, 0, 0, 0, 0]))",
"is_finished": true
},
{
"task_id": "2",
"dependent_task_ids": [
"1"
],
"instruction": "Perform exploratory data analysis on the Iris dataset.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "3",
"dependent_task_ids": [
"2"
],
"instruction": "Create a plot visualizing the Iris dataset features.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
}
]
## Current Task
{"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Perform exploratory data analysis on the Iris dataset.", "task_type": "", "code": "", "result": "", "is_finished": false}
"""
context = [
Message(content=structural_context, role="user"),
]
trials_num = 5
trials = [WriteCodeWithoutTools().run(context=context, temperature=0.0) for _ in range(trials_num)]
trial_results = await asyncio.gather(*trials)
print(*trial_results, sep="\n\n***\n\n")
success = [
"load_iris" not in result["code"] and "iris_data" in result["code"] for result in trial_results
] # should reuse iris_data from previous tasks
success_rate = sum(success) / trials_num
logger.info(f"success rate: {success_rate :.2f}")
assert success_rate >= 0.8
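Note: with trials_num = 5, the 0.8 threshold tolerates at most one failing trial (4 / 5 = 0.8).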
@pytest.mark.skip
@pytest.mark.asyncio
async def test_write_code_reuse_code_long_for_wine():
"""test code reuse for long context"""
structural_context = """
## User Requirement
Run data analysis on sklearn Wine recognition dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy
## Current Plan
[
{
"task_id": "1",
"dependent_task_ids": [],
"instruction": "Load the sklearn Wine recognition dataset and perform exploratory data analysis."
"task_type": "",
"code": "from sklearn.datasets import load_wine\n# Load the Wine recognition dataset\nwine_data = load_wine()\n# Perform exploratory data analysis\nwine_data.keys()",
"result": "Truncated to show only the last 1000 characters\ndict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])",
"is_finished": true
},
{
"task_id": "2",
"dependent_task_ids": ["1"],
"instruction": "Create a plot to visualize some aspect of the wine dataset."
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "3",
"dependent_task_ids": ["1"],
"instruction": "Split the dataset into training and validation sets with a 20% validation size.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "4",
"dependent_task_ids": ["3"],
"instruction": "Train a model on the training set to predict wine class.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
},
{
"task_id": "5",
"dependent_task_ids": ["4"],
"instruction": "Evaluate the model on the validation set and report the accuracy.",
"task_type": "",
"code": "",
"result": "",
"is_finished": false
}
]
## Current Task
{"task_id": "2", "dependent_task_ids": ["1"], "instruction": "Create a plot to visualize some aspect of the Wine dataset.", "task_type": "", "code": "", "result": "", "is_finished": false}
"""
context = [
Message(content=structural_context, role="user"),
]
trials_num = 5
trials = [WriteCodeWithoutTools().run(context=context, temperature=0.0) for _ in range(trials_num)]
trial_results = await asyncio.gather(*trials)
print(*trial_results, sep="\n\n***\n\n")
success = [
"load_wine" not in result["code"] and "wine_data" in result["code"] for result in trial_results
] # should reuse wine_data from previous tasks
success_rate = sum(success) / trials_num
logger.info(f"success rate: {success_rate :.2f}")
assert success_rate >= 0.8

@@ -0,0 +1,34 @@
import pytest
from metagpt.actions.ci.write_plan import (
Plan,
Task,
WritePlan,
precheck_update_plan_from_rsp,
)
from metagpt.schema import Message
def test_precheck_update_plan_from_rsp():
plan = Plan(goal="")
plan.add_tasks([Task(task_id="1")])
rsp = '[{"task_id": "2"}]'
success, _ = precheck_update_plan_from_rsp(rsp, plan)
assert success
assert len(plan.tasks) == 1 and plan.tasks[0].task_id == "1" # precheck should not change the original one
invalid_rsp = "wrong"
success, _ = precheck_update_plan_from_rsp(invalid_rsp, plan)
assert not success
@pytest.mark.asyncio
@pytest.mark.parametrize("use_tools", [(False), (True)])
async def test_write_plan(use_tools):
rsp = await WritePlan().run(
context=[Message("run analysis on sklearn iris dataset", role="user")], use_tools=use_tools
)
assert "task_id" in rsp
assert "instruction" in rsp
assert "json" not in rsp # the output should be the content inside ```json ```

@@ -1,49 +1,25 @@
import json
import pytest
from openai.types.chat import (
ChatCompletion,
ChatCompletionMessage,
ChatCompletionMessageToolCall,
)
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_message_tool_call import Function
from PIL import Image
from metagpt.const import TEST_DATA_PATH
from metagpt.llm import LLM
from metagpt.logs import logger
from metagpt.provider import OpenAILLM
from metagpt.schema import UserMessage
from tests.metagpt.provider.mock_llm_config import (
mock_llm_config,
mock_llm_config_proxy,
)
@pytest.mark.asyncio
async def test_aask_code():
llm = LLM()
msg = [{"role": "user", "content": "Write a python hello world code."}]
rsp = await llm.aask_code(msg) # -> {'language': 'python', 'code': "print('Hello, World!')"}
logger.info(rsp)
assert "language" in rsp
assert "code" in rsp
assert len(rsp["code"]) > 0
@pytest.mark.asyncio
async def test_aask_code_str():
llm = LLM()
msg = "Write a python hello world code."
rsp = await llm.aask_code(msg) # -> {'language': 'python', 'code': "print('Hello, World!')"}
assert "language" in rsp
assert "code" in rsp
assert len(rsp["code"]) > 0
@pytest.mark.asyncio
async def test_aask_code_message():
llm = LLM()
msg = UserMessage("Write a python hello world code.")
rsp = await llm.aask_code(msg) # -> {'language': 'python', 'code': "print('Hello, World!')"}
assert "language" in rsp
assert "code" in rsp
assert len(rsp["code"]) > 0
@pytest.mark.asyncio
async def test_text_to_speech():
llm = LLM()
@@ -63,16 +39,41 @@ async def test_speech_to_text():
assert "你好" == resp.text
@pytest.fixture
def tool_calls_rsp():
function_rsps = [
Function(arguments='{\n"language": "python",\n"code": "print(\'hello world\')"}', name="execute"),
]
tool_calls = [
ChatCompletionMessageToolCall(type="function", id=f"call_{i}", function=f) for i, f in enumerate(function_rsps)
]
messages = [ChatCompletionMessage(content=None, role="assistant", tool_calls=[t]) for t in tool_calls]
# add a plain-text response
messages.append(
ChatCompletionMessage(content="Completed a python code for hello world!", role="assistant", tool_calls=None)
)
# add the OpenAI tool_calls response bug case, where the code appears in ChatCompletionMessage.content
messages.extend(
[
ChatCompletionMessage(content="```python\nprint('hello world')```", role="assistant", tool_calls=None),
]
)
choices = [
Choice(finish_reason="tool_calls", logprobs=None, index=i, message=msg) for i, msg in enumerate(messages)
]
return [
ChatCompletion(id=str(i), choices=[c], created=i, model="gpt-4", object="chat.completion")
for i, c in enumerate(choices)
]
@pytest.fixture
def json_decode_error():
function_rsp = Function(arguments='{\n"language": \'python\',\n"code": "print(\'hello world\')"}', name="execute")
tool_calls = [ChatCompletionMessageToolCall(type="function", id=f"call_{0}", function=function_rsp)]
message = ChatCompletionMessage(content=None, role="assistant", tool_calls=tool_calls)
choices = [Choice(finish_reason="tool_calls", logprobs=None, index=0, message=message)]
return ChatCompletion(id="0", choices=choices, created=0, model="gpt-4", object="chat.completion")
class TestOpenAI:
@@ -87,3 +88,36 @@ class TestOpenAI:
instance = OpenAILLM(mock_llm_config_proxy)
kwargs = instance._make_client_kwargs()
assert "http_client" in kwargs
def test_get_choice_function_arguments_for_aask_code(self, tool_calls_rsp):
instance = OpenAILLM(mock_llm_config_proxy)
for i, rsp in enumerate(tool_calls_rsp):
code = instance.get_choice_function_arguments(rsp)
logger.info(f"\ntest get function call arguments {i}: {code}")
assert "code" in code
assert "language" in code
assert "hello world" in code["code"]
logger.info(f'code is : {code["code"]}')
if "Completed a python code for hello world!" == code["code"]:
code["language"] == "markdown"
else:
code["language"] == "python"
def test_aask_code_json_decode_error(self, json_decode_error):
instance = OpenAILLM(mock_llm_config)
with pytest.raises(json.decoder.JSONDecodeError) as e:
instance.get_choice_function_arguments(json_decode_error)
assert "JSONDecodeError" in str(e)
@pytest.mark.asyncio
async def test_gen_image():
llm = LLM()
model = "dall-e-3"
prompt = 'a logo with word "MetaGPT"'
images: list[Image] = await llm.gen_image(model=model, prompt=prompt)
assert images[0].size == (1024, 1024)
images: list[Image] = await llm.gen_image(model=model, prompt=prompt, resp_format="b64_json")
assert images[0].size == (1024, 1024)

@@ -0,0 +1,19 @@
import pytest
from metagpt.logs import logger
from metagpt.roles.ci.code_interpreter import CodeInterpreter
@pytest.mark.asyncio
@pytest.mark.parametrize("auto_run", [(True), (False)])
async def test_code_interpreter(mocker, auto_run):
mocker.patch("metagpt.actions.ci.execute_nb_code.ExecuteNbCode.run", return_value=("a successful run", True))
mocker.patch("builtins.input", return_value="confirm")
requirement = "Run data analysis on sklearn Iris dataset, include a plot"
tools = []
ci = CodeInterpreter(auto_run=auto_run, use_tools=True, tools=tools)
rsp = await ci.run(requirement)
logger.info(rsp)
assert len(rsp.content) > 0

@@ -0,0 +1,90 @@
import pytest
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
from metagpt.logs import logger
from metagpt.roles.ci.ml_engineer import MLEngineer
from metagpt.schema import Message, Plan, Task
from metagpt.tools.tool_type import ToolType
from tests.metagpt.actions.ci.test_debug_code import CODE, DebugContext, ErrorStr
def test_mle_init():
ci = MLEngineer(goal="test", auto_run=True, use_tools=True, tools=["tool1", "tool2"])
assert ci.tools == []
MockPlan = Plan(
goal="This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: 'tests/data/ml_datasets/titanic/split_train.csv', eval data path: 'tests/data/ml_datasets/titanic/split_eval.csv'.",
context="",
tasks=[
Task(
task_id="1",
dependent_task_ids=[],
instruction="Perform exploratory data analysis on the train dataset to understand the features and target variable.",
task_type="eda",
code="",
result="",
is_success=False,
is_finished=False,
)
],
task_map={
"1": Task(
task_id="1",
dependent_task_ids=[],
instruction="Perform exploratory data analysis on the train dataset to understand the features and target variable.",
task_type="eda",
code="",
result="",
is_success=False,
is_finished=False,
)
},
current_task_id="1",
)
@pytest.mark.asyncio
async def test_mle_write_code(mocker):
data_path = "tests/data/ml_datasets/titanic"
mle = MLEngineer(auto_run=True, use_tools=True)
mle.planner.plan = MockPlan
code, _ = await mle._write_code()
assert data_path in code["code"]
@pytest.mark.asyncio
async def test_mle_update_data_columns(mocker):
mle = MLEngineer(auto_run=True, use_tools=True)
mle.planner.plan = MockPlan
# manually update task type to test update
mle.planner.plan.current_task.task_type = ToolType.DATA_PREPROCESS.value
result = await mle._update_data_columns()
assert result is not None
@pytest.mark.asyncio
async def test_mle_debug_code(mocker):
mle = MLEngineer(auto_run=True, use_tools=True)
mle.working_memory.add(Message(content=ErrorStr, cause_by=ExecuteNbCode))
mle.latest_code = CODE
mle.debug_context = DebugContext
code, _ = await mle._write_code()
assert len(code) > 0
@pytest.mark.skip
@pytest.mark.asyncio
async def test_ml_engineer():
data_path = "tests/data/ml_datasets/titanic"
requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
tools = ["FillMissingValue", "CatCross", "dummy_tool"]
mle = MLEngineer(auto_run=True, use_tools=True, tools=tools)
rsp = await mle.run(requirement)
logger.info(rsp)
assert len(rsp.content) > 0

@@ -25,7 +25,9 @@ from metagpt.schema import (
Document,
Message,
MessageQueue,
Plan,
SystemMessage,
Task,
UserMessage,
)
from metagpt.utils.common import any_to_str
@@ -181,5 +183,173 @@ def test_class_view():
)
class TestPlan:
def test_add_tasks_ordering(self):
plan = Plan(goal="")
tasks = [
Task(task_id="1", dependent_task_ids=["2", "3"], instruction="Third"),
Task(task_id="2", instruction="First"),
Task(task_id="3", dependent_task_ids=["2"], instruction="Second"),
] # 2 -> 3 -> 1
plan.add_tasks(tasks)
assert [task.task_id for task in plan.tasks] == ["2", "3", "1"]
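The expected ordering is a topological sort over dependent_task_ids; a minimal sketch of that idea (Kahn-style, illustrative — not the project's actual implementation):

from collections import deque

def topo_order(tasks):
    # tasks: objects exposing .task_id and .dependent_task_ids
    deps = {t.task_id: set(t.dependent_task_ids) for t in tasks}
    ready = deque(sorted(tid for tid, d in deps.items() if not d))
    order = []
    while ready:
        tid = ready.popleft()
        order.append(tid)
        for other, d in deps.items():
            if tid in d:
                d.remove(tid)
                if not d:
                    ready.append(other)
    return order  # ["2", "3", "1"] for the tasks above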
def test_add_tasks_to_existing_no_common_prefix(self):
plan = Plan(goal="")
tasks = [
Task(task_id="1", dependent_task_ids=["2", "3"], instruction="Third"),
Task(task_id="2", instruction="First"),
Task(task_id="3", dependent_task_ids=["2"], instruction="Second", is_finished=True),
] # 2 -> 3 -> 1
plan.add_tasks(tasks)
new_tasks = [Task(task_id="3", instruction="")]
plan.add_tasks(new_tasks)
assert [task.task_id for task in plan.tasks] == ["3"]
assert not plan.tasks[0].is_finished # must be the new unfinished task
def test_add_tasks_to_existing_with_common_prefix(self):
plan = Plan(goal="")
tasks = [
Task(task_id="1", dependent_task_ids=["2", "3"], instruction="Third"),
Task(task_id="2", instruction="First"),
Task(task_id="3", dependent_task_ids=["2"], instruction="Second"),
] # 2 -> 3 -> 1
plan.add_tasks(tasks)
plan.finish_current_task() # finish 2
plan.finish_current_task() # finish 3
new_tasks = [
Task(task_id="4", dependent_task_ids=["3"], instruction="Third"),
Task(task_id="2", instruction="First"),
Task(task_id="3", dependent_task_ids=["2"], instruction="Second"),
] # 2 -> 3 -> 4, so the common prefix is 2 -> 3, and these two should be obtained from the existing tasks
plan.add_tasks(new_tasks)
assert [task.task_id for task in plan.tasks] == ["2", "3", "4"]
assert (
plan.tasks[0].is_finished and plan.tasks[1].is_finished
) # "2" and "3" should be the original finished one
assert plan.current_task_id == "4"
def test_current_task(self):
plan = Plan(goal="")
tasks = [
Task(task_id="1", dependent_task_ids=["2"], instruction="Second"),
Task(task_id="2", instruction="First"),
]
plan.add_tasks(tasks)
assert plan.current_task.task_id == "2"
def test_finish_task(self):
plan = Plan(goal="")
tasks = [
Task(task_id="1", instruction="First"),
Task(task_id="2", dependent_task_ids=["1"], instruction="Second"),
]
plan.add_tasks(tasks)
plan.finish_current_task()
assert plan.current_task.task_id == "2"
def test_finished_tasks(self):
plan = Plan(goal="")
tasks = [
Task(task_id="1", instruction="First"),
Task(task_id="2", dependent_task_ids=["1"], instruction="Second"),
]
plan.add_tasks(tasks)
plan.finish_current_task()
finished_tasks = plan.get_finished_tasks()
assert len(finished_tasks) == 1
assert finished_tasks[0].task_id == "1"
def test_reset_task_existing(self):
plan = Plan(goal="")
task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", finished=True)
plan.add_tasks([task])
plan.reset_task("1")
reset_task = plan.task_map["1"]
assert reset_task.code == ""
assert reset_task.result == ""
assert not reset_task.is_finished
def test_reset_task_non_existing(self):
plan = Plan(goal="")
task = Task(task_id="1", instruction="Do something", code="print('Hello')", result="Hello", finished=True)
plan.add_tasks([task])
plan.reset_task("2") # Task with ID 2 does not exist
assert "1" in plan.task_map
assert "2" not in plan.task_map
def test_replace_task_with_dependents(self):
plan = Plan(goal="")
tasks = [
Task(task_id="1", instruction="First Task", finished=True),
Task(task_id="2", instruction="Second Task", dependent_task_ids=["1"], finished=True),
]
plan.add_tasks(tasks)
new_task = Task(task_id="1", instruction="Updated First Task")
plan.replace_task(new_task)
assert plan.task_map["1"].instruction == "Updated First Task"
assert not plan.task_map["2"].is_finished # Dependent task should be reset
assert plan.task_map["2"].code == ""
assert plan.task_map["2"].result == ""
def test_replace_task_non_existing(self):
plan = Plan(goal="")
task = Task(task_id="1", instruction="First Task")
plan.add_tasks([task])
new_task = Task(task_id="2", instruction="New Task")
with pytest.raises(AssertionError):
plan.replace_task(new_task) # Task with ID 2 does not exist in plan
assert "1" in plan.task_map
assert "2" not in plan.task_map
def test_append_task_with_valid_dependencies(self):
plan = Plan(goal="Test")
existing_task = [Task(task_id="1")]
plan.add_tasks(existing_task)
new_task = Task(task_id="2", dependent_task_ids=["1"])
plan.append_task(new_task)
assert plan.tasks[-1].task_id == "2"
assert plan.task_map["2"] == new_task
def test_append_task_with_invalid_dependencies(self):
new_task = Task(task_id="2", dependent_task_ids=["3"])
plan = Plan(goal="Test")
with pytest.raises(AssertionError):
plan.append_task(new_task)
def test_append_task_without_dependencies(self):
plan = Plan(goal="Test")
existing_task = [Task(task_id="1")]
plan.add_tasks(existing_task)
new_task = Task(task_id="2")
plan.append_task(new_task)
assert len(plan.tasks) == 2
assert plan.current_task_id == "1"
def test_append_task_updates_current_task(self):
finished_task = Task(task_id="1", is_finished=True)
new_task = Task(task_id="2")
plan = Plan(goal="Test", tasks=[finished_task])
plan.append_task(new_task)
assert plan.current_task_id == "2"
def test_update_current_task(self):
task1 = Task(task_id="1", is_finished=True)
task2 = Task(task_id="2")
plan = Plan(goal="Test", tasks=[task1, task2])
plan._update_current_task()
assert plan.current_task_id == "2"
if __name__ == "__main__":
pytest.main([__file__, "-s"])

@@ -0,0 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2024/1/11 16:14
# @Author : lidanyang
# @File : __init__.py
# @Desc :

@@ -0,0 +1,111 @@
from datetime import datetime
import numpy as np
import numpy.testing as npt
import pandas as pd
import pytest
from metagpt.tools.libs.data_preprocess import (
FillMissingValue,
LabelEncode,
MaxAbsScale,
MinMaxScale,
OneHotEncode,
OrdinalEncode,
RobustScale,
StandardScale,
get_column_info,
)
@pytest.fixture
def mock_datasets():
return pd.DataFrame(
{
"num1": [1, 2, np.nan, 4, 5],
"cat1": ["A", "B", np.nan, "D", "A"],
"date1": [
datetime(2020, 1, 1),
datetime(2020, 1, 2),
datetime(2020, 1, 3),
datetime(2020, 1, 4),
datetime(2020, 1, 5),
],
}
)
def test_fill_missing_value(mock_datasets):
fm = FillMissingValue(features=["num1"], strategy="mean")
transformed = fm.fit_transform(mock_datasets.copy())
assert transformed["num1"].isnull().sum() == 0
def test_min_max_scale(mock_datasets):
mms = MinMaxScale(features=["num1"])
transformed = mms.fit_transform(mock_datasets.copy())
npt.assert_allclose(transformed["num1"].min(), 0)
npt.assert_allclose(transformed["num1"].max(), 1)
def test_standard_scale(mock_datasets):
ss = StandardScale(features=["num1"])
transformed = ss.fit_transform(mock_datasets.copy())
assert int(transformed["num1"].mean()) == 0
assert int(transformed["num1"].std()) == 1
def test_max_abs_scale(mock_datasets):
mas = MaxAbsScale(features=["num1"])
transformed = mas.fit_transform(mock_datasets.copy())
npt.assert_allclose(transformed["num1"].abs().max(), 1)
def test_robust_scale(mock_datasets):
rs = RobustScale(features=["num1"])
transformed = rs.fit_transform(mock_datasets.copy())
assert int(transformed["num1"].median()) == 0
def test_ordinal_encode(mock_datasets):
oe = OrdinalEncode(features=["cat1"])
transformed = oe.fit_transform(mock_datasets.copy())
assert transformed["cat1"].max() == 2
def test_one_hot_encode(mock_datasets):
ohe = OneHotEncode(features=["cat1"])
transformed = ohe.fit_transform(mock_datasets.copy())
assert transformed["cat1_A"].max() == 1
def test_label_encode(mock_datasets):
le = LabelEncode(features=["cat1"])
transformed = le.fit_transform(mock_datasets.copy())
assert transformed["cat1"].max() == 3
# test transform with unseen data
test = mock_datasets.copy()
test["cat1"] = ["A", "B", "C", "D", "E"]
transformed = le.transform(test)
assert transformed["cat1"].max() == 4
def test_get_column_info(mock_datasets):
df = mock_datasets
column_info = get_column_info(df)
assert column_info == {
"Category": ["cat1"],
"Numeric": ["num1"],
"Datetime": ["date1"],
"Others": [],
}
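A rough sketch of a column classifier producing this mapping (illustrative; the real get_column_info may use different heuristics):

import pandas as pd

def get_column_info_sketch(df: pd.DataFrame) -> dict:
    info = {"Category": [], "Numeric": [], "Datetime": [], "Others": []}
    for col in df.columns:
        dtype = df[col].dtype
        if pd.api.types.is_datetime64_any_dtype(dtype):
            info["Datetime"].append(col)
        elif pd.api.types.is_numeric_dtype(dtype):
            info["Numeric"].append(col)
        elif pd.api.types.is_object_dtype(dtype) or isinstance(dtype, pd.CategoricalDtype):
            info["Category"].append(col)
        else:
            info["Others"].append(col)
    return info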

@@ -0,0 +1,175 @@
import numpy as np
import pandas as pd
import pytest
from sklearn.datasets import fetch_california_housing, load_breast_cancer, load_iris
from metagpt.tools.libs.feature_engineering import (
CatCount,
CatCross,
ExtractTimeComps,
GeneralSelection,
GroupStat,
KFoldTargetMeanEncoder,
PolynomialExpansion,
SplitBins,
TargetMeanEncoder,
TreeBasedSelection,
VarianceBasedSelection,
)
@pytest.fixture
def mock_dataset():
return pd.DataFrame(
{
"num1": [1, 2, np.nan, 4, 5, 6, 7, 3],
"num2": [1, 3, 2, 1, np.nan, 5, 6, 4],
"num3": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
"cat1": ["A", "B", np.nan, "D", "E", "C", "B", "A"],
"cat2": ["A", "A", "A", "A", "A", "A", "A", "A"],
"date1": [
"2020-01-01",
"2020-01-02",
"2020-01-03",
"2020-01-04",
"2020-01-05",
"2020-01-06",
"2020-01-07",
"2020-01-08",
],
"label": [0, 1, 0, 1, 0, 1, 0, 1],
}
)
def load_sklearn_data(data_name):
if data_name == "iris":
data = load_iris()
elif data_name == "breast_cancer":
data = load_breast_cancer()
elif data_name == "housing":
data = fetch_california_housing()
else:
raise ValueError("data_name not supported")
X, y, feature_names = data.data, data.target, data.feature_names
data = pd.DataFrame(X, columns=feature_names)
data["label"] = y
return data
def test_polynomial_expansion(mock_dataset):
pe = PolynomialExpansion(cols=["num1", "num2", "label"], degree=2, label_col="label")
transformed = pe.fit_transform(mock_dataset)
assert len(transformed.columns) == len(mock_dataset.columns) + 3
# when too many columns
data = load_sklearn_data("breast_cancer")
cols = [c for c in data.columns if c != "label"]
pe = PolynomialExpansion(cols=cols, degree=2, label_col="label")
transformed = pe.fit_transform(data)
assert len(transformed.columns) == len(data.columns) + 55
def test_cat_count(mock_dataset):
cc = CatCount(col="cat1")
transformed = cc.fit_transform(mock_dataset)
assert "cat1_cnt" in transformed.columns
assert transformed["cat1_cnt"][0] == 2
def test_target_mean_encoder(mock_dataset):
tme = TargetMeanEncoder(col="cat1", label="label")
transformed = tme.fit_transform(mock_dataset)
assert "cat1_target_mean" in transformed.columns
assert transformed["cat1_target_mean"][0] == 0.5
def test_kfold_target_mean_encoder(mock_dataset):
kfme = KFoldTargetMeanEncoder(col="cat1", label="label")
transformed = kfme.fit_transform(mock_dataset)
assert "cat1_kf_target_mean" in transformed.columns
def test_cat_cross(mock_dataset):
cc = CatCross(cols=["cat1", "cat2"])
transformed = cc.fit_transform(mock_dataset)
assert "cat1_cat2" in transformed.columns
cc = CatCross(cols=["cat1", "cat2"], max_cat_num=3)
transformed = cc.fit_transform(mock_dataset)
assert "cat1_cat2" not in transformed.columns
def test_group_stat(mock_dataset):
gs = GroupStat(group_col="cat1", agg_col="num1", agg_funcs=["mean", "sum"])
transformed = gs.fit_transform(mock_dataset)
assert "num1_mean_by_cat1" in transformed.columns
assert "num1_sum_by_cat1" in transformed.columns
def test_split_bins(mock_dataset):
sb = SplitBins(cols=["num1"])
transformed = sb.fit_transform(mock_dataset)
assert transformed["num1"].nunique() <= 5
assert all(0 <= x < 5 for x in transformed["num1"])
def test_extract_time_comps(mock_dataset):
time_comps = ["year", "month", "day", "hour", "dayofweek", "is_weekend"]
etc = ExtractTimeComps(time_col="date1", time_comps=time_comps)
transformed = etc.fit_transform(mock_dataset.copy())
for comp in time_comps:
assert comp in transformed.columns
assert transformed["year"][0] == 2020
assert transformed["month"][0] == 1
assert transformed["day"][0] == 1
assert transformed["hour"][0] == 0
assert transformed["dayofweek"][0] == 3
assert transformed["is_weekend"][0] == 0
def test_general_selection(mock_dataset):
gs = GeneralSelection(label_col="label")
transformed = gs.fit_transform(mock_dataset.copy())
assert "num3" not in transformed.columns
assert "cat2" not in transformed.columns
@pytest.mark.skip # skip because TreeBasedSelection needs lgb as dependency
def test_tree_based_selection(mock_dataset):
# regression
data = load_sklearn_data("housing")
tbs = TreeBasedSelection(label_col="label", task_type="reg")
transformed = tbs.fit_transform(data)
assert len(transformed.columns) > 1
# classification
data = load_sklearn_data("breast_cancer")
tbs = TreeBasedSelection(label_col="label", task_type="cls")
transformed = tbs.fit_transform(data)
assert len(transformed.columns) > 1
# multi-classification
data = load_sklearn_data("iris")
tbs = TreeBasedSelection(label_col="label", task_type="mcls")
transformed = tbs.fit_transform(data)
assert len(transformed.columns) > 1
def test_variance_based_selection(mock_dataset):
vbs = VarianceBasedSelection(label_col="label")
transformed = vbs.fit_transform(mock_dataset.copy())
assert "num3" not in transformed.columns

@@ -0,0 +1,40 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/01/15
@Author : mannaandpoem
@File : test_gpt_v_generator.py
"""
import pytest
from metagpt import logs
from metagpt.tools.libs.gpt_v_generator import GPTvGenerator
@pytest.fixture
def mock_webpages(mocker):
mock_data = """```html\n<html>\n<script src="scripts.js"></script>
<link rel="stylesheet" href="styles.css">\n</html>\n```\n
```css\n.class { ... }\n```\n
```javascript\nfunction() { ... }\n```\n"""
mocker.patch("metagpt.tools.libs.gpt_v_generator.GPTvGenerator.generate_webpages", return_value=mock_data)
return mocker
def test_vision_generate_webpages(mock_webpages):
image_path = "image.png"
generator = GPTvGenerator()
rsp = generator.generate_webpages(image_path=image_path)
logs.logger.info(rsp)
assert "html" in rsp
assert "css" in rsp
assert "javascript" in rsp
def test_save_webpages(mock_webpages):
image_path = "image.png"
generator = GPTvGenerator()
webpages = generator.generate_webpages(image_path)
webpages_dir = generator.save_webpages(image_path=image_path, webpages=webpages)
logs.logger.info(webpages_dir)
assert webpages_dir.exists()

@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# @Date : 1/10/2024 10:07 PM
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import base64
import io
import json
import pytest
from PIL import Image, ImageDraw
from metagpt.tools.libs.sd_engine import SDEngine
def generate_mock_image_data():
# Create a simple image
image = Image.new("RGB", (100, 100), color="white")
draw = ImageDraw.Draw(image)
draw.text((10, 10), "Mock Image", fill="black")
# Convert the image to binary data
with io.BytesIO() as buffer:
image.save(buffer, format="PNG")
image_binary = buffer.getvalue()
# Base64-encode the image bytes
image_base64 = base64.b64encode(image_binary).decode("utf-8")
return image_base64
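A quick sanity check on the mock payload is to decode it back into a PIL image (reusing the base64/io/Image imports above):

def test_mock_image_roundtrip():
    # Decode the base64 string and confirm it is a valid 100x100 image
    decoded = base64.b64decode(generate_mock_image_data())
    image = Image.open(io.BytesIO(decoded))
    assert image.size == (100, 100)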
def test_sd_tools(mocker):
mock_response = mocker.MagicMock()
mock_response.json.return_value = {"images": [generate_mock_image_data()]}
mocker.patch("requests.Session.post", return_value=mock_response)
engine = SDEngine(sd_url="http://example_localhost:7860")
prompt = "1boy, hansom"
engine.construct_payload(prompt)
engine.simple_run_t2i(engine.payload)
def test_sd_construct_payload():
engine = SDEngine(sd_url="http://example_localhost:7860")
prompt = "1boy, hansom"
engine.construct_payload(prompt)
assert "negative_prompt" in engine.payload
@pytest.mark.asyncio
async def test_sd_asyn_t2i(mocker):
mock_post = mocker.patch("aiohttp.ClientSession.post")
mock_response = mocker.AsyncMock()
mock_response.read.return_value = json.dumps({"images": [generate_mock_image_data()]})
mock_post.return_value.__aenter__.return_value = mock_response
engine = SDEngine(sd_url="http://example_localhost:7860")
prompt = "1boy, hansom"
engine.construct_payload(prompt)
await engine.run_t2i([engine.payload])
assert "negative_prompt" in engine.payload

@@ -0,0 +1,23 @@
import pytest
from metagpt.tools.libs.web_scraping import scrape_web_playwright
@pytest.mark.asyncio
async def test_scrape_web_playwright():
test_url = "https://www.deepwisdom.ai"
result = await scrape_web_playwright(test_url)
# Assert that the result is a dictionary
assert isinstance(result, dict)
# Assert that the result contains 'inner_text' and 'html' keys
assert "inner_text" in result
assert "html" in result
# Assert startswith and endswith
assert not result["inner_text"].startswith(" ")
assert not result["inner_text"].endswith(" ")
assert not result["html"].startswith(" ")
assert not result["html"].endswith(" ")

@@ -0,0 +1,154 @@
import pandas as pd
from metagpt.tools.tool_convert import convert_code_to_tool_schema, docstring_to_schema
def test_docstring_to_schema():
docstring = """
Some test desc.
Args:
features (list): Columns to be processed.
strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only be
used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.
fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values.
Defaults to None.
Returns:
pd.DataFrame: The transformed DataFrame.
"""
expected = {
"description": "Some test desc.",
"parameters": {
"properties": {
"features": {"type": "list", "description": "Columns to be processed."},
"strategy": {
"type": "str",
"description": "The imputation strategy, notice 'mean' and 'median' can only be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.",
"default": "'mean'",
"enum": ["'mean'", "'median'", "'most_frequent'", "'constant'"],
},
"fill_value": {
"type": "int",
"description": "Fill_value is used to replace all occurrences of missing_values. Defaults to None.",
"default": "None",
},
},
"required": ["features"],
},
"returns": [{"type": "pd.DataFrame", "description": "The transformed DataFrame."}],
}
schema = docstring_to_schema(docstring)
assert schema == expected
class DummyClass:
"""
Completing missing values with simple strategies.
"""
def __init__(self, features: list, strategy: str = "mean", fill_value=None):
"""
Initialize self.
Args:
features (list): Columns to be processed.
strategy (str, optional): The imputation strategy, notice 'mean' and 'median' can only
be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.
fill_value (int, optional): Fill_value is used to replace all occurrences of missing_values.
Defaults to None.
"""
pass
def fit(self, df: pd.DataFrame):
"""
Fit the FillMissingValue model.
Args:
df (pd.DataFrame): The input DataFrame.
"""
pass
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Transform the input DataFrame with the fitted model.
Args:
df (pd.DataFrame): The input DataFrame.
Returns:
pd.DataFrame: The transformed DataFrame.
"""
pass
def dummy_fn(df: pd.DataFrame) -> dict:
"""
Analyzes a DataFrame and categorizes its columns based on data types.
Args:
df (pd.DataFrame): The DataFrame to be analyzed.
Returns:
dict: A dictionary with four keys ('Category', 'Numeric', 'Datetime', 'Others').
Each key corresponds to a list of column names belonging to that category.
"""
pass
def test_convert_code_to_tool_schema_class():
expected = {
"type": "class",
"description": "Completing missing values with simple strategies.",
"methods": {
"__init__": {
"description": "Initialize self.",
"parameters": {
"properties": {
"features": {"type": "list", "description": "Columns to be processed."},
"strategy": {
"type": "str",
"description": "The imputation strategy, notice 'mean' and 'median' can only be used for numeric features. Enum: ['mean', 'median', 'most_frequent', 'constant']. Defaults to 'mean'.",
"default": "'mean'",
"enum": ["'mean'", "'median'", "'most_frequent'", "'constant'"],
},
"fill_value": {
"type": "int",
"description": "Fill_value is used to replace all occurrences of missing_values. Defaults to None.",
"default": "None",
},
},
"required": ["features"],
},
},
"fit": {
"description": "Fit the FillMissingValue model.",
"parameters": {
"properties": {"df": {"type": "pd.DataFrame", "description": "The input DataFrame."}},
"required": ["df"],
},
},
"transform": {
"description": "Transform the input DataFrame with the fitted model.",
"parameters": {
"properties": {"df": {"type": "pd.DataFrame", "description": "The input DataFrame."}},
"required": ["df"],
},
"returns": [{"type": "pd.DataFrame", "description": "The transformed DataFrame."}],
},
},
}
schema = convert_code_to_tool_schema(DummyClass)
assert schema == expected
def test_convert_code_to_tool_schema_function():
expected = {
"type": "function",
"description": "Analyzes a DataFrame and categorizes its columns based on data types.",
"parameters": {
"properties": {"df": {"type": "pd.DataFrame", "description": "The DataFrame to be analyzed."}},
"required": ["df"],
},
}
schema = convert_code_to_tool_schema(dummy_fn)
assert schema == expected

@@ -0,0 +1,102 @@
import pytest
from metagpt.tools.tool_registry import ToolRegistry
from metagpt.tools.tool_type import ToolType
@pytest.fixture
def tool_registry():
return ToolRegistry()
@pytest.fixture
def tool_registry_full():
return ToolRegistry(tool_types=ToolType)
# Test Initialization
def test_initialization(tool_registry):
assert isinstance(tool_registry, ToolRegistry)
assert tool_registry.tools == {}
assert tool_registry.tool_types == {}
assert tool_registry.tools_by_types == {}
# Test Initialization with tool types
def test_initialize_with_tool_types(tool_registry_full):
assert isinstance(tool_registry_full, ToolRegistry)
assert tool_registry_full.tools == {}
assert tool_registry_full.tools_by_types == {}
assert "data_preprocess" in tool_registry_full.tool_types
class TestClassTool:
"""test class"""
def test_class_fn(self):
"""test class fn"""
pass
def test_fn():
"""test function"""
pass
# Test Tool Registration Class
def test_register_tool_class(tool_registry):
tool_registry.register_tool("TestClassTool", "/path/to/tool", tool_source_object=TestClassTool)
assert "TestClassTool" in tool_registry.tools
# Test Tool Registration Function
def test_register_tool_fn(tool_registry):
tool_registry.register_tool("test_fn", "/path/to/tool", tool_source_object=test_fn)
assert "test_fn" in tool_registry.tools
# Test Tool Existence Checks
def test_has_tool(tool_registry):
tool_registry.register_tool("TestClassTool", "/path/to/tool", tool_source_object=TestClassTool)
assert tool_registry.has_tool("TestClassTool")
assert not tool_registry.has_tool("NonexistentTool")
# Test Tool Retrieval
def test_get_tool(tool_registry):
tool_registry.register_tool("TestClassTool", "/path/to/tool", tool_source_object=TestClassTool)
tool = tool_registry.get_tool("TestClassTool")
assert tool is not None
assert tool.name == "TestClassTool"
assert tool.path == "/path/to/tool"
assert "description" in tool.schemas
# Similar tests for has_tool_type, get_tool_type, get_tools_by_type
def test_has_tool_type(tool_registry_full):
assert tool_registry_full.has_tool_type("data_preprocess")
assert not tool_registry_full.has_tool_type("NonexistentType")
def test_get_tool_type(tool_registry_full):
retrieved_type = tool_registry_full.get_tool_type("data_preprocess")
assert retrieved_type is not None
assert retrieved_type.name == "data_preprocess"
def test_get_tools_by_type(tool_registry):
tool_type_name = "TestType"
tool_name = "TestTool"
tool_path = "/path/to/tool"
tool_registry.register_tool(tool_name, tool_path, tool_type=tool_type_name, tool_source_object=TestClassTool)
tools_by_type = tool_registry.get_tools_by_type(tool_type_name)
assert tools_by_type is not None
assert tool_name in tools_by_type
# Test case for when the tool type does not exist
def test_get_tools_by_nonexistent_type(tool_registry):
tools_by_type = tool_registry.get_tools_by_type("NonexistentType")
assert not tools_by_type
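A compact sketch of the registry behaviour these tests rely on (illustrative only; the real ToolRegistry also derives full schemas from the registered source object):

from dataclasses import dataclass, field

@dataclass
class ToolSketch:
    name: str
    path: str
    schemas: dict = field(default_factory=dict)

class ToolRegistrySketch:
    def __init__(self):
        self.tools = {}
        self.tools_by_types = {}

    def register_tool(self, name, path, tool_type="other", tool_source_object=None):
        # The real registry parses docstrings into schemas; here we keep only a description
        doc = (tool_source_object.__doc__ or "").strip() if tool_source_object else ""
        tool = ToolSketch(name=name, path=path, schemas={"description": doc})
        self.tools[name] = tool
        self.tools_by_types.setdefault(tool_type, {})[name] = tool

    def has_tool(self, name):
        return name in self.tools

    def get_tool(self, name):
        return self.tools.get(name)

    def get_tools_by_type(self, tool_type):
        return self.tools_by_types.get(tool_type, {})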

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
# @Date : 12/12/2023 4:17 PM
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import nbformat
import pytest
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
from metagpt.utils.common import read_json_file
from metagpt.utils.save_code import DATA_PATH, save_code_file
def test_save_code_file_python():
save_code_file("example", "print('Hello, World!')")
file_path = DATA_PATH / "output" / "example" / "code.py"
assert file_path.exists(), f"File does not exist: {file_path}"
content = file_path.read_text()
assert "print('Hello, World!')" in content, "File content does not match"
def test_save_code_file_json():
save_code_file("example_json", "print('Hello, JSON!')", file_format="json")
file_path = DATA_PATH / "output" / "example_json" / "code.json"
data = read_json_file(file_path)
assert "code" in data, "JSON key 'code' is missing"
assert data["code"] == "print('Hello, JSON!')", "JSON content does not match"
@pytest.mark.asyncio
async def test_save_code_file_notebook():
code = "print('Hello, World!')"
executor = ExecuteNbCode()
await executor.run(code)
# Save as a Notebook file
save_code_file("example_nb", executor.nb, file_format="ipynb")
file_path = DATA_PATH / "output" / "example_nb" / "code.ipynb"
assert file_path.exists(), f"Notebook file does not exist: {file_path}"
# Additional checks specific to notebook format
notebook = nbformat.read(file_path, as_version=4)
assert len(notebook.cells) > 0, "Notebook should have at least one cell"
first_cell_source = notebook.cells[0].source
assert "print('Hello, World!')" in first_cell_source, "Notebook cell content does not match"