format using precommit

2026-06-08 15:05:17 +02:00 · 2024-01-10 14:16:04 +08:00 · 2024-01-10 14:16:04 +08:00 · 4ec6151691
commit 4ec6151691
parent 767c99388f
7 changed files with 102 additions and 84 deletions
--- a/tests/metagpt/actions/test_make_tools.py
+++ b/tests/metagpt/actions/test_make_tools.py
@@ -8,7 +8,7 @@ from metagpt.logs import logger
@pytest.mark.asyncio
 async def test_make_tools():
    code = "import yfinance as yf\n\n# Collect Alibaba stock data\nalibaba = yf.Ticker('BABA')\ndata = alibaba.history(period='1d', start='2022-01-01', end='2022-12-31')\nprint(data.head())"
-    msgs = [{'role': 'assistant', 'content': code}]
+    msgs = [{"role": "assistant", "content": code}]
    mt = MakeTools()
    tool_code = await mt.run(msgs)
    logger.debug(tool_code)
@@ -21,10 +21,10 @@ async def test_make_tools():

@pytest.mark.asyncio
 async def test_make_tools2():
-    code = '''import pandas as pd\npath = "./tests/data/test.csv"\ndf = pd.read_csv(path)\ndata = df.copy()\n
+    code = """import pandas as pd\npath = "./tests/data/test.csv"\ndf = pd.read_csv(path)\ndata = df.copy()\n
    data['started_at'] = data['started_at'].apply(lambda r: pd.to_datetime(r))\n
-    data['ended_at'] = data['ended_at'].apply(lambda r: pd.to_datetime(r))\ndata.head()'''
-    msgs = [{'role': 'assistant', 'content': code}]
+    data['ended_at'] = data['ended_at'].apply(lambda r: pd.to_datetime(r))\ndata.head()"""
+    msgs = [{"role": "assistant", "content": code}]
    mt = MakeTools()
    tool_code = await mt.run(msgs)
    logger.debug(tool_code)
@@ -37,11 +37,11 @@ async def test_make_tools2():

@pytest.mark.asyncio
 async def test_make_tools3():
-    code = '''import pandas as pd\npath = "./tests/data/test.csv"\ndf = pd.read_csv(path)\ndata = df.copy()\n
+    code = """import pandas as pd\npath = "./tests/data/test.csv"\ndf = pd.read_csv(path)\ndata = df.copy()\n
    data['started_at'] = data['started_at'].apply(lambda r: pd.to_datetime(r))\n
    data['ended_at'] = data['ended_at'].apply(lambda r: pd.to_datetime(r))\n
-    data['duration_hour'] = (data['ended_at'] - data['started_at']).dt.seconds/3600\ndata.head()'''
-    msgs = [{'role': 'assistant', 'content': code}]
+    data['duration_hour'] = (data['ended_at'] - data['started_at']).dt.seconds/3600\ndata.head()"""
+    msgs = [{"role": "assistant", "content": code}]
    mt = MakeTools()
    tool_code = await mt.run(msgs)
    logger.debug(tool_code)
--- a/tests/metagpt/actions/test_write_analysis_code.py
+++ b/tests/metagpt/actions/test_write_analysis_code.py
@@ -1,10 +1,11 @@
 import asyncio
+
 import pytest

-from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
 from metagpt.actions.execute_code import ExecutePyCode
-from metagpt.schema import Message, Plan, Task
+from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
 from metagpt.logs import logger
+from metagpt.schema import Message, Plan, Task


@pytest.mark.asyncio
@@ -15,9 +16,9 @@ async def test_write_code_by_list_plan():
    plan = ["随机生成一个pandas DataFrame时间序列", "绘制这个时间序列的直方图", "求均值"]
    for task in plan:
        print(f"\n任务: {task}\n\n")
-        messages.append(Message(task, role='assistant'))
+        messages.append(Message(task, role="assistant"))
        code = await write_code.run(messages)
-        messages.append(Message(code, role='assistant'))
+        messages.append(Message(code, role="assistant"))
        assert len(code) > 0
        output = await execute_code.run(code)
        print(f"\n[Output]: 任务{task}的执行结果是: \n{output}\n")
@@ -48,11 +49,11 @@ async def test_write_code_with_tools():
    messages = []
    task_map = {
        "1": Task(
-                task_id="1",
-                instruction="随机生成一个pandas DataFrame数据集",
-                task_type="other",
-                dependent_task_ids=[],
-                code="""
+            task_id="1",
+            instruction="随机生成一个pandas DataFrame数据集",
+            task_type="other",
+            dependent_task_ids=[],
+            code="""
                import pandas as pd
                df = pd.DataFrame({
                    'a': [1, 2, 3, 4, 5],
@@ -61,18 +62,18 @@ async def test_write_code_with_tools():
                    'd': [1, 2, 3, 4, 5]
                })
                """,
-                is_finished=True,
-            ),
+            is_finished=True,
+        ),
        "2": Task(
-                task_id="2",
-                instruction="对数据集进行数据清洗",
-                task_type="data_preprocess",
-                dependent_task_ids=["1"],
-                code_steps="""
+            task_id="2",
+            instruction="对数据集进行数据清洗",
+            task_type="data_preprocess",
+            dependent_task_ids=["1"],
+            code_steps="""
                {"Step 1": "对数据集进行去重",
                "Step 2": "对数据集进行缺失值处理"}
-                """
-            ),
+                """,
+        ),
    }
    plan = Plan(
        goal="构造数据集并进行数据清洗",
@@ -89,7 +90,6 @@ async def test_write_code_with_tools():

@pytest.mark.asyncio
 async def test_write_code_to_correct_error():
-
    structural_context = """
    ## User Requirement
    read a dataset test.csv and print its head
@@ -136,7 +136,8 @@ async def test_write_code_to_correct_error():
    ]
    new_code = await WriteCodeByGenerate().run(context=context)
    print(new_code)
-    assert "read_csv" in new_code # should correct read_excel to read_csv
+    assert "read_csv" in new_code  # should correct read_excel to read_csv
+

@pytest.mark.asyncio
 async def test_write_code_reuse_code_simple():
@@ -174,7 +175,8 @@ async def test_write_code_reuse_code_simple():
    ]
    code = await WriteCodeByGenerate().run(context=context)
    print(code)
-    assert "pandas" not in code and "read_csv" not in code # should reuse import and read statement from previous one
+    assert "pandas" not in code and "read_csv" not in code  # should reuse import and read statement from previous one
+

@pytest.mark.asyncio
 async def test_write_code_reuse_code_long():
@@ -227,8 +229,9 @@ async def test_write_code_reuse_code_long():
    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
    trial_results = await asyncio.gather(*trials)
    print(*trial_results, sep="\n\n***\n\n")
-    success = ["load_iris" not in result and "iris_data" in result \
-        for result in trial_results]  # should reuse iris_data from previous tasks
+    success = [
+        "load_iris" not in result and "iris_data" in result for result in trial_results
+    ]  # should reuse iris_data from previous tasks
    success_rate = sum(success) / trials_num
    logger.info(f"success rate: {success_rate :.2f}")
    assert success_rate >= 0.8
@@ -299,8 +302,9 @@ async def test_write_code_reuse_code_long_for_wine():
    trials = [WriteCodeByGenerate().run(context=context, temperature=0.0) for _ in range(trials_num)]
    trial_results = await asyncio.gather(*trials)
    print(*trial_results, sep="\n\n***\n\n")
-    success = ["load_wine" not in result and "wine_data" in result\
-        for result in trial_results]  # should reuse iris_data from previous tasks
+    success = [
+        "load_wine" not in result and "wine_data" in result for result in trial_results
+    ]  # should reuse iris_data from previous tasks
    success_rate = sum(success) / trials_num
    logger.info(f"success rate: {success_rate :.2f}")
    assert success_rate >= 0.8
--- a/tests/metagpt/actions/test_write_plan.py
+++ b/tests/metagpt/actions/test_write_plan.py
@@ -1,6 +1,9 @@
-import pytest
+from metagpt.actions.write_plan import (
+    Plan,
+    Task,
+    precheck_update_plan_from_rsp,
+)

-from metagpt.actions.write_plan import WritePlan, precheck_update_plan_from_rsp, Plan, Task

 def test_precheck_update_plan_from_rsp():
    plan = Plan(goal="")
@@ -10,6 +13,6 @@ def test_precheck_update_plan_from_rsp():
    assert success
    assert len(plan.tasks) == 1 and plan.tasks[0].task_id == "1"  # precheck should not change the original one

-    invalid_rsp = 'wrong'
+    invalid_rsp = "wrong"
    success, _ = precheck_update_plan_from_rsp(invalid_rsp, plan)
    assert not success
--- a/tests/metagpt/roles/run_code_interpreter.py
+++ b/tests/metagpt/roles/run_code_interpreter.py
@@ -1,15 +1,16 @@
 import fire

 from metagpt.actions.execute_code import ExecutePyCode
-from metagpt.const import DATA_PATH
 from metagpt.logs import logger
 from metagpt.roles.code_interpreter import CodeInterpreter
 from metagpt.roles.ml_engineer import MLEngineer
 from metagpt.schema import Plan
-from metagpt.utils.recovery_util import save_history, load_history
+from metagpt.utils.recovery_util import load_history, save_history


-async def run_code_interpreter(role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir):
+async def run_code_interpreter(
+    role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir
+):
    """
    The main function to run the MLEngineer with optional history loading.

@@ -26,26 +27,28 @@ async def run_code_interpreter(role_class, requirement, auto_run, use_tools, use
        role = CodeInterpreter(goal=requirement, auto_run=auto_run, use_tools=use_tools)
    else:
        role = MLEngineer(
-            goal=requirement, auto_run=auto_run, use_tools=use_tools, use_code_steps=use_code_steps,
-            make_udfs=make_udfs, use_udfs=use_udfs
+            goal=requirement,
+            auto_run=auto_run,
+            use_tools=use_tools,
+            use_code_steps=use_code_steps,
+            make_udfs=make_udfs,
+            use_udfs=use_udfs,
        )
-    
+
    if save_dir:
        logger.info("Resuming from history trajectory")
        plan, nb = load_history(save_dir)
        role.planner.plan = Plan(**plan)
        role.execute_code = ExecutePyCode(nb)
-    
+
    else:
        logger.info("Run from scratch")
-        
-    
+
    try:
        await role.run(requirement)
    except Exception as e:
-        
        save_path = save_history(role, save_dir)
-        
+
        logger.exception(f"An error occurred: {e}, save trajectory here: {save_path}")


@@ -60,7 +63,7 @@ if __name__ == "__main__":
    # requirement = f"This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report AUC Score on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv' ."
    # data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques"
    # requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
-    
+
    save_dir = ""

    # role_class = "ci"
@@ -71,10 +74,17 @@ if __name__ == "__main__":
    use_udfs = False

    async def main(
-        role_class: str = role_class, requirement: str = requirement, auto_run: bool = auto_run,
-        use_tools: bool = use_tools, use_code_steps: bool = False, make_udfs: bool = make_udfs, use_udfs: bool = use_udfs,
-        save_dir: str = save_dir
+        role_class: str = role_class,
+        requirement: str = requirement,
+        auto_run: bool = auto_run,
+        use_tools: bool = use_tools,
+        use_code_steps: bool = False,
+        make_udfs: bool = make_udfs,
+        use_udfs: bool = use_udfs,
+        save_dir: str = save_dir,
    ):
-        await run_code_interpreter(role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir)
+        await run_code_interpreter(
+            role_class, requirement, auto_run, use_tools, use_code_steps, make_udfs, use_udfs, save_dir
+        )

    fire.Fire(main)
--- a/tests/metagpt/roles/test_daml.py
+++ b/tests/metagpt/roles/test_daml.py
@@ -2,8 +2,9 @@ import pytest
 from tqdm import tqdm

 from metagpt.logs import logger
+from metagpt.roles.ml_engineer import ExecutePyCode, MLEngineer
 from metagpt.schema import Plan
-from metagpt.roles.ml_engineer import MLEngineer, ExecutePyCode
+

 def reset(role):
    """Restart role with the same goal."""
@@ -11,6 +12,7 @@ def reset(role):
    role.planner.plan = Plan(goal=role.planner.plan.goal)
    role.execute_code = ExecutePyCode()

+
 async def make_use_tools(requirement: str, auto_run: bool = True):
    """make and use tools for requirement."""
    role = MLEngineer(goal=requirement, auto_run=auto_run)
@@ -31,11 +33,13 @@ async def make_use_tools(requirement: str, auto_run: bool = True):

@pytest.mark.asyncio
 async def test_make_use_tools():
-    requirements = ["Run data analysis on sklearn Iris dataset, include a plot",
-                    "Run data analysis on sklearn Diabetes dataset, include a plot",
-                    "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy",
-                    "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy",
-                    "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: tests/data/titanic.csv"]
+    requirements = [
+        "Run data analysis on sklearn Iris dataset, include a plot",
+        "Run data analysis on sklearn Diabetes dataset, include a plot",
+        "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy",
+        "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy",
+        "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: tests/data/titanic.csv",
+    ]
    success = 0
    for requirement in tqdm(requirements, total=len(requirements)):
        try:
--- a/tests/metagpt/tools/functions/test_udf.py
+++ b/tests/metagpt/tools/functions/test_udf.py
@@ -1,15 +1,15 @@
-import pytest
-import yaml
 import json

-from metagpt.tools.functions.libs.udf import UDFS, docstring_to_yaml, UDFS_YAML
+import yaml
+
 from metagpt.logs import logger
+from metagpt.tools.functions.libs.udf import UDFS, UDFS_YAML, docstring_to_yaml


 def test_udfs():
    assert len(UDFS) > 0
-    assert 'udf_name' in UDFS[0]
-    assert 'udf_doc' in UDFS[0]
+    assert "udf_name" in UDFS[0]
+    assert "udf_doc" in UDFS[0]
    logger.info(UDFS)


@@ -23,27 +23,27 @@ def test_docstring2yaml():
        pd.DataFrame: The dataframe with an additional column 'duration_hour' added.
    """

-    yaml_result = docstring_to_yaml(docstring, return_vars='dataframe')
-    assert 'parameters' in yaml_result
-    assert 'properties' in yaml_result['parameters']
-    assert 'dataframe' in yaml_result['parameters']['properties']
+    yaml_result = docstring_to_yaml(docstring, return_vars="dataframe")
+    assert "parameters" in yaml_result
+    assert "properties" in yaml_result["parameters"]
+    assert "dataframe" in yaml_result["parameters"]["properties"]


 def test_UDFS_YAML():
    assert len(UDFS_YAML) > 0
    logger.info(f"\n\n{json.dumps(UDFS_YAML, indent=2, ensure_ascii=False)}")
    function_schema = UDFS_YAML
-    assert 'description' in function_schema[list(function_schema.keys())[0]]
-    assert 'type' in function_schema[list(function_schema.keys())[0]]
-    assert 'parameters' in function_schema[list(function_schema.keys())[0]]
-    assert 'properties' in function_schema[list(function_schema.keys())[0]]['parameters']
-    assert 'required' in function_schema[list(function_schema.keys())[0]]['parameters']
-    assert 'returns' in function_schema[list(function_schema.keys())[0]]
+    assert "description" in function_schema[list(function_schema.keys())[0]]
+    assert "type" in function_schema[list(function_schema.keys())[0]]
+    assert "parameters" in function_schema[list(function_schema.keys())[0]]
+    assert "properties" in function_schema[list(function_schema.keys())[0]]["parameters"]
+    assert "required" in function_schema[list(function_schema.keys())[0]]["parameters"]
+    assert "returns" in function_schema[list(function_schema.keys())[0]]
    # 指定要保存的文件路径
-    file_path = './tests/data/function_schema.yaml'
+    file_path = "./tests/data/function_schema.yaml"

    # 使用 PyYAML 将字典保存为 YAML 文件
-    with open(file_path, 'w') as file:
+    with open(file_path, "w") as file:
        yaml.dump(function_schema, file, default_flow_style=False)

-    print(f'Data has been saved to {file_path}')
+    print(f"Data has been saved to {file_path}")
--- a/tests/metagpt/utils/test_save_code.py
+++ b/tests/metagpt/utils/test_save_code.py
@@ -2,15 +2,15 @@
 # @Date    : 12/12/2023 4:17 PM
 # @Author  : stellahong (stellahong@fuzhi.ai)
 # @Desc    :
-import pytest
-import os
 import json
+import os
+
 import nbformat
+import pytest

-from metagpt.actions.write_analysis_code import WriteCodeByGenerate
 from metagpt.actions.execute_code import ExecutePyCode
-
-from metagpt.utils.save_code import save_code_file, DATA_PATH
+from metagpt.actions.write_analysis_code import WriteCodeByGenerate
+from metagpt.utils.save_code import DATA_PATH, save_code_file


 def test_save_code_file_python():
@@ -36,12 +36,9 @@ def test_save_code_file_json():
    assert data["code"] == "print('Hello, JSON!')", "JSON content does not match"


-
@pytest.mark.asyncio
 async def test_save_code_file_notebook():
-    code = await WriteCodeByGenerate().run(
-        context="basic python, hello world", plan="", code_steps="", temperature=0.0
-    )
+    code = await WriteCodeByGenerate().run(context="basic python, hello world", plan="", code_steps="", temperature=0.0)
    executor = ExecutePyCode()
    await executor.run(code)
    # Save as a Notebook file