# Patch summary (free text placed before the first "diff --git" header):
#   * examples/ml_engineer_with_tools.py (new file): example script that builds an
#     MLEngineer role (auto_run and use_tools default to True) and runs it with
#     asyncio on a requirement string containing placeholder train/eval data paths.
#   * metagpt/prompts/tool_types.py: adds EDA_PROMPT, usage notes for "eda" tools.
#   * metagpt/tools/tool_type.py: imports EDA_PROMPT and passes it as usage_prompt
#     in the ToolType.EDA definition.
diff --git a/examples/ml_engineer_with_tools.py b/examples/ml_engineer_with_tools.py
new file mode 100644
index 000000000..1c90f2946
--- /dev/null
+++ b/examples/ml_engineer_with_tools.py
@@ -0,0 +1,16 @@
+import asyncio
+
+from metagpt.roles.ci.ml_engineer import MLEngineer
+
+
+async def main(requirement: str, auto_run: bool = True, use_tools: bool = True):
+    role = MLEngineer(goal=requirement, auto_run=auto_run, use_tools=use_tools)
+    await role.run(requirement)
+
+
+if __name__ == "__main__":
+    data_path = "your_path_to_icr/icr-identify-age-related-conditions"  # replace 'your_path_to_icr' with the actual path where the data is stored
+    train_path = f"{data_path}/your_train_data.csv"  # replace 'your_train_data.csv' with your training data file name
+    eval_path = f"{data_path}/your_eval_data.csv"  # replace 'your_eval_data.csv' with your evaluation data file name
+    requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {train_path}, eval data path:{eval_path}."
+    asyncio.run(main(requirement))
diff --git a/metagpt/prompts/tool_types.py b/metagpt/prompts/tool_types.py
index f27fbea99..8728d22f3 100644
--- a/metagpt/prompts/tool_types.py
+++ b/metagpt/prompts/tool_types.py
@@ -1,3 +1,10 @@
+# Prompt for using tools of "eda" type
+EDA_PROMPT = """
+The current task is about exploratory data analysis, please note the following:
+- Distinguish column types with `select_dtypes` for tailored analysis and visualization, such as correlation.
+- Remember to `import numpy as np` before using Numpy functions. 
+"""
+
 # Prompt for using tools of "data_preprocess" type
 DATA_PREPROCESS_PROMPT = """
 The current task is about data preprocessing, please note the following:
diff --git a/metagpt/tools/tool_type.py b/metagpt/tools/tool_type.py
index 6fa971c56..7f3f132a6 100644
--- a/metagpt/tools/tool_type.py
+++ b/metagpt/tools/tool_type.py
@@ -1,6 +1,7 @@
 from enum import Enum
 
 from metagpt.prompts.tool_types import (
+    EDA_PROMPT,
     DATA_PREPROCESS_PROMPT,
     FEATURE_ENGINEERING_PROMPT,
     IMAGE2WEBPAGE_PROMPT,
@@ -11,7 +12,11 @@ from metagpt.tools.tool_data_type import ToolTypeDef
 
 
 class ToolType(Enum):
-    EDA = ToolTypeDef(name="eda", desc="For performing exploratory data analysis")
+    EDA = ToolTypeDef(
+        name="eda",
+        desc="For performing exploratory data analysis",
+        usage_prompt=EDA_PROMPT,
+    )
     DATA_PREPROCESS = ToolTypeDef(
         name="data_preprocess",
         desc="Only for changing value inplace.",