diff --git a/.gitignore b/.gitignore index 76283319f..d01469a36 100644 --- a/.gitignore +++ b/.gitignore @@ -167,5 +167,3 @@ tmp output.wav metagpt/roles/idea_agent.py .aider* -/tests/metagpt/actions/check_data.py -/config/config.yaml diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index bd46ae79a..cd2104c4b 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -274,29 +274,13 @@ class MLEngineer(Role): if __name__ == "__main__": - requirement = "Run data analysis on sklearn Iris dataset, include a plot" - # requirement = "Run data analysis on sklearn Diabetes dataset, include a plot" - # requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy" - # requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy" - # requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv" - - # requirement = "Perform data analysis on the provided data. Train a model to predict the target variable Survived. Include data preprocessing, feature engineering, and modeling in your pipeline. The metric is accuracy." - - # data_path = f"{DATA_PATH}/titanic" - # requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." - # requirement = f"Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy" - # data_path = f"{DATA_PATH}/icr-identify-age-related-conditions" - # requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {data_path}/split_train.csv, eval data path: {data_path}/split_eval.csv." data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques" requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." - # data_path = f"{DATA_PATH}/santander-customer-transaction-prediction" - # requirement = f"This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv' ." save_dir = "" # save_dir = DATA_PATH / "save" / "2023-12-14_16-58-03" - def load_history(save_dir: str = save_dir): """ Load history from the specified save directory.