From 3dde4664f40f9ae9f92441369a7cab0c88f7e68d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 21 Feb 2024 11:03:32 +0800 Subject: [PATCH 01/10] add sales_forecast in machine_learning. --- examples/mi/machine_learning.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/examples/mi/machine_learning.py b/examples/mi/machine_learning.py index a8ab5051e..a76561a37 100644 --- a/examples/mi/machine_learning.py +++ b/examples/mi/machine_learning.py @@ -3,10 +3,36 @@ import fire from metagpt.roles.mi.interpreter import Interpreter -async def main(auto_run: bool = True): - requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy." +DATA_DIR = "examples/mi/data" +requirements = { + "wine": "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.", + + # sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data + "sales_forecast": f""" + # Goal + Use time series regression machine learning to make predictions for Dept sales of the stores as accurate as possible. + + # Datasets Available + - train_data: {DATA_DIR}/WalmartSalesForecast/new_train.csv + - test_data: {DATA_DIR}/WalmartSalesForecast/new_test.csv + - additional data: {DATA_DIR}/WalmartSalesForecast/features.csv; To merge on train, test data. + - stores data: {DATA_DIR}/WalmartSalesForecast/stores.csv; To merge on train, test data. + + # Metric + The metric of the competition is weighted mean absolute error (WMAE) for test data. + + # Notice + - *print* key variables to get more information for next task step. + - Perform data analysis by plotting sales trends, holiday effects, distribution of sales across stores/departments using box/violin on the train data. + - Make sure the DataFrame.dtypes must be int, float or bool, and drop date column. + - Plot scatter plots of groud truth and predictions on test data. + """ +} + + +async def main(auto_run: bool = True, use_case: str = 'wine'): mi = Interpreter(auto_run=auto_run) - await mi.run(requirement) + await mi.run(requirements[use_case]) if __name__ == "__main__": From 247fa13e864b22848e33656b21752e64db30cc11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 21 Feb 2024 11:03:48 +0800 Subject: [PATCH 02/10] fix: pip error. --- metagpt/actions/mi/execute_nb_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/mi/execute_nb_code.py b/metagpt/actions/mi/execute_nb_code.py index 8e8e997b8..0e4563a37 100644 --- a/metagpt/actions/mi/execute_nb_code.py +++ b/metagpt/actions/mi/execute_nb_code.py @@ -182,7 +182,7 @@ class ExecuteNbCode(Action): outputs = self.parse_outputs(self.nb.cells[-1].outputs) outputs, success = truncate(remove_escape_and_color_codes(outputs), is_success=success) - if "!pip" in outputs: + if "!pip" in code: success = False return outputs, success From 662fbd7e5554f426cc34e42402e48fc5ab407621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 21 Feb 2024 11:43:02 +0800 Subject: [PATCH 03/10] chore. --- examples/mi/machine_learning.py | 44 ++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/examples/mi/machine_learning.py b/examples/mi/machine_learning.py index a76561a37..689335db3 100644 --- a/examples/mi/machine_learning.py +++ b/examples/mi/machine_learning.py @@ -2,35 +2,35 @@ import fire from metagpt.roles.mi.interpreter import Interpreter - DATA_DIR = "examples/mi/data" -requirements = { - "wine": "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.", +WINE_REQ = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy." - # sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data - "sales_forecast": f""" - # Goal - Use time series regression machine learning to make predictions for Dept sales of the stores as accurate as possible. +# sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data, +# new_train, new_test from train.csv. +SALES_FORECAST_REQ = f""" +# Goal +Use time series regression machine learning to make predictions for Dept sales of the stores as accurate as possible. - # Datasets Available - - train_data: {DATA_DIR}/WalmartSalesForecast/new_train.csv - - test_data: {DATA_DIR}/WalmartSalesForecast/new_test.csv - - additional data: {DATA_DIR}/WalmartSalesForecast/features.csv; To merge on train, test data. - - stores data: {DATA_DIR}/WalmartSalesForecast/stores.csv; To merge on train, test data. +# Datasets Available +- train_data: {DATA_DIR}/WalmartSalesForecast/new_train.csv +- test_data: {DATA_DIR}/WalmartSalesForecast/new_test.csv +- additional data: {DATA_DIR}/WalmartSalesForecast/features.csv; To merge on train, test data. +- stores data: {DATA_DIR}/WalmartSalesForecast/stores.csv; To merge on train, test data. - # Metric - The metric of the competition is weighted mean absolute error (WMAE) for test data. +# Metric +The metric of the competition is weighted mean absolute error (WMAE) for test data. - # Notice - - *print* key variables to get more information for next task step. - - Perform data analysis by plotting sales trends, holiday effects, distribution of sales across stores/departments using box/violin on the train data. - - Make sure the DataFrame.dtypes must be int, float or bool, and drop date column. - - Plot scatter plots of groud truth and predictions on test data. - """ -} +# Notice +- *print* key variables to get more information for next task step. +- Perform data analysis by plotting sales trends, holiday effects, distribution of sales across stores/departments using box/violin on the train data. +- Make sure the DataFrame.dtypes must be int, float or bool, and drop date column. +- Plot scatter plots of groud truth and predictions on test data. +""" + +requirements = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ} -async def main(auto_run: bool = True, use_case: str = 'wine'): +async def main(auto_run: bool = True, use_case: str = "wine"): mi = Interpreter(auto_run=auto_run) await mi.run(requirements[use_case]) From fc4017480205104f281f0367ef83acc433375a59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 21 Feb 2024 21:33:11 +0800 Subject: [PATCH 04/10] chore. --- examples/mi/machine_learning.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/examples/mi/machine_learning.py b/examples/mi/machine_learning.py index 689335db3..5f9d5b0cd 100644 --- a/examples/mi/machine_learning.py +++ b/examples/mi/machine_learning.py @@ -2,29 +2,30 @@ import fire from metagpt.roles.mi.interpreter import Interpreter -DATA_DIR = "examples/mi/data" WINE_REQ = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy." -# sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data, -# new_train, new_test from train.csv. +# DATA_DIR = "your/path/to/data" +DATA_DIR = "examples/mi/data/WalmartSalesForecast2" +# sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data SALES_FORECAST_REQ = f""" # Goal -Use time series regression machine learning to make predictions for Dept sales of the stores as accurate as possible. +Train a model to predict sales for each department in every store (split the last 40 weeks records as validation dataset, +the others is train dataset), include plot sales trends, holiday effects, distribution of sales across stores/departments, +using box on the train dataset, print metric and plot scatter plots of groud truth and predictions on validation data. +save predictions on test data. # Datasets Available -- train_data: {DATA_DIR}/WalmartSalesForecast/new_train.csv -- test_data: {DATA_DIR}/WalmartSalesForecast/new_test.csv -- additional data: {DATA_DIR}/WalmartSalesForecast/features.csv; To merge on train, test data. -- stores data: {DATA_DIR}/WalmartSalesForecast/stores.csv; To merge on train, test data. +- train_data: {DATA_DIR}/train.csv +- test_data: {DATA_DIR}/test.csv, no label data. +- additional data: {DATA_DIR}/features.csv +- stores data: {DATA_DIR}/stores.csv # Metric The metric of the competition is weighted mean absolute error (WMAE) for test data. # Notice - *print* key variables to get more information for next task step. -- Perform data analysis by plotting sales trends, holiday effects, distribution of sales across stores/departments using box/violin on the train data. -- Make sure the DataFrame.dtypes must be int, float or bool, and drop date column. -- Plot scatter plots of groud truth and predictions on test data. +- Only When you fit the model, make the DataFrame.dtypes to be int, float or bool, and drop date column. """ requirements = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ} @@ -32,7 +33,12 @@ requirements = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ} async def main(auto_run: bool = True, use_case: str = "wine"): mi = Interpreter(auto_run=auto_run) - await mi.run(requirements[use_case]) + if use_case == "wine": + requirement = requirements[use_case] + else: + assert DATA_DIR != "your/path/to/data", f"Please set DATA_DIR for the use_case: {use_case}!" + requirement = requirements[use_case] + await mi.run(requirement) if __name__ == "__main__": From 6652aa09ce8e2e19ba4a8ffd89013fae2fccb23f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 21 Feb 2024 22:52:18 +0800 Subject: [PATCH 05/10] delete feature.csv, store.csv, test.csv. --- examples/mi/machine_learning.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/examples/mi/machine_learning.py b/examples/mi/machine_learning.py index 5f9d5b0cd..652e7c908 100644 --- a/examples/mi/machine_learning.py +++ b/examples/mi/machine_learning.py @@ -10,22 +10,17 @@ DATA_DIR = "examples/mi/data/WalmartSalesForecast2" SALES_FORECAST_REQ = f""" # Goal Train a model to predict sales for each department in every store (split the last 40 weeks records as validation dataset, -the others is train dataset), include plot sales trends, holiday effects, distribution of sales across stores/departments, -using box on the train dataset, print metric and plot scatter plots of groud truth and predictions on validation data. -save predictions on test data. +the others is train dataset), include plot sales trends, print metric and plot scatter plots of +groud truth and predictions on validation data. # Datasets Available - train_data: {DATA_DIR}/train.csv -- test_data: {DATA_DIR}/test.csv, no label data. -- additional data: {DATA_DIR}/features.csv -- stores data: {DATA_DIR}/stores.csv # Metric The metric of the competition is weighted mean absolute error (WMAE) for test data. # Notice - *print* key variables to get more information for next task step. -- Only When you fit the model, make the DataFrame.dtypes to be int, float or bool, and drop date column. """ requirements = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ} @@ -36,6 +31,7 @@ async def main(auto_run: bool = True, use_case: str = "wine"): if use_case == "wine": requirement = requirements[use_case] else: + mi.use_tools = True assert DATA_DIR != "your/path/to/data", f"Please set DATA_DIR for the use_case: {use_case}!" requirement = requirements[use_case] await mi.run(requirement) From 2d8906091824263236db19ade7c1a9edfb5146af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 21 Feb 2024 23:03:36 +0800 Subject: [PATCH 06/10] chore. --- examples/mi/machine_learning.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/mi/machine_learning.py b/examples/mi/machine_learning.py index 652e7c908..9e4045bbb 100644 --- a/examples/mi/machine_learning.py +++ b/examples/mi/machine_learning.py @@ -23,17 +23,17 @@ The metric of the competition is weighted mean absolute error (WMAE) for test da - *print* key variables to get more information for next task step. """ -requirements = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ} +REQUIREMENTS = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ} async def main(auto_run: bool = True, use_case: str = "wine"): mi = Interpreter(auto_run=auto_run) if use_case == "wine": - requirement = requirements[use_case] + requirement = REQUIREMENTS[use_case] else: mi.use_tools = True assert DATA_DIR != "your/path/to/data", f"Please set DATA_DIR for the use_case: {use_case}!" - requirement = requirements[use_case] + requirement = REQUIREMENTS[use_case] await mi.run(requirement) From bd994adca2ec4b8199d75ea99365881f69ebb557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 21 Feb 2024 23:07:42 +0800 Subject: [PATCH 07/10] chore. --- examples/mi/machine_learning.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/mi/machine_learning.py b/examples/mi/machine_learning.py index 9e4045bbb..43f1a1d3a 100644 --- a/examples/mi/machine_learning.py +++ b/examples/mi/machine_learning.py @@ -4,8 +4,7 @@ from metagpt.roles.mi.interpreter import Interpreter WINE_REQ = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy." -# DATA_DIR = "your/path/to/data" -DATA_DIR = "examples/mi/data/WalmartSalesForecast2" +DATA_DIR = "your/path/to/data" # sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data SALES_FORECAST_REQ = f""" # Goal From ec95cedb368c5014c2978d65966e1aaee7639669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 22 Feb 2024 21:07:32 +0800 Subject: [PATCH 08/10] update SALES_FORECAST_REQ. --- examples/mi/machine_learning.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/examples/mi/machine_learning.py b/examples/mi/machine_learning.py index 43f1a1d3a..d67a42712 100644 --- a/examples/mi/machine_learning.py +++ b/examples/mi/machine_learning.py @@ -6,20 +6,8 @@ WINE_REQ = "Run data analysis on sklearn Wine recognition dataset, include a plo DATA_DIR = "your/path/to/data" # sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data -SALES_FORECAST_REQ = f""" -# Goal -Train a model to predict sales for each department in every store (split the last 40 weeks records as validation dataset, -the others is train dataset), include plot sales trends, print metric and plot scatter plots of -groud truth and predictions on validation data. - -# Datasets Available -- train_data: {DATA_DIR}/train.csv - -# Metric -The metric of the competition is weighted mean absolute error (WMAE) for test data. - -# Notice -- *print* key variables to get more information for next task step. +SALES_FORECAST_REQ = f"""Train a model to predict sales for each department in every store (split the last 40 weeks records as validation dataset, the others is train dataset), include plot sales trends, print metric and plot scatter plots of +groud truth and predictions on validation data. Datasets Available is train_data: {DATA_DIR}/train.csv, The metric of the competition is weighted mean absolute error (WMAE) for test data. Notice: *print* key variables to get more information for next task step. """ REQUIREMENTS = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ} From 08e00b4dc2894dc233bbb2802009f9f070092b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 22 Feb 2024 21:48:05 +0800 Subject: [PATCH 09/10] chore: simplify code. --- examples/mi/machine_learning.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/examples/mi/machine_learning.py b/examples/mi/machine_learning.py index d67a42712..53168e0e9 100644 --- a/examples/mi/machine_learning.py +++ b/examples/mi/machine_learning.py @@ -4,10 +4,10 @@ from metagpt.roles.mi.interpreter import Interpreter WINE_REQ = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy." -DATA_DIR = "your/path/to/data" +DATA_DIR = "examples/mi/data/WalmartSalesForecast2" # sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data -SALES_FORECAST_REQ = f"""Train a model to predict sales for each department in every store (split the last 40 weeks records as validation dataset, the others is train dataset), include plot sales trends, print metric and plot scatter plots of -groud truth and predictions on validation data. Datasets Available is train_data: {DATA_DIR}/train.csv, The metric of the competition is weighted mean absolute error (WMAE) for test data. Notice: *print* key variables to get more information for next task step. +SALES_FORECAST_REQ = f"""Train a model to predict sales for each department in every store (split the last 40 weeks records as validation dataset, the others is train dataset), include plot total sales trends, print metric and plot scatter plots of +groud truth and predictions on validation data. Dataset is {DATA_DIR}/train.csv, the metric is weighted mean absolute error (WMAE) for test data. Notice: *print* key variables to get more information for next task step. """ REQUIREMENTS = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ} @@ -15,12 +15,7 @@ REQUIREMENTS = {"wine": WINE_REQ, "sales_forecast": SALES_FORECAST_REQ} async def main(auto_run: bool = True, use_case: str = "wine"): mi = Interpreter(auto_run=auto_run) - if use_case == "wine": - requirement = REQUIREMENTS[use_case] - else: - mi.use_tools = True - assert DATA_DIR != "your/path/to/data", f"Please set DATA_DIR for the use_case: {use_case}!" - requirement = REQUIREMENTS[use_case] + requirement = REQUIREMENTS[use_case] await mi.run(requirement) From 041a2d61097bad45b5b7e28a880b37268a285c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 22 Feb 2024 21:56:02 +0800 Subject: [PATCH 10/10] chore. --- examples/mi/machine_learning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mi/machine_learning.py b/examples/mi/machine_learning.py index 53168e0e9..56c68f69e 100644 --- a/examples/mi/machine_learning.py +++ b/examples/mi/machine_learning.py @@ -4,7 +4,7 @@ from metagpt.roles.mi.interpreter import Interpreter WINE_REQ = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy." -DATA_DIR = "examples/mi/data/WalmartSalesForecast2" +DATA_DIR = "path/to/your/data" # sales_forecast data from https://www.kaggle.com/datasets/aslanahmedov/walmart-sales-forecast/data SALES_FORECAST_REQ = f"""Train a model to predict sales for each department in every store (split the last 40 weeks records as validation dataset, the others is train dataset), include plot total sales trends, print metric and plot scatter plots of groud truth and predictions on validation data. Dataset is {DATA_DIR}/train.csv, the metric is weighted mean absolute error (WMAE) for test data. Notice: *print* key variables to get more information for next task step.