From 9ba9371656f04af5553ddeb95affd35ebe66ae53 Mon Sep 17 00:00:00 2001 From: limafang Date: Thu, 12 Sep 2024 20:26:11 +0800 Subject: [PATCH 1/8] add autosklearn setup run --- expo/README.md | 9 +++ expo/experimenter/autosklearn.py | 110 +++++++++++++++++++++++++++++++ expo/run_experiment.py | 5 +- 3 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 expo/experimenter/autosklearn.py diff --git a/expo/README.md b/expo/README.md index 55ea7eed4..e824312f2 100644 --- a/expo/README.md +++ b/expo/README.md @@ -182,6 +182,15 @@ #### Setup ``` 提供github链接,并说明使用的命令以及参数设置 +### AutoSklearn +#### Setup +``` +pip install autosklearn +``` +#### Run +``` +python run_experiment.py --exp_mode autosklearn --task titanic +``` ### Base DI For setup, check 5. diff --git a/expo/experimenter/autosklearn.py b/expo/experimenter/autosklearn.py new file mode 100644 index 000000000..5786a3790 --- /dev/null +++ b/expo/experimenter/autosklearn.py @@ -0,0 +1,110 @@ +from datetime import datetime +import autosklearn.classification +import autosklearn.regression +import pandas as pd +from expo.experimenter.custom import CustomExperimenter +from expo.evaluation.evaluation import evaluate_score +from autosklearn.metrics import make_scorer +from functools import partial + + +def custom_scorer(y_true, y_pred, metric_name): + return evaluate_score(y_pred, y_true, metric_name) + + +def create_autosklearn_scorer(metric_name): + return make_scorer( + name=metric_name, score_func=partial(custom_scorer, metric_name=metric_name) + ) + + +class ASRunner: + time_limit = 300 + + def __init__(self, state=None): + self.state = state + self.datasets = self.state["datasets_dir"] + + def run(self): + train_path = self.datasets["train"] + dev_wo_target_path = self.datasets["dev_wo_target"] + test_wo_target_path = self.datasets["test_wo_target"] + target_col = self.state["dataset_config"]["target_col"] + + train_data = pd.read_csv(train_path) + dev_data = pd.read_csv(dev_wo_target_path) + test_data = 
pd.read_csv(test_wo_target_path) + eval_metric = self.state["dataset_config"]["metric"].replace(" ", "_") + X_train = train_data.drop(columns=[target_col]) + y_train = train_data[target_col] + + if eval_metric == "rmse": + automl = autosklearn.regression.AutoSklearnRegressor( + time_left_for_this_task=self.time_limit, + per_run_time_limit=60, + metric=create_autosklearn_scorer("rmse"), # 使用新的函数创建评分器 + memory_limit=8192, + tmp_folder="AutosklearnModels/as-{}-{}".format( + self.state["task"], datetime.now().strftime("%y%m%d_%H%M") + ), + n_jobs=-1, + ) + elif eval_metric == "f1": + automl = autosklearn.classification.AutoSklearnClassifier( + time_left_for_this_task=self.time_limit, + per_run_time_limit=60, + metric=create_autosklearn_scorer("f1"), # 使用新的函数创建评分器 + memory_limit=8192, + tmp_folder="AutosklearnModels/as-{}-{}".format( + self.state["task"], datetime.now().strftime("%y%m%d_%H%M") + ), + n_jobs=-1, + ) + elif eval_metric == "f1_weighted": + automl = autosklearn.classification.AutoSklearnClassifier( + time_left_for_this_task=self.time_limit, + per_run_time_limit=60, + metric=create_autosklearn_scorer( + "f1 weighted" + ), # 使用新的函数创建评分器 + memory_limit=8192, + tmp_folder="AutosklearnModels/as-{}-{}".format( + self.state["task"], datetime.now().strftime("%y%m%d_%H%M") + ), + n_jobs=-1, + ) + else: + raise ValueError(f"Unsupported metric: {eval_metric}") + automl.fit(X_train, y_train) + + dev_preds = automl.predict(dev_data) + test_preds = automl.predict(test_data) + + return {"test_preds": test_preds, "dev_preds": dev_preds} + + +class AutoSklearnExperimenter(CustomExperimenter): + result_path: str = "results/autosklearn" + + def __init__(self, args, **kwargs): + super().__init__(args, **kwargs) + self.framework = ASRunner(self.state) + + async def run_experiment(self): + result = self.framework.run() + user_requirement = self.state["requirement"] + dev_preds = result["dev_preds"] + test_preds = result["test_preds"] + score_dict = { + "dev_score": 
self.evaluate_predictions(dev_preds, "dev"), + "test_score": self.evaluate_predictions(test_preds, "test"), + } + results = [ + 0, + { + "score_dict": score_dict, + "user_requirement": user_requirement, + "args": vars(self.args), + }, + ] + self.save_result(results) diff --git a/expo/run_experiment.py b/expo/run_experiment.py index 2123fade3..2c996a737 100644 --- a/expo/run_experiment.py +++ b/expo/run_experiment.py @@ -6,6 +6,7 @@ from expo.experimenter.autogluon import GluonExperimenter from expo.experimenter.custom import CustomExperimenter from expo.experimenter.experimenter import Experimenter from expo.experimenter.mcts import MCTSExperimenter +from expo.experimenter.autosklearn import AutoSklearnExperimenter def get_args(): @@ -15,7 +16,7 @@ def get_args(): "--exp_mode", type=str, default="mcts", - choices=["mcts", "aug", "base", "custom", "greedy", "autogluon", "random"], + choices=["mcts", "aug", "base", "custom", "greedy", "autogluon", "random", "autosklearn"], ) get_di_args(parser) get_mcts_args(parser) @@ -59,6 +60,8 @@ async def main(args): experimenter = GluonExperimenter(args) elif args.exp_mode == "custom": experimenter = CustomExperimenter(args) + elif args.exp_mode == "autosklearn": + experimenter = AutoSklearnExperimenter(args) else: raise ValueError(f"Invalid exp_mode: {args.exp_mode}") await experimenter.run_experiment() From b49334b16c4c61bff89780c522c6f60f25d89329 Mon Sep 17 00:00:00 2001 From: limafang Date: Fri, 13 Sep 2024 21:07:10 +0800 Subject: [PATCH 2/8] fix import and update readme --- expo/README.md | 18 +++++++++++++++++- expo/experimenter/autosklearn.py | 32 ++++++++++++-------------------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/expo/README.md b/expo/README.md index e824312f2..707e6415e 100644 --- a/expo/README.md +++ b/expo/README.md @@ -183,10 +183,26 @@ #### Setup 提供github链接,并说明使用的命令以及参数设置 ### AutoSklearn +#### System requirements +auto-sklearn has the following system requirements: + +- Linux operating 
system (for example Ubuntu) + +- Python (>=3.7) + +- C++ compiler (with C++11 support) + +In case you try to install Auto-sklearn on a system where no wheel files for the pyrfr package are provided (see [here](https://pypi.org/project/pyrfr/#files) for available wheels), you also need: + +- SWIG [(get SWIG here).](https://www.swig.org/survey.html) + +For an explanation of the missing Microsoft Windows and macOS support, please see the section [Windows/macOS compatibility](https://automl.github.io/auto-sklearn/master/installation.html#windows-macos-compatibility). + #### Setup ``` -pip install autosklearn +pip install auto-sklearn ``` + #### Run ``` python run_experiment.py --exp_mode autosklearn --task titanic diff --git a/expo/experimenter/autosklearn.py b/expo/experimenter/autosklearn.py index 5786a3790..7340edafa 100644 --- a/expo/experimenter/autosklearn.py +++ b/expo/experimenter/autosklearn.py @@ -1,10 +1,7 @@ from datetime import datetime -import autosklearn.classification -import autosklearn.regression import pandas as pd from expo.experimenter.custom import CustomExperimenter from expo.evaluation.evaluation import evaluate_score -from autosklearn.metrics import make_scorer from functools import partial @@ -24,6 +21,14 @@ class ASRunner: def __init__(self, state=None): self.state = state self.datasets = self.state["datasets_dir"] + try: + import autosklearn.classification + import autosklearn.regression + from autosklearn.metrics import make_scorer + except ImportError: + raise ImportError( + "autosklearn not found or system not supported, please check it first" + ) def run(self): train_path = self.datasets["train"] @@ -34,7 +39,7 @@ class ASRunner: train_data = pd.read_csv(train_path) dev_data = pd.read_csv(dev_wo_target_path) test_data = pd.read_csv(test_wo_target_path) - eval_metric = self.state["dataset_config"]["metric"].replace(" ", "_") + eval_metric = self.state["dataset_config"]["metric"] X_train = train_data.drop(columns=[target_col]) y_train = train_data[target_col] @@ -42,31 +47,18 @@ 
class ASRunner: automl = autosklearn.regression.AutoSklearnRegressor( time_left_for_this_task=self.time_limit, per_run_time_limit=60, - metric=create_autosklearn_scorer("rmse"), # 使用新的函数创建评分器 + metric=create_autosklearn_scorer(eval_metric), memory_limit=8192, tmp_folder="AutosklearnModels/as-{}-{}".format( self.state["task"], datetime.now().strftime("%y%m%d_%H%M") ), n_jobs=-1, ) - elif eval_metric == "f1": + elif eval_metric in ["f1", "f1 weighted"]: automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=self.time_limit, per_run_time_limit=60, - metric=create_autosklearn_scorer("f1"), # 使用新的函数创建评分器 - memory_limit=8192, - tmp_folder="AutosklearnModels/as-{}-{}".format( - self.state["task"], datetime.now().strftime("%y%m%d_%H%M") - ), - n_jobs=-1, - ) - elif eval_metric == "f1_weighted": - automl = autosklearn.classification.AutoSklearnClassifier( - time_left_for_this_task=self.time_limit, - per_run_time_limit=60, - metric=create_autosklearn_scorer( - "f1 weighted" - ), # 使用新的函数创建评分器 + metric=create_autosklearn_scorer(eval_metric), memory_limit=8192, tmp_folder="AutosklearnModels/as-{}-{}".format( self.state["task"], datetime.now().strftime("%y%m%d_%H%M") From 5d2de4d0ec008b3e5737ad8b2ee531a9dc88e1c3 Mon Sep 17 00:00:00 2001 From: duiyipan Date: Sat, 14 Sep 2024 11:37:08 +0800 Subject: [PATCH 3/8] add random seed --- expo/experimenter/autosklearn.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/expo/experimenter/autosklearn.py b/expo/experimenter/autosklearn.py index 7340edafa..22aa2a132 100644 --- a/expo/experimenter/autosklearn.py +++ b/expo/experimenter/autosklearn.py @@ -17,6 +17,7 @@ def create_autosklearn_scorer(metric_name): class ASRunner: time_limit = 300 + seed = 42 def __init__(self, state=None): self.state = state @@ -49,6 +50,7 @@ class ASRunner: per_run_time_limit=60, metric=create_autosklearn_scorer(eval_metric), memory_limit=8192, + seed=self.seed, tmp_folder="AutosklearnModels/as-{}-{}".format( self.state["task"], 
datetime.now().strftime("%y%m%d_%H%M") ), @@ -60,6 +62,7 @@ class ASRunner: per_run_time_limit=60, metric=create_autosklearn_scorer(eval_metric), memory_limit=8192, + seed=self.seed, tmp_folder="AutosklearnModels/as-{}-{}".format( self.state["task"], datetime.now().strftime("%y%m%d_%H%M") ), From c4fe056bcaa2d06abec4c2328c01994d09fce031 Mon Sep 17 00:00:00 2001 From: duiyipan Date: Sat, 14 Sep 2024 14:58:22 +0800 Subject: [PATCH 4/8] fix import error delete seed --- expo/experimenter/autosklearn.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/expo/experimenter/autosklearn.py b/expo/experimenter/autosklearn.py index 22aa2a132..602b8385a 100644 --- a/expo/experimenter/autosklearn.py +++ b/expo/experimenter/autosklearn.py @@ -9,15 +9,8 @@ def custom_scorer(y_true, y_pred, metric_name): return evaluate_score(y_pred, y_true, metric_name) -def create_autosklearn_scorer(metric_name): - return make_scorer( - name=metric_name, score_func=partial(custom_scorer, metric_name=metric_name) - ) - - class ASRunner: time_limit = 300 - seed = 42 def __init__(self, state=None): self.state = state @@ -25,12 +18,19 @@ class ASRunner: try: import autosklearn.classification import autosklearn.regression - from autosklearn.metrics import make_scorer + import autosklearn.metrics + + self.autosklearn = autosklearn except ImportError: raise ImportError( "autosklearn not found or system not supported, please check it first" ) + def create_autosklearn_scorer(self, metric_name): + return self.autosklearn.metrics.make_scorer( + name=metric_name, score_func=partial(custom_scorer, metric_name=metric_name) + ) + def run(self): train_path = self.datasets["train"] dev_wo_target_path = self.datasets["dev_wo_target"] @@ -45,24 +45,22 @@ class ASRunner: y_train = train_data[target_col] if eval_metric == "rmse": - automl = autosklearn.regression.AutoSklearnRegressor( + automl = self.autosklearn.regression.AutoSklearnRegressor( 
time_left_for_this_task=self.time_limit, per_run_time_limit=60, - metric=create_autosklearn_scorer(eval_metric), + metric=self.create_autosklearn_scorer(eval_metric), memory_limit=8192, - seed=self.seed, tmp_folder="AutosklearnModels/as-{}-{}".format( self.state["task"], datetime.now().strftime("%y%m%d_%H%M") ), n_jobs=-1, ) elif eval_metric in ["f1", "f1 weighted"]: - automl = autosklearn.classification.AutoSklearnClassifier( + automl = self.autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=self.time_limit, per_run_time_limit=60, - metric=create_autosklearn_scorer(eval_metric), + metric=self.create_autosklearn_scorer(eval_metric), memory_limit=8192, - seed=self.seed, tmp_folder="AutosklearnModels/as-{}-{}".format( self.state["task"], datetime.now().strftime("%y%m%d_%H%M") ), From 9665ebdf4d1a8c2dff63c0b991184f633c7171f7 Mon Sep 17 00:00:00 2001 From: duiyipan Date: Sat, 14 Sep 2024 23:43:57 +0800 Subject: [PATCH 5/8] autosklearn delete per_run_time_limit and change time_limit --- expo/experimenter/autosklearn.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/expo/experimenter/autosklearn.py b/expo/experimenter/autosklearn.py index 602b8385a..e8923c6bd 100644 --- a/expo/experimenter/autosklearn.py +++ b/expo/experimenter/autosklearn.py @@ -10,7 +10,7 @@ def custom_scorer(y_true, y_pred, metric_name): class ASRunner: - time_limit = 300 + time_limit = 600 def __init__(self, state=None): self.state = state @@ -47,7 +47,6 @@ class ASRunner: if eval_metric == "rmse": automl = self.autosklearn.regression.AutoSklearnRegressor( time_left_for_this_task=self.time_limit, - per_run_time_limit=60, metric=self.create_autosklearn_scorer(eval_metric), memory_limit=8192, tmp_folder="AutosklearnModels/as-{}-{}".format( @@ -58,7 +57,6 @@ class ASRunner: elif eval_metric in ["f1", "f1 weighted"]: automl = self.autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=self.time_limit, - per_run_time_limit=60, 
metric=self.create_autosklearn_scorer(eval_metric), memory_limit=8192, tmp_folder="AutosklearnModels/as-{}-{}".format( From c007d0bd5e6a89ad53bd4ab7c8504ed329ffb1a5 Mon Sep 17 00:00:00 2001 From: duiyipan Date: Sat, 14 Sep 2024 23:49:46 +0800 Subject: [PATCH 6/8] change import way --- expo/experimenter/autosklearn.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/expo/experimenter/autosklearn.py b/expo/experimenter/autosklearn.py index e8923c6bd..c6aa70920 100644 --- a/expo/experimenter/autosklearn.py +++ b/expo/experimenter/autosklearn.py @@ -15,19 +15,11 @@ class ASRunner: def __init__(self, state=None): self.state = state self.datasets = self.state["datasets_dir"] - try: - import autosklearn.classification - import autosklearn.regression - import autosklearn.metrics - - self.autosklearn = autosklearn - except ImportError: - raise ImportError( - "autosklearn not found or system not supported, please check it first" - ) def create_autosklearn_scorer(self, metric_name): - return self.autosklearn.metrics.make_scorer( + from autosklearn.metrics import make_scorer + + return make_scorer( name=metric_name, score_func=partial(custom_scorer, metric_name=metric_name) ) @@ -45,7 +37,9 @@ class ASRunner: y_train = train_data[target_col] if eval_metric == "rmse": - automl = self.autosklearn.regression.AutoSklearnRegressor( + from autosklearn.regression import AutoSklearnRegressor + + automl = AutoSklearnRegressor( time_left_for_this_task=self.time_limit, metric=self.create_autosklearn_scorer(eval_metric), memory_limit=8192, @@ -55,7 +49,9 @@ class ASRunner: n_jobs=-1, ) elif eval_metric in ["f1", "f1 weighted"]: - automl = self.autosklearn.classification.AutoSklearnClassifier( + from autosklearn.classification import AutoSklearnClassifier + + automl = AutoSklearnClassifier( time_left_for_this_task=self.time_limit, metric=self.create_autosklearn_scorer(eval_metric), memory_limit=8192, From 574f1b0e0d2c6b9702097287761bf64a29b8a82f Mon 
Sep 17 00:00:00 2001 From: duiyipan Date: Sun, 15 Sep 2024 00:01:35 +0800 Subject: [PATCH 7/8] change import way --- expo/experimenter/autosklearn.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/expo/experimenter/autosklearn.py b/expo/experimenter/autosklearn.py index c6aa70920..9d0ea2df4 100644 --- a/expo/experimenter/autosklearn.py +++ b/expo/experimenter/autosklearn.py @@ -24,6 +24,8 @@ class ASRunner: ) def run(self): + import autosklearn + train_path = self.datasets["train"] dev_wo_target_path = self.datasets["dev_wo_target"] test_wo_target_path = self.datasets["test_wo_target"] @@ -37,9 +39,7 @@ class ASRunner: y_train = train_data[target_col] if eval_metric == "rmse": - from autosklearn.regression import AutoSklearnRegressor - - automl = AutoSklearnRegressor( + automl = autosklearn.regression.AutoSklearnRegressor( time_left_for_this_task=self.time_limit, metric=self.create_autosklearn_scorer(eval_metric), memory_limit=8192, @@ -49,9 +49,7 @@ class ASRunner: n_jobs=-1, ) elif eval_metric in ["f1", "f1 weighted"]: - from autosklearn.classification import AutoSklearnClassifier - - automl = AutoSklearnClassifier( + automl = autosklearn.classification.AutoSklearnClassifier( time_left_for_this_task=self.time_limit, metric=self.create_autosklearn_scorer(eval_metric), memory_limit=8192, From d95c1cb333069e06881a7ff7712ba349c584a5cb Mon Sep 17 00:00:00 2001 From: duiyipan Date: Mon, 16 Sep 2024 14:07:09 +0800 Subject: [PATCH 8/8] fix import error --- expo/experimenter/autosklearn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/expo/experimenter/autosklearn.py b/expo/experimenter/autosklearn.py index 9d0ea2df4..02a3cc465 100644 --- a/expo/experimenter/autosklearn.py +++ b/expo/experimenter/autosklearn.py @@ -24,7 +24,8 @@ class ASRunner: ) def run(self): - import autosklearn + import autosklearn.classification + import autosklearn.regression train_path = self.datasets["train"] dev_wo_target_path = 
self.datasets["dev_wo_target"]