Merge branch 'autosklearn' into 'expo'

add autosklearn setup run

See merge request agents/exp_optimizer!10
This commit is contained in:
林义章 2024-09-20 06:27:19 +00:00
commit 2d62317240
3 changed files with 125 additions and 1 deletions

View file

@ -218,6 +218,31 @@ #### Setup
```
提供github链接并说明使用的命令以及参数设置
### AutoSklearn
#### System requirements
auto-sklearn has the following system requirements:
- Linux operating system (for example Ubuntu)
- Python (>=3.7)
- C++ compiler (with C++11 support)
If you install auto-sklearn on a system for which no wheel files for the pyrfr package are provided (see here for available wheels), you also need:
- SWIG ([get SWIG here](https://www.swig.org/survey.html)).
For an explanation of missing Microsoft Windows and macOS support please check the Section [Windows/macOS compatibility](https://automl.github.io/auto-sklearn/master/installation.html#windows-macos-compatibility).
#### Setup
```
pip install auto-sklearn
```
#### Run
```
python run_experiment.py --exp_mode autosklearn --task titanic
```
### Base DI
For setup, check 5.

View file

@ -0,0 +1,96 @@
from datetime import datetime
import pandas as pd
from expo.experimenter.custom import CustomExperimenter
from expo.evaluation.evaluation import evaluate_score
from functools import partial
def custom_scorer(y_true, y_pred, metric_name):
    """Score predictions with ``evaluate_score`` using a (y_true, y_pred) signature.

    ``evaluate_score`` is called with the predictions first and the ground
    truth second, so this wrapper only swaps the first two arguments.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values.
        metric_name: Metric identifier forwarded unchanged to ``evaluate_score``.

    Returns:
        Whatever ``evaluate_score`` returns for the given metric.
    """
    score = evaluate_score(y_pred, y_true, metric_name)
    return score
class ASRunner:
    """Fit an auto-sklearn model on the train split and predict the dev/test splits.

    The runner reads everything it needs from ``state``:
    ``state["datasets_dir"]`` (paths for "train", "dev_wo_target" and
    "test_wo_target"), ``state["dataset_config"]`` ("target_col" and "metric")
    and ``state["task"]`` (used in the temporary folder name).
    """

    # Search budget passed to auto-sklearn as ``time_left_for_this_task``.
    time_limit = 600

    # Metrics handled by AutoSklearnRegressor / AutoSklearnClassifier respectively.
    _REGRESSION_METRICS = ("rmse",)
    _CLASSIFICATION_METRICS = ("f1", "f1 weighted")

    def __init__(self, state=None):
        """Store ``state`` and cache its ``"datasets_dir"`` mapping.

        Args:
            state: Mapping with at least a ``"datasets_dir"`` entry. The default
                ``None`` is not usable: indexing it raises ``TypeError``.
        """
        self.state = state
        self.datasets = self.state["datasets_dir"]

    @staticmethod
    def _scorer_options(metric_name):
        """Return the optimisation-direction keyword arguments for ``make_scorer``.

        auto-sklearn's ``make_scorer`` defaults to "greater is better" with an
        optimum of 1 and a worst result of 0. That is wrong for error metrics,
        so ``rmse`` is declared as a loss here.

        Args:
            metric_name: Metric identifier from the dataset config.

        Returns:
            A new dict of extra keyword arguments (empty for score-like metrics).
        """
        if metric_name == "rmse":
            # NOTE(review): assumes evaluate_score returns the raw (non-negated)
            # RMSE, i.e. lower is better -- confirm against expo.evaluation.
            # The worst-case value is a large finite sentinel (2**31 - 1).
            return {
                "optimum": 0,
                "worst_possible_result": 2**31 - 1,
                "greater_is_better": False,
            }
        return {}

    def create_autosklearn_scorer(self, metric_name):
        """Wrap ``custom_scorer`` for ``metric_name`` as an auto-sklearn scorer.

        Args:
            metric_name: Metric identifier; used as the scorer name and bound
                as the ``metric_name`` argument of ``custom_scorer``.

        Returns:
            The scorer object produced by ``autosklearn.metrics.make_scorer``.
        """
        # Imported lazily so the module can be imported without auto-sklearn.
        from autosklearn.metrics import make_scorer

        return make_scorer(
            name=metric_name,
            score_func=partial(custom_scorer, metric_name=metric_name),
            **self._scorer_options(metric_name),
        )

    def run(self):
        """Train auto-sklearn on the train CSV and predict the dev and test CSVs.

        Returns:
            Dict with ``"test_preds"`` and ``"dev_preds"`` holding the outputs
            of ``automl.predict`` on the respective feature files.

        Raises:
            ValueError: If the configured metric is not "rmse", "f1" or
                "f1 weighted".
        """
        # Imported lazily so the module can be imported without auto-sklearn.
        import autosklearn.classification
        import autosklearn.regression

        dataset_config = self.state["dataset_config"]
        target_col = dataset_config["target_col"]
        eval_metric = dataset_config["metric"]

        train_data = pd.read_csv(self.datasets["train"])
        dev_data = pd.read_csv(self.datasets["dev_wo_target"])
        test_data = pd.read_csv(self.datasets["test_wo_target"])

        X_train = train_data.drop(columns=[target_col])
        y_train = train_data[target_col]

        # The metric decides the estimator family; every other setting is shared.
        if eval_metric in self._REGRESSION_METRICS:
            estimator_cls = autosklearn.regression.AutoSklearnRegressor
        elif eval_metric in self._CLASSIFICATION_METRICS:
            estimator_cls = autosklearn.classification.AutoSklearnClassifier
        else:
            raise ValueError(f"Unsupported metric: {eval_metric}")

        automl = estimator_cls(
            time_left_for_this_task=self.time_limit,
            metric=self.create_autosklearn_scorer(eval_metric),
            memory_limit=8192,
            # Folder name carries the task and a minute-resolution timestamp.
            tmp_folder="AutosklearnModels/as-{}-{}".format(
                self.state["task"], datetime.now().strftime("%y%m%d_%H%M")
            ),
            n_jobs=-1,
        )
        automl.fit(X_train, y_train)

        dev_preds = automl.predict(dev_data)
        test_preds = automl.predict(test_data)
        return {"test_preds": test_preds, "dev_preds": dev_preds}
class AutoSklearnExperimenter(CustomExperimenter):
    """Experimenter that delegates model fitting and prediction to ``ASRunner``."""

    # Read by the base-class machinery; presumably where results are written -- confirm.
    result_path: str = "results/autosklearn"

    def __init__(self, args, **kwargs):
        """Initialise the base experimenter, then build the runner from ``self.state``."""
        super().__init__(args, **kwargs)
        self.framework = ASRunner(self.state)

    async def run_experiment(self):
        """Run auto-sklearn, score the dev and test predictions, and save the result.

        The saved payload is a two-element list: the literal ``0`` followed by
        a dict with the score dict, the user requirement and the CLI arguments.
        """
        predictions = self.framework.run()
        requirement = self.state["requirement"]

        dev_score = self.evaluate_predictions(predictions["dev_preds"], "dev")
        test_score = self.evaluate_predictions(predictions["test_preds"], "test")

        payload = {
            "score_dict": {"dev_score": dev_score, "test_score": test_score},
            "user_requirement": requirement,
            "args": vars(self.args),
        }
        self.save_result([0, payload])

View file

@ -6,6 +6,7 @@ from expo.experimenter.autogluon import GluonExperimenter
from expo.experimenter.custom import CustomExperimenter
from expo.experimenter.experimenter import Experimenter
from expo.experimenter.mcts import MCTSExperimenter
from expo.experimenter.autosklearn import AutoSklearnExperimenter
def get_args():
@ -15,7 +16,7 @@ def get_args():
"--exp_mode",
type=str,
default="mcts",
choices=["mcts", "aug", "base", "custom", "greedy", "autogluon", "random"],
choices=["mcts", "aug", "base", "custom", "greedy", "autogluon", "random", "autosklearn"],
)
get_di_args(parser)
get_mcts_args(parser)
@ -61,6 +62,8 @@ async def main(args):
experimenter = GluonExperimenter(args)
elif args.exp_mode == "custom":
experimenter = CustomExperimenter(args)
elif args.exp_mode == "autosklearn":
experimenter = AutoSklearnExperimenter(args)
else:
raise ValueError(f"Invalid exp_mode: {args.exp_mode}")
await experimenter.run_experiment()