diff --git a/.gitignore b/.gitignore index 3fc66cecb..6e1fc7f74 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ share/python-wheels/ MANIFEST metagpt/tools/schemas/ examples/data/search_kb/*.json +expo/AutogluonModels # PyInstaller # Usually these files are written by a python scripts from a template diff --git a/expo/experimenter/autogluon.py b/expo/experimenter/autogluon.py index 4f5d151ef..478ecfc01 100644 --- a/expo/experimenter/autogluon.py +++ b/expo/experimenter/autogluon.py @@ -1,27 +1,29 @@ +from datetime import datetime from autogluon.tabular import TabularDataset, TabularPredictor - from expo.experimenter.custom import CustomExperimenter class AGRunner: preset = "best_quality" - time_limit = 500 + time_limit = 1000 # 1000s - def __init__(self, datasets): - self.datasets = datasets + def __init__(self, state=None): + self.state = state + self.datasets = self.state["datasets_dir"] def run(self): train_path = self.datasets["train"] - test_wo_target_path = self.datasets["test_wo_target"] dev_wo_target_path = self.datasets["dev_wo_target"] + test_wo_target_path = self.datasets["test_wo_target"] target_col = self.state["dataset_config"]["target_col"] train_data = TabularDataset(train_path) - test_data = TabularDataset(test_wo_target_path) dev_data = TabularDataset(dev_wo_target_path) - - predictor = TabularPredictor(label=target_col).fit(train_data, presets=self.preset, time_limit=self.time_limit) - test_preds = predictor.predict(test_data) + test_data = TabularDataset(test_wo_target_path) + eval_metric = self.state["dataset_config"]["metric"].replace(" ", "_") + # predictor = TabularPredictor(label=target_col, eval_metric=eval_metric, path="AutogluonModels/ag-{}-{}".format(self.state['task'], datetime.now().strftime("%y%m%d_%H%M"))).fit(train_data, presets=self.preset, time_limit=self.time_limit, fit_weighted_ensemble=False, num_gpus=1) + predictor = TabularPredictor(label=target_col, eval_metric=eval_metric, path="AutogluonModels/ag-{}-{}".format(self.state['task'], datetime.now().strftime("%y%m%d_%H%M"))).fit(train_data, num_gpus=1) dev_preds = predictor.predict(dev_data) + test_preds = predictor.predict(test_data) return {"test_preds": test_preds, "dev_preds": dev_preds} @@ -30,4 +32,16 @@ class GluonExperimenter(CustomExperimenter): def __init__(self, args, **kwargs): super().__init__(args, **kwargs) - self.framework = AGRunner(self.datasets) + self.framework = AGRunner(self.state) + + async def run_experiment(self): + result = self.framework.run() + user_requirement = self.state["requirement"] + dev_preds = result["dev_preds"] + test_preds = result["test_preds"] + score_dict = { + "dev_score": self.evaluate_predictions(dev_preds, "dev"), + "test_score": self.evaluate_predictions(test_preds, "test"), + } + results = [0, {"score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}] + self.save_result(results) \ No newline at end of file diff --git a/expo/experimenter/custom.py b/expo/experimenter/custom.py index 4a5486af0..df090fb58 100644 --- a/expo/experimenter/custom.py +++ b/expo/experimenter/custom.py @@ -12,7 +12,7 @@ class CustomExperimenter(Experimenter): def __init__(self, args, **kwargs): super().__init__(args, **kwargs) - self.framework = kwargs["framework"] # todo + self.framework = kwargs.get("framework", None) # todo self.task = kwargs.get("task", self.args.task) self.low_is_better = kwargs.get("low_is_better", self.args.low_is_better) self.name = kwargs.get("name", "") diff --git a/expo/insights/solution_designer.py b/expo/insights/solution_designer.py index fc05afeea..b1fcf4188 100644 --- a/expo/insights/solution_designer.py +++ b/expo/insights/solution_designer.py @@ -19,7 +19,8 @@ DATASET_INSIGHT_PROMPT = """ Propose insights to help improve the performance of the model on this dataset. The insights should be proposed based on the dataset description with different task types. Each task type should have at least 5 insights. -Make sure each method is independent and can be implemented separately. +Make sure each method is diverse enough and can be implemented separately. +Be specific about models' choices, ensemble and tuning techniques, and preprocessing & feature engineering techniques. # Format ```json diff --git a/expo/run_experiment.py b/expo/run_experiment.py index b68607d79..2123fade3 100644 --- a/expo/run_experiment.py +++ b/expo/run_experiment.py @@ -2,6 +2,7 @@ import argparse import asyncio from expo.experimenter.aug import AugExperimenter +from expo.experimenter.autogluon import GluonExperimenter from expo.experimenter.custom import CustomExperimenter from expo.experimenter.experimenter import Experimenter from expo.experimenter.mcts import MCTSExperimenter @@ -11,7 +12,10 @@ def get_args(): parser = argparse.ArgumentParser() parser.add_argument("--name", type=str, default="") parser.add_argument( - "--exp_mode", type=str, default="mcts", choices=["mcts", "aug", "base", "custom", "greedy", "random"] + "--exp_mode", + type=str, + default="mcts", + choices=["mcts", "aug", "base", "custom", "greedy", "autogluon", "random"], ) get_di_args(parser) get_mcts_args(parser) @@ -51,6 +55,8 @@ async def main(args): experimenter = AugExperimenter(args) elif args.exp_mode == "base": experimenter = Experimenter(args) + elif args.exp_mode == "autogluon": + experimenter = GluonExperimenter(args) elif args.exp_mode == "custom": experimenter = CustomExperimenter(args) else: