Merge remote-tracking branch 'origin/expo' into improve_mcts

This commit is contained in:
Yizhou Chi 2024-09-11 15:33:55 +08:00
commit 4dfa01295e
5 changed files with 35 additions and 13 deletions

1
.gitignore vendored
View file

@ -29,6 +29,7 @@ share/python-wheels/
MANIFEST
metagpt/tools/schemas/
examples/data/search_kb/*.json
expo/AutogluonModels
# PyInstaller
# Usually these files are written by a python script from a template

View file

@ -1,27 +1,29 @@
from datetime import datetime
from autogluon.tabular import TabularDataset, TabularPredictor
from expo.experimenter.custom import CustomExperimenter
class AGRunner:
preset = "best_quality"
time_limit = 500
time_limit = 1000 # 1000s
def __init__(self, datasets):
self.datasets = datasets
def __init__(self, state=None):
self.state = state
self.datasets = self.state["datasets_dir"]
def run(self):
train_path = self.datasets["train"]
test_wo_target_path = self.datasets["test_wo_target"]
dev_wo_target_path = self.datasets["dev_wo_target"]
test_wo_target_path = self.datasets["test_wo_target"]
target_col = self.state["dataset_config"]["target_col"]
train_data = TabularDataset(train_path)
test_data = TabularDataset(test_wo_target_path)
dev_data = TabularDataset(dev_wo_target_path)
predictor = TabularPredictor(label=target_col).fit(train_data, presets=self.preset, time_limit=self.time_limit)
test_preds = predictor.predict(test_data)
test_data = TabularDataset(test_wo_target_path)
eval_metric = self.state["dataset_config"]["metric"].replace(" ", "_")
# predictor = TabularPredictor(label=target_col, eval_metric=eval_metric, path="AutogluonModels/ag-{}-{}".format(self.state['task'], datetime.now().strftime("%y%m%d_%H%M"))).fit(train_data, presets=self.preset, time_limit=self.time_limit, fit_weighted_ensemble=False, num_gpus=1)
predictor = TabularPredictor(label=target_col, eval_metric=eval_metric, path="AutogluonModels/ag-{}-{}".format(self.state['task'], datetime.now().strftime("%y%m%d_%H%M"))).fit(train_data, num_gpus=1)
dev_preds = predictor.predict(dev_data)
test_preds = predictor.predict(test_data)
return {"test_preds": test_preds, "dev_preds": dev_preds}
@ -30,4 +32,16 @@ class GluonExperimenter(CustomExperimenter):
def __init__(self, args, **kwargs):
super().__init__(args, **kwargs)
self.framework = AGRunner(self.datasets)
self.framework = AGRunner(self.state)
async def run_experiment(self):
result = self.framework.run()
user_requirement = self.state["requirement"]
dev_preds = result["dev_preds"]
test_preds = result["test_preds"]
score_dict = {
"dev_score": self.evaluate_predictions(dev_preds, "dev"),
"test_score": self.evaluate_predictions(test_preds, "test"),
}
results = [0, {"score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}]
self.save_result(results)

View file

@ -12,7 +12,7 @@ class CustomExperimenter(Experimenter):
def __init__(self, args, **kwargs):
super().__init__(args, **kwargs)
self.framework = kwargs["framework"] # todo
self.framework = kwargs.get("framework", None) # todo
self.task = kwargs.get("task", self.args.task)
self.low_is_better = kwargs.get("low_is_better", self.args.low_is_better)
self.name = kwargs.get("name", "")

View file

@ -19,7 +19,8 @@ DATASET_INSIGHT_PROMPT = """
Propose insights to help improve the performance of the model on this dataset.
The insights should be proposed based on the dataset description with different task types.
Each task type should have at least 5 insights.
Make sure each method is independent and can be implemented separately.
Make sure each method is diverse enough and can be implemented separately.
Be specific about models' choices, ensemble and tuning techniques, and preprocessing & feature engineering techniques.
# Format
```json

View file

@ -2,6 +2,7 @@ import argparse
import asyncio
from expo.experimenter.aug import AugExperimenter
from expo.experimenter.autogluon import GluonExperimenter
from expo.experimenter.custom import CustomExperimenter
from expo.experimenter.experimenter import Experimenter
from expo.experimenter.mcts import MCTSExperimenter
@ -11,7 +12,10 @@ def get_args():
parser = argparse.ArgumentParser()
parser.add_argument("--name", type=str, default="")
parser.add_argument(
"--exp_mode", type=str, default="mcts", choices=["mcts", "aug", "base", "custom", "greedy", "random"]
"--exp_mode",
type=str,
default="mcts",
choices=["mcts", "aug", "base", "custom", "greedy", "autogluon", "random"],
)
get_di_args(parser)
get_mcts_args(parser)
@ -51,6 +55,8 @@ async def main(args):
experimenter = AugExperimenter(args)
elif args.exp_mode == "base":
experimenter = Experimenter(args)
elif args.exp_mode == "autogluon":
experimenter = GluonExperimenter(args)
elif args.exp_mode == "custom":
experimenter = CustomExperimenter(args)
else: