From e2cee3905f92f661a7bacd2be102a0bf9428ffff Mon Sep 17 00:00:00 2001 From: Rayhao Date: Wed, 25 Sep 2024 22:58:04 -0700 Subject: [PATCH 1/4] add autogluon multimodal support --- expo/README.md | 2 + expo/experimenter/autogluon.py | 74 +++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/expo/README.md b/expo/README.md index 011322897..3f9e630e5 100644 --- a/expo/README.md +++ b/expo/README.md @@ -215,6 +215,8 @@ #### Setup pip install -U pip pip install -U setuptools wheel pip install autogluon + +python run_expriment.py --exp_mode autogluon --task fashion_mnist ``` 提供github链接,并说明使用的命令以及参数设置 diff --git a/expo/experimenter/autogluon.py b/expo/experimenter/autogluon.py index 93dfdb4bc..4bcba432c 100644 --- a/expo/experimenter/autogluon.py +++ b/expo/experimenter/autogluon.py @@ -32,6 +32,77 @@ class AGRunner: test_preds = predictor.predict(test_data) return {"test_preds": test_preds, "dev_preds": dev_preds} + def run_images(self): + from autogluon.multimodal import MultiModalPredictor + target_col = self.state["dataset_config"]["target_col"] + train_path = self.datasets["train"] + dev_path = self.datasets["dev"] + dev_wo_target_path = self.datasets["dev_wo_target"] # Updated variable name + test_wo_target_path = self.datasets["test_wo_target"] + eval_metric = self.state["dataset_config"]["metric"].replace(" ", "_") + + # Load the datasets + train_data, dev_data, dev_wo_target_data, test_data = self.load_split_dataset( + train_path, dev_path, dev_wo_target_path, test_wo_target_path + ) + + # Create and fit the predictor + predictor = MultiModalPredictor( + label=target_col, + eval_metric=eval_metric, + path="AutogluonModels/ag-{}-{}".format(self.state["task"], datetime.now().strftime("%y%m%d_%H%M")), + ).fit(train_data=train_data, tuning_data=dev_data, time_limit=self.time_limit) + + # Make predictions on dev and test datasets + dev_preds = predictor.predict(dev_wo_target_data) + test_preds = predictor.predict(test_data) + 
+ # Return predictions for dev and test datasets + return { + "dev_preds": dev_preds, + "test_preds": test_preds + } + + def load_split_dataset(self, train_path, dev_path, dev_wo_target_path, test_wo_target_path): + import os + import pandas as pd + """ + Loads training, dev, and test datasets from given file paths + + Args: + train_path (str): Path to the training dataset. + dev_path (str): Path to the dev dataset with target labels. + dev_wo_target_path (str): Path to the dev dataset without target labels. + test_wo_target_path (str): Path to the test dataset without target labels. + + Returns: + train_data (pd.DataFrame): Loaded training dataset with updated image paths. + dev_data (pd.DataFrame): Loaded dev dataset with updated image paths. + dev_wo_target_data (pd.DataFrame): Loaded dev dataset without target labels and updated image paths. + test_data (pd.DataFrame): Loaded test dataset with updated image paths. + """ + + # Define the root path to append + root_folder = os.path.join("F:/Download/Dataset/", self.state["task"]) + + # Load the datasets + train_data = pd.read_csv(train_path) + dev_data = pd.read_csv(dev_path) # Load dev dataset with target labels + dev_wo_target_data = pd.read_csv(dev_wo_target_path) # Load dev dataset without target labels + test_data = pd.read_csv(test_wo_target_path) + + + # Get the name of the first column (assuming it's the image path column) + + image_column = train_data.columns[0] + # Append root folder path to the image column in each dataset + train_data[image_column] = train_data[image_column].apply(lambda x: os.path.join(root_folder, x)) + dev_data[image_column] = dev_data[image_column].apply(lambda x: os.path.join(root_folder, x)) + dev_wo_target_data[image_column] = dev_wo_target_data[image_column].apply( + lambda x: os.path.join(root_folder, x)) + test_data[image_column] = test_data[image_column].apply(lambda x: os.path.join(root_folder, x)) + return train_data, dev_data, dev_wo_target_data, test_data + class 
GluonExperimenter(CustomExperimenter): result_path: str = "results/autogluon" @@ -41,7 +112,8 @@ class GluonExperimenter(CustomExperimenter): self.framework = AGRunner(self.state) async def run_experiment(self): - result = self.framework.run() + # result = self.framework.run() + result = self.framework.run_images() user_requirement = self.state["requirement"] dev_preds = result["dev_preds"] test_preds = result["test_preds"] From 1a1855f21a7379af2099e5a3cc01cace085ea8d8 Mon Sep 17 00:00:00 2001 From: Rayhao Date: Wed, 25 Sep 2024 23:28:16 -0700 Subject: [PATCH 2/4] add input param for autogluon --- expo/README.md | 12 +++++++++++- expo/experimenter/autogluon.py | 25 +++++++++++++------------ expo/run_experiment.py | 1 + 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/expo/README.md b/expo/README.md index 3f9e630e5..e5da96708 100644 --- a/expo/README.md +++ b/expo/README.md @@ -216,9 +216,19 @@ #### Setup pip install -U setuptools wheel pip install autogluon -python run_expriment.py --exp_mode autogluon --task fashion_mnist ``` +For Tabular data: +``` +python run_experiment.py --exp_mode autogluon --task {task_name} +``` +For Multimodal data: +``` +python run_experiment.py --exp_mode autogluon --task {task_name} --is_multimodal +``` +Replace {task_name} with the specific task you want to run. 
+ + 提供github链接,并说明使用的命令以及参数设置 ### AutoSklearn #### System requirements diff --git a/expo/experimenter/autogluon.py b/expo/experimenter/autogluon.py index 4bcba432c..6cb3797e3 100644 --- a/expo/experimenter/autogluon.py +++ b/expo/experimenter/autogluon.py @@ -1,12 +1,10 @@ from datetime import datetime - from expo.experimenter.custom import CustomExperimenter +import os +import pandas as pd class AGRunner: - preset = "best_quality" - time_limit = 1000 # 1000s - def __init__(self, state=None): self.state = state self.datasets = self.state["datasets_dir"] @@ -32,7 +30,7 @@ class AGRunner: test_preds = predictor.predict(test_data) return {"test_preds": test_preds, "dev_preds": dev_preds} - def run_images(self): + def run_multimodal(self): from autogluon.multimodal import MultiModalPredictor target_col = self.state["dataset_config"]["target_col"] train_path = self.datasets["train"] @@ -51,7 +49,7 @@ class AGRunner: label=target_col, eval_metric=eval_metric, path="AutogluonModels/ag-{}-{}".format(self.state["task"], datetime.now().strftime("%y%m%d_%H%M")), - ).fit(train_data=train_data, tuning_data=dev_data, time_limit=self.time_limit) + ).fit(train_data=train_data, tuning_data=dev_data) # Make predictions on dev and test datasets dev_preds = predictor.predict(dev_wo_target_data) @@ -64,8 +62,6 @@ class AGRunner: } def load_split_dataset(self, train_path, dev_path, dev_wo_target_path, test_wo_target_path): - import os - import pandas as pd """ Loads training, dev, and test datasets from given file paths @@ -91,16 +87,16 @@ class AGRunner: dev_wo_target_data = pd.read_csv(dev_wo_target_path) # Load dev dataset without target labels test_data = pd.read_csv(test_wo_target_path) - # Get the name of the first column (assuming it's the image path column) - image_column = train_data.columns[0] + # Append root folder path to the image column in each dataset train_data[image_column] = train_data[image_column].apply(lambda x: os.path.join(root_folder, x)) dev_data[image_column] = 
dev_data[image_column].apply(lambda x: os.path.join(root_folder, x)) dev_wo_target_data[image_column] = dev_wo_target_data[image_column].apply( lambda x: os.path.join(root_folder, x)) test_data[image_column] = test_data[image_column].apply(lambda x: os.path.join(root_folder, x)) + return train_data, dev_data, dev_wo_target_data, test_data @@ -110,10 +106,15 @@ class GluonExperimenter(CustomExperimenter): def __init__(self, args, **kwargs): super().__init__(args, **kwargs) self.framework = AGRunner(self.state) + self.is_multimodal = args.is_multimodal if hasattr(args, 'is_multimodal') else False async def run_experiment(self): - # result = self.framework.run() - result = self.framework.run_images() + if not self.is_multimodal: + result = self.framework.run() + else: + result = self.framework.run_multimodal() + + assert result is not None user_requirement = self.state["requirement"] dev_preds = result["dev_preds"] test_preds = result["test_preds"] diff --git a/expo/run_experiment.py b/expo/run_experiment.py index be028c47e..038b57ad2 100644 --- a/expo/run_experiment.py +++ b/expo/run_experiment.py @@ -34,6 +34,7 @@ def get_mcts_args(parser): def get_aug_exp_args(parser): parser.add_argument("--aug_mode", type=str, default="single", choices=["single", "set"]) + parser.add_argument("--is_multimodal", action="store_true", help="Specify if the model is multi-modal") def get_di_args(parser): From 3c397387f9991baa04a02b8f1e41178375f1db6f Mon Sep 17 00:00:00 2001 From: Rayhao Date: Wed, 25 Sep 2024 23:32:52 -0700 Subject: [PATCH 3/4] add tuning data for tabular mode --- expo/experimenter/autogluon.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/expo/experimenter/autogluon.py b/expo/experimenter/autogluon.py index 6cb3797e3..e5e3045f1 100644 --- a/expo/experimenter/autogluon.py +++ b/expo/experimenter/autogluon.py @@ -1,5 +1,6 @@ from datetime import datetime from expo.experimenter.custom import CustomExperimenter +from autogluon.tabular 
import TabularDataset, TabularPredictor import os import pandas as pd @@ -10,23 +11,22 @@ class AGRunner: self.datasets = self.state["datasets_dir"] def run(self): - from autogluon.tabular import TabularDataset, TabularPredictor - train_path = self.datasets["train"] + dev_path = self.datasets["dev"] dev_wo_target_path = self.datasets["dev_wo_target"] test_wo_target_path = self.datasets["test_wo_target"] target_col = self.state["dataset_config"]["target_col"] train_data = TabularDataset(train_path) - dev_data = TabularDataset(dev_wo_target_path) + dev_data = TabularDataset(dev_path) + dev_wo_target_data = TabularDataset(dev_wo_target_path) test_data = TabularDataset(test_wo_target_path) eval_metric = self.state["dataset_config"]["metric"].replace(" ", "_") - # predictor = TabularPredictor(label=target_col, eval_metric=eval_metric, path="AutogluonModels/ag-{}-{}".format(self.state['task'], datetime.now().strftime("%y%m%d_%H%M"))).fit(train_data, presets=self.preset, time_limit=self.time_limit, fit_weighted_ensemble=False, num_gpus=1) predictor = TabularPredictor( label=target_col, eval_metric=eval_metric, path="AutogluonModels/ag-{}-{}".format(self.state["task"], datetime.now().strftime("%y%m%d_%H%M")), - ).fit(train_data, num_gpus=1) - dev_preds = predictor.predict(dev_data) + ).fit(train_data=train_data, tuning_data=dev_data, num_gpus=1) + dev_preds = predictor.predict(dev_wo_target_data) test_preds = predictor.predict(test_data) return {"test_preds": test_preds, "dev_preds": dev_preds} From e2c82249b37e78bfe8c0cb2f34050db8c06021db Mon Sep 17 00:00:00 2001 From: Rayhao Date: Thu, 26 Sep 2024 21:13:48 -0700 Subject: [PATCH 4/4] import issue --- expo/experimenter/autogluon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expo/experimenter/autogluon.py b/expo/experimenter/autogluon.py index e5e3045f1..dabf0c138 100644 --- a/expo/experimenter/autogluon.py +++ b/expo/experimenter/autogluon.py @@ -1,6 +1,5 @@ from datetime import datetime from 
expo.experimenter.custom import CustomExperimenter -from autogluon.tabular import TabularDataset, TabularPredictor import os import pandas as pd @@ -11,6 +10,7 @@ class AGRunner: self.datasets = self.state["datasets_dir"] def run(self): + from autogluon.tabular import TabularDataset, TabularPredictor train_path = self.datasets["train"] dev_path = self.datasets["dev"] dev_wo_target_path = self.datasets["dev_wo_target"]