Merge branch 'ray_multilmodal_0925' into 'expo'

add autogluon multimodal support

See merge request agents/exp_optimizer!23
This commit is contained in:
林义章 2024-09-27 04:26:11 +00:00
commit 98ef44acda
3 changed files with 96 additions and 10 deletions

View file

@ -215,8 +215,20 @@ #### Setup
pip install -U pip
pip install -U setuptools wheel
pip install autogluon
```
For Tabular data:
```
python run_expriment.py --exp_mode autogluon --task {task_name}
```
For Multimodal data:
```
python run_expriment.py --exp_mode autogluon --task {task_name} --is_multimodal
```
Replace {task_name} with the specific task you want to run.
提供github链接并说明使用的命令以及参数设置
### AutoSklearn
#### System requirements

View file

@ -1,37 +1,104 @@
from datetime import datetime
from expo.experimenter.custom import CustomExperimenter
import os
import pandas as pd
class AGRunner:
preset = "best_quality"
time_limit = 1000 # 1000s
def __init__(self, state=None):
self.state = state
self.datasets = self.state["datasets_dir"]
def run(self):
from autogluon.tabular import TabularDataset, TabularPredictor
train_path = self.datasets["train"]
dev_path = self.datasets["dev"]
dev_wo_target_path = self.datasets["dev_wo_target"]
test_wo_target_path = self.datasets["test_wo_target"]
target_col = self.state["dataset_config"]["target_col"]
train_data = TabularDataset(train_path)
dev_data = TabularDataset(dev_wo_target_path)
dev_data = TabularDataset(dev_path)
dev_wo_target_data = TabularDataset(dev_wo_target_path)
test_data = TabularDataset(test_wo_target_path)
eval_metric = self.state["dataset_config"]["metric"].replace(" ", "_")
# predictor = TabularPredictor(label=target_col, eval_metric=eval_metric, path="AutogluonModels/ag-{}-{}".format(self.state['task'], datetime.now().strftime("%y%m%d_%H%M"))).fit(train_data, presets=self.preset, time_limit=self.time_limit, fit_weighted_ensemble=False, num_gpus=1)
predictor = TabularPredictor(
label=target_col,
eval_metric=eval_metric,
path="AutogluonModels/ag-{}-{}".format(self.state["task"], datetime.now().strftime("%y%m%d_%H%M")),
).fit(train_data, num_gpus=1)
dev_preds = predictor.predict(dev_data)
).fit(train_data=train_data, tuning_data=dev_data, num_gpus=1)
dev_preds = predictor.predict(dev_wo_target_data)
test_preds = predictor.predict(test_data)
return {"test_preds": test_preds, "dev_preds": dev_preds}
def run_multimodal(self):
from autogluon.multimodal import MultiModalPredictor
target_col = self.state["dataset_config"]["target_col"]
train_path = self.datasets["train"]
dev_path = self.datasets["dev"]
dev_wo_target_path = self.datasets["dev_wo_target"] # Updated variable name
test_wo_target_path = self.datasets["test_wo_target"]
eval_metric = self.state["dataset_config"]["metric"].replace(" ", "_")
# Load the datasets
train_data, dev_data, dev_wo_target_data, test_data = self.load_split_dataset(
train_path, dev_path, dev_wo_target_path, test_wo_target_path
)
# Create and fit the predictor
predictor = MultiModalPredictor(
label=target_col,
eval_metric=eval_metric,
path="AutogluonModels/ag-{}-{}".format(self.state["task"], datetime.now().strftime("%y%m%d_%H%M")),
).fit(train_data=train_data, tuning_data=dev_data)
# Make predictions on dev and test datasets
dev_preds = predictor.predict(dev_wo_target_data)
test_preds = predictor.predict(test_data)
# Return predictions for dev and test datasets
return {
"dev_preds": dev_preds,
"test_preds": test_preds
}
def load_split_dataset(self, train_path, dev_path, dev_wo_target_path, test_wo_target_path):
"""
Loads training, dev, and test datasets from given file paths
Args:
train_path (str): Path to the training dataset.
dev_path (str): Path to the dev dataset with target labels.
dev_wo_target_path (str): Path to the dev dataset without target labels.
test_wo_target_path (str): Path to the test dataset without target labels.
Returns:
train_data (pd.DataFrame): Loaded training dataset with updated image paths.
dev_data (pd.DataFrame): Loaded dev dataset with updated image paths.
dev_wo_target_data (pd.DataFrame): Loaded dev dataset without target labels and updated image paths.
test_data (pd.DataFrame): Loaded test dataset with updated image paths.
"""
# Define the root path to append
root_folder = os.path.join("F:/Download/Dataset/", self.state["task"])
# Load the datasets
train_data = pd.read_csv(train_path)
dev_data = pd.read_csv(dev_path) # Load dev dataset with target labels
dev_wo_target_data = pd.read_csv(dev_wo_target_path) # Load dev dataset without target labels
test_data = pd.read_csv(test_wo_target_path)
# Get the name of the first column (assuming it's the image path column)
image_column = train_data.columns[0]
# Append root folder path to the image column in each dataset
train_data[image_column] = train_data[image_column].apply(lambda x: os.path.join(root_folder, x))
dev_data[image_column] = dev_data[image_column].apply(lambda x: os.path.join(root_folder, x))
dev_wo_target_data[image_column] = dev_wo_target_data[image_column].apply(
lambda x: os.path.join(root_folder, x))
test_data[image_column] = test_data[image_column].apply(lambda x: os.path.join(root_folder, x))
return train_data, dev_data, dev_wo_target_data, test_data
class GluonExperimenter(CustomExperimenter):
result_path: str = "results/autogluon"
@ -39,9 +106,15 @@ class GluonExperimenter(CustomExperimenter):
def __init__(self, args, **kwargs):
super().__init__(args, **kwargs)
self.framework = AGRunner(self.state)
self.is_multimodal = args.is_multimodal if hasattr(args, 'is_multimodal') else False
async def run_experiment(self):
result = self.framework.run()
if not self.is_multimodal:
result = self.framework.run()
else:
result = self.framework.run_multimodal()
assert result is not None
user_requirement = self.state["requirement"]
dev_preds = result["dev_preds"]
test_preds = result["test_preds"]

View file

@ -34,6 +34,7 @@ def get_mcts_args(parser):
def get_aug_exp_args(parser):
parser.add_argument("--aug_mode", type=str, default="single", choices=["single", "set"])
parser.add_argument("--is_multimodal", action="store_true", help="Specify if the model is multi-modal")
def get_di_args(parser):