def is_terminal(self):
    """Report whether this node has reached the terminal depth.

    Returns:
        bool: True when ``state["start_task_id"]``, converted to ``int``,
        equals ``max_depth + 1``; False otherwise.
    """
    # TODO: confirm whether the terminal value should be max_depth + 1 or max_depth.
    current_task_id = int(self.state["start_task_id"])
    return current_task_id == self.max_depth + 1
def get_mle_task_id(dataset_dir):
    """Return the task id encoded in a custom dataset directory path.

    The id is the third path component counted from the end, e.g.
    ``/data/<task>/prepared/public`` -> ``<task>``.
    NOTE(review): presumably this mirrors MLE-bench's
    ``<data_dir>/<competition_id>/prepared/public`` layout -- confirm.

    Args:
        dataset_dir: "/"-separated path to the dataset directory (anything
            ``str()`` can render as such a path).

    Returns:
        str: the third-from-last non-empty path component.

    Raises:
        IndexError: if the path has fewer than three non-empty components.
    """
    # Drop empty components so a trailing or doubled "/" cannot shift the
    # index (a plain split would turn ".../task/prepared/public/" into "prepared").
    components = [part for part in str(dataset_dir).split("/") if part]
    if len(components) < 3:
        raise IndexError(
            f"cannot derive a task id from {dataset_dir!r}: "
            "expected at least three path components"
        )
    return components[-3]


def node_evaluate_score_mlebench(node):
    """Grade a node's test-set predictions with MLE-bench.

    Args:
        node: object exposing ``state["task"]`` (used as the competition id)
            and ``get_predictions_path("test")`` (path of the CSV to grade).

    Returns:
        dict: the grading report produced by ``grade_csv(...).to_dict()``,
        with an extra ``"submission_path"`` entry holding the graded file path.
    """
    # Imported inside the function, as in the original; presumably so that
    # mlebench is only required when this evaluator is actually used.
    from pathlib import Path

    from mlebench.grade import grade_csv
    from mlebench.registry import registry

    competition_id = node.state["task"]
    pred_path = node.get_predictions_path("test")
    # NOTE(review): this re-applies the registry's own data dir; it looks like
    # a way to obtain a fresh registry instance -- confirm against mlebench.
    new_registry = registry.set_data_dir(Path(registry.get_data_dir()))
    competition = new_registry.get_competition(competition_id)
    submission = Path(pred_path)
    report = grade_csv(submission, competition).to_dict()
    report["submission_path"] = str(submission)
    return report
a/expo/run_experiment.py b/expo/run_experiment.py index 53fcdd18c..bf90cb07a 100644 --- a/expo/run_experiment.py +++ b/expo/run_experiment.py @@ -60,6 +60,11 @@ def get_di_args(parser): async def main(args): + if args.custom_dataset_dir: + args.external_eval = False + args.eval_func = "mlebench" + args.from_scratch = True + if args.exp_mode == "mcts": experimenter = MCTSExperimenter(args) elif args.exp_mode == "greedy":