支持跑通mle bench

2026-06-11 15:15:18 +02:00 · 2024-10-14 16:12:26 +08:00 · 2024-10-14 16:12:26 +08:00 · 1d4a845120
commit 1d4a845120
parent a91003a7fe
4 changed files with 34 additions and 8 deletions
--- a/expo/MCTS.py
+++ b/expo/MCTS.py
@@ -8,7 +8,7 @@ import shutil
 import numpy as np
 import pandas as pd

-from expo.data.custom_task import get_mle_bench_requirements
+from expo.data.custom_task import get_mle_bench_requirements, get_mle_task_id
 from expo.data.dataset import generate_task_requirement, get_split_dataset_path
 from expo.evaluation.evaluation import evaluate_score
 from expo.insights.instruction_generator import InstructionGenerator
@@ -35,6 +35,8 @@ def create_initial_state(
        datasets_dir = args.custom_dataset_dir
        requirement = get_mle_bench_requirements(args.custom_dataset_dir, data_config)
        exp_pool_path = None
+        # External eval must stay disabled when a custom dataset is used (run_experiment.main sets args.external_eval = False).
+        task = get_mle_task_id(args.custom_dataset_dir)
    else:
        dataset_config = data_config["datasets"][task]
        datasets_dir = get_split_dataset_path(task, data_config)
@@ -120,7 +122,7 @@ class Node:
            return f"{self.parent.id}-{num_sibling}"

    def is_terminal(self):
-        return int(self.state["start_task_id"]) == self.max_depth + 1
+        return int(self.state["start_task_id"]) == self.max_depth + 1  # TODO: verify the terminal condition - should this compare against max_depth or max_depth + 1?

    def is_fully_expanded(self):
        return len(self.children) > 0
--- a/expo/data/custom_task.py
+++ b/expo/data/custom_task.py
@@ -22,19 +22,26 @@ COMPETITION INSTRUCTIONS
 ## More Instructions
 - output_dir: {output_dir}
 - Besides `submission.csv`, you should also save your output in the output directory.
- Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
-
-Do not make visualizations.
+- You should split the training data into train and dev set.
+- Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory. They should be in the same format as the `submission.csv`.
+- Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. 
+**Do not make any plots or visualizations.**
 """


+def get_mle_task_id(dataset_dir):
+    return dataset_dir.split("/")[-3]
+
+
 def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False):
    work_dir = data_config["work_dir"]
-    output_dir = f"{work_dir}/output"
+    task = get_mle_task_id(dataset_dir)
+    output_dir = f"{work_dir}/{task}"
+    final_output_dir = f"{work_dir}/submission"
    os.makedirs(output_dir, exist_ok=True)

    if obfuscated:
-        instructions = INSTRUCTIONS_OBFUSCATED.format(dataset_dir=dataset_dir, output_dir=output_dir)
+        instructions = INSTRUCTIONS_OBFUSCATED.format(dataset_dir=dataset_dir, output_dir=final_output_dir)
        task_file = "description_obfuscated.md"
    else:
        instructions = INSTRUCTIONS.format(dataset_dir=dataset_dir, output_dir=output_dir)
--- a/expo/evaluation/evaluation.py
+++ b/expo/evaluation/evaluation.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import numpy as np
 from sklearn.metrics import accuracy_score, f1_score, mean_squared_error, roc_auc_score

@@ -33,4 +35,14 @@ def node_evaluate_score_sela(node):

 def node_evaluate_score_mlebench(node):
    # TODO
-    return 0
+    from mlebench.grade import grade_csv
+    from mlebench.registry import registry
+
+    competition_id = node.state["task"]
+    pred_path = node.get_predictions_path("test")
+    new_registry = registry.set_data_dir(Path(registry.get_data_dir()))
+    competition = new_registry.get_competition(competition_id)
+    submission = Path(pred_path)
+    report = grade_csv(submission, competition).to_dict()
+    report["submission_path"] = str(submission)
+    return report
--- a/expo/run_experiment.py
+++ b/expo/run_experiment.py
@@ -60,6 +60,11 @@ def get_di_args(parser):


 async def main(args):
+    if args.custom_dataset_dir:
+        args.external_eval = False
+        args.eval_func = "mlebench"
+        args.from_scratch = True
+
    if args.exp_mode == "mcts":
        experimenter = MCTSExperimenter(args)
    elif args.exp_mode == "greedy":