1. add eval_func for sela and compatibility to others

2. llm extract score (use all code block and execution results)
3. add argument for custom dataset dir
4. dataset custom requirement support
This commit is contained in:
Yizhou Chi 2024-10-12 17:16:51 +08:00
parent eda9322361
commit 3a57060e25
11 changed files with 202 additions and 38 deletions

38
expo/data/custom_task.py Normal file
View file

@@ -0,0 +1,38 @@
import os
from expo.experimenter.mle_bench.instructions import (
ADDITIONAL_NOTES,
INSTRUCTIONS,
INSTRUCTIONS_OBFUSCATED,
)
# Description files expected inside each MLE-bench competition dataset
# directory: the full task description and its obfuscated variant.
MLE_BENCH_FILES = ["description.md", "description_obfuscated.md"]
# Template combining the general instructions, additional notes, and the
# competition-specific task description into a single requirement prompt.
# NOTE: the placeholder was previously misspelled "additonal_notes"; it is
# fixed here together with the matching keyword in the .format() call below.
MLE_REQUIREMENTS = """
{instructions}
{additional_notes}
COMPETITION INSTRUCTIONS
------
{task_description}
"""


def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False):
    """Build the full requirement/prompt text for an MLE-bench competition.

    Args:
        dataset_dir: Directory containing the competition's description files
            (see ``MLE_BENCH_FILES``).
        data_config: Dataset configuration. Currently unused; kept to stay
            interface-compatible with other requirement builders.
        obfuscated: If True, use the obfuscated instructions and read the
            obfuscated task description file instead of the plain one.

    Returns:
        str: The formatted requirement string combining the instructions,
        additional notes, and the task description read from ``dataset_dir``.
    """
    if obfuscated:
        instructions = INSTRUCTIONS_OBFUSCATED
        task_file = "description_obfuscated.md"
    else:
        instructions = INSTRUCTIONS
        task_file = "description.md"
    # Read the competition description shipped with the dataset; pin the
    # encoding so behavior does not depend on the platform default.
    with open(os.path.join(dataset_dir, task_file), encoding="utf-8") as f:
        task_description = f.read()
    mle_requirement = MLE_REQUIREMENTS.format(
        instructions=instructions,
        additional_notes=ADDITIONAL_NOTES,
        task_description=task_description,
    )
    return mle_requirement

View file

@@ -268,7 +268,7 @@ class ExpDataset:
dataset_info = self.get_dataset_info()
num_classes = dataset_info["metadata"]["NumberOfClasses"]
if num_classes == 2:
metric = "f1"
metric = "f1 binary"
elif 2 < num_classes <= 200:
metric = "f1 weighted"
elif num_classes > 200 or num_classes == 0: