1. add eval_func for sela and compatibility to others

2. llm extract score (use all code block and execution results)
3. add argument for custom dataset dir
4. dataset custom requirement support
This commit is contained in:
Yizhou Chi 2024-10-12 17:16:51 +08:00
parent eda9322361
commit 3a57060e25
11 changed files with 202 additions and 38 deletions

38
expo/data/custom_task.py Normal file
View file

@@ -0,0 +1,38 @@
import os
from expo.experimenter.mle_bench.instructions import (
ADDITIONAL_NOTES,
INSTRUCTIONS,
INSTRUCTIONS_OBFUSCATED,
)
# Description files expected inside each MLE-bench competition dataset
# directory: the full task description and its obfuscated variant.
MLE_BENCH_FILES = ["description.md", "description_obfuscated.md"]
# Template combining the general instructions, additional notes, and the
# competition-specific task description into a single requirement prompt.
# NOTE: the placeholder was previously misspelled "additonal_notes"; it is
# fixed here together with the matching keyword in the .format() call below.
MLE_REQUIREMENTS = """
{instructions}
{additional_notes}
COMPETITION INSTRUCTIONS
------
{task_description}
"""


def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False):
    """Build the full requirement/prompt text for an MLE-bench competition.

    Args:
        dataset_dir: Directory containing the competition's description files
            (see ``MLE_BENCH_FILES``).
        data_config: Dataset configuration. Currently unused; kept to stay
            interface-compatible with other requirement builders.
        obfuscated: If True, use the obfuscated instructions and read the
            obfuscated task description file instead of the plain one.

    Returns:
        str: The formatted requirement string combining the instructions,
        additional notes, and the task description read from ``dataset_dir``.
    """
    if obfuscated:
        instructions = INSTRUCTIONS_OBFUSCATED
        task_file = "description_obfuscated.md"
    else:
        instructions = INSTRUCTIONS
        task_file = "description.md"
    # Read the competition description shipped with the dataset; pin the
    # encoding so behavior does not depend on the platform default.
    with open(os.path.join(dataset_dir, task_file), encoding="utf-8") as f:
        task_description = f.read()
    mle_requirement = MLE_REQUIREMENTS.format(
        instructions=instructions,
        additional_notes=ADDITIONAL_NOTES,
        task_description=task_description,
    )
    return mle_requirement

View file

@@ -268,7 +268,7 @@ class ExpDataset:
dataset_info = self.get_dataset_info()
num_classes = dataset_info["metadata"]["NumberOfClasses"]
if num_classes == 2:
metric = "f1"
metric = "f1 binary"
elif 2 < num_classes <= 200:
metric = "f1 weighted"
elif num_classes > 200 or num_classes == 0: