rename expo folder to sela

This commit is contained in:
Cyzus Chi 2024-10-22 21:33:31 +08:00
parent 4bed19b931
commit 7c5b29de63
33 changed files with 53 additions and 53 deletions

View file

31
sela/experimenter/aide.py Normal file
View file

@ -0,0 +1,31 @@
import aide
import os
import time
# Placeholder credentials/endpoint exported through the environment;
# replace both values before running the script.
os.environ["OPENAI_API_KEY"] = "sk-xxx"
os.environ["OPENAI_BASE_URL"] = "your url"

# Wall-clock reference used to report the total run time at the end.
started_at = time.time()

data_dir = "xxx/data/titanic"

# Task description passed to ``aide.Experiment`` as ``goal``.
goal = f"""
# User requirement
({data_dir}, 'This is a 04_titanic dataset. Your goal is to predict the target column `Survived`.\nPerform data analysis, data preprocessing, feature engineering, and modeling to predict the target. \nReport f1 on the eval data. Do not plot or make any visualizations.\n')
# Data dir
training (with labels): train.csv
testing (without labels): test.csv
dataset description: dataset_info.json (You can use this file to get additional information about the dataset)"""

experiment = aide.Experiment(
    data_dir=data_dir,  # replace this with your own directory
    goal=goal,
    eval="f1",  # replace with your own evaluation metric
)
winner = experiment.run(steps=10)

print(f"Best solution has validation metric: {winner.valid_metric}")
print(f"Best solution code: {winner.code}")

# The elapsed time is measured after the solution has been printed.
elapsed_seconds = time.time() - started_at
print(f"run time : {elapsed_seconds} seconds")

55
sela/experimenter/aug.py Normal file
View file

@ -0,0 +1,55 @@
from sela.experimenter.experimenter import Experimenter
from sela.insights.instruction_generator import InstructionGenerator
from sela.research_assistant import ResearchAssistant
from sela.utils import get_exp_pool_path
# Template appended to the user requirement; the ``{experience}`` placeholder
# is filled with the sampled insight text via ``str.format``.
EXPS_PROMPT = """
When doing the tasks, you can refer to the insights below:
{experience}
"""
class AugExperimenter(Experimenter):
    """Experimenter that appends sampled analysis insights to the requirement."""

    result_path: str = "results/aug"

    async def run_experiment(self):
        """Run ``num_experiments`` assistants, each with its own insight text.

        In ``"single"`` mode one insight per run is drawn from the analysis
        pool; in ``"set"`` mode every run gets a freshly sampled instruction
        set rendered as ``"<task_id>: <Analysis>"`` lines.

        Raises:
            ValueError: If ``args.aug_mode`` is neither ``"single"`` nor ``"set"``.
        """
        args = self.args
        base_requirement = self.state["requirement"]
        pool_path = get_exp_pool_path(args.task, self.data_config, pool_name="ds_analysis_pool")
        pool = InstructionGenerator.load_analysis_pool(pool_path, use_fixed_insights=args.use_fixed_insights)

        if args.aug_mode == "single":
            sampled = InstructionGenerator._random_sample(pool, args.num_experiments)
            insights = [entry["Analysis"] for entry in sampled]
        elif args.aug_mode == "set":
            insights = [
                "\n".join(
                    [
                        f"{entry['task_id']}: {entry['Analysis']}"
                        for entry in InstructionGenerator.sample_instruction_set(pool)
                    ]
                )
                for _ in range(args.num_experiments)
            ]
        else:
            raise ValueError(f"Invalid mode: {self.args.aug_mode}")

        collected = []
        for run_idx in range(args.num_experiments):
            assistant = ResearchAssistant(
                node_id=str(run_idx),
                use_reflection=args.reflection,
                role_timeout=args.role_timeout,
            )
            # Give each task its own role directory.
            assistant.role_dir = f"{assistant.role_dir}_{args.task}"

            insight = insights[run_idx]
            requirement = base_requirement + EXPS_PROMPT.format(experience=insight)
            print(requirement)

            score_dict = await self.run_di(assistant, requirement, run_idx=run_idx)
            record = {
                "idx": run_idx,
                "score_dict": score_dict,
                "aug_mode": args.aug_mode,
                "insights": insight,
                "user_requirement": requirement,
                "args": vars(args),
            }
            collected.append(record)

        self.save_result(self.summarize_results(collected))

View file

@ -0,0 +1,126 @@
from datetime import datetime
from sela.experimenter.custom import CustomExperimenter
import os
import pandas as pd
class AGRunner:
    """Train AutoGluon predictors on the dataset splits described by ``state``.

    ``state`` is indexed with ``"datasets_dir"`` (split name -> CSV path),
    ``"dataset_config"`` (``"target_col"`` and ``"metric"``) and ``"task"``.
    """

    # Base directory holding the image files of multimodal tasks. The task
    # name is appended to it when image paths are resolved; override it on a
    # subclass/instance instead of editing ``load_split_dataset``.
    image_root: str = "F:/Download/Dataset/"

    def __init__(self, state=None):
        self.state = state
        # NOTE: ``state`` must be subscriptable; the ``None`` default fails here.
        self.datasets = self.state["datasets_dir"]

    def _eval_metric(self):
        """Return the configured metric name with spaces replaced by underscores."""
        return self.state["dataset_config"]["metric"].replace(" ", "_")

    def _model_path(self):
        """Return the task-specific, minute-stamped output path for a predictor."""
        return "AutogluonModels/ag-{}-{}".format(self.state["task"], datetime.now().strftime("%y%m%d_%H%M"))

    def run(self):
        """Fit a ``TabularPredictor`` and predict the dev and test splits.

        Returns:
            dict: ``{"test_preds": ..., "dev_preds": ...}`` as returned by
            ``predictor.predict``.
        """
        from autogluon.tabular import TabularDataset, TabularPredictor

        target_col = self.state["dataset_config"]["target_col"]
        train_data = TabularDataset(self.datasets["train"])
        dev_data = TabularDataset(self.datasets["dev"])
        dev_wo_target_data = TabularDataset(self.datasets["dev_wo_target"])
        test_data = TabularDataset(self.datasets["test_wo_target"])

        predictor = TabularPredictor(
            label=target_col,
            eval_metric=self._eval_metric(),
            path=self._model_path(),
        ).fit(train_data=train_data, tuning_data=dev_data, num_gpus=1)

        dev_preds = predictor.predict(dev_wo_target_data)
        test_preds = predictor.predict(test_data)
        return {"test_preds": test_preds, "dev_preds": dev_preds}

    def run_multimodal(self):
        """Fit a ``MultiModalPredictor`` and predict the dev and test splits.

        The splits are loaded through ``load_split_dataset`` so that the image
        column holds full paths.

        Returns:
            dict: ``{"dev_preds": ..., "test_preds": ...}``.
        """
        from autogluon.multimodal import MultiModalPredictor

        target_col = self.state["dataset_config"]["target_col"]
        train_data, dev_data, dev_wo_target_data, test_data = self.load_split_dataset(
            self.datasets["train"],
            self.datasets["dev"],
            self.datasets["dev_wo_target"],
            self.datasets["test_wo_target"],
        )

        predictor = MultiModalPredictor(
            label=target_col,
            eval_metric=self._eval_metric(),
            path=self._model_path(),
        ).fit(train_data=train_data, tuning_data=dev_data)

        dev_preds = predictor.predict(dev_wo_target_data)
        test_preds = predictor.predict(test_data)
        return {"dev_preds": dev_preds, "test_preds": test_preds}

    def load_split_dataset(self, train_path, dev_path, dev_wo_target_path, test_wo_target_path, root_folder=None):
        """Load the four CSV splits and prefix their image column with a root folder.

        Args:
            train_path (str): Path to the training dataset.
            dev_path (str): Path to the dev dataset with target labels.
            dev_wo_target_path (str): Path to the dev dataset without target labels.
            test_wo_target_path (str): Path to the test dataset without target labels.
            root_folder (str | None): Folder joined in front of every image
                path. Defaults to ``<image_root>/<task>``.

        Returns:
            tuple: ``(train_data, dev_data, dev_wo_target_data, test_data)`` as
            ``pd.DataFrame`` objects with updated image paths.
        """
        if root_folder is None:
            root_folder = os.path.join(self.image_root, self.state["task"])

        frames = [
            pd.read_csv(path)
            for path in (train_path, dev_path, dev_wo_target_path, test_wo_target_path)
        ]

        # The first column of the training split is assumed to be the image
        # path column, and the same column name is used for every split.
        image_column = frames[0].columns[0]
        for frame in frames:
            frame[image_column] = frame[image_column].apply(lambda rel: os.path.join(root_folder, rel))

        return tuple(frames)
class GluonExperimenter(CustomExperimenter):
    """Experimenter that scores predictions produced by an ``AGRunner``."""

    result_path: str = "results/autogluon"

    def __init__(self, args, **kwargs):
        super().__init__(args, **kwargs)
        self.framework = AGRunner(self.state)
        # ``is_multimodal`` is optional on ``args``; a missing attribute means tabular mode.
        self.is_multimodal = getattr(args, "is_multimodal", False)

    async def run_experiment(self):
        """Run the tabular or multimodal pipeline, score both splits and save the result."""
        pipeline = self.framework.run_multimodal if self.is_multimodal else self.framework.run
        result = pipeline()
        assert result is not None

        user_requirement = self.state["requirement"]
        dev_preds = result["dev_preds"]
        test_preds = result["test_preds"]
        dev_score = self.evaluate_predictions(dev_preds, "dev")
        test_score = self.evaluate_predictions(test_preds, "test")

        summary = {
            "score_dict": {"dev_score": dev_score, "test_score": test_score},
            "user_requirement": user_requirement,
            "args": vars(self.args),
        }
        # NOTE(review): the leading 0 presumably fills the slot where other
        # experimenters put their summary entry - confirm against consumers.
        self.save_result([0, summary])

View file

@ -0,0 +1,96 @@
from datetime import datetime
import pandas as pd
from sela.experimenter.custom import CustomExperimenter
from sela.evaluation.evaluation import evaluate_score
from functools import partial
def custom_scorer(y_true, y_pred, metric_name):
    """Score predictions with ``evaluate_score`` using a ``(y_true, y_pred)`` signature.

    ``evaluate_score`` is called with the predictions first, so the two
    positional arguments are swapped before delegating.
    """
    score = evaluate_score(y_pred, y_true, metric_name)
    return score
class ASRunner:
    """Train an auto-sklearn estimator on the dataset splits described by ``state``.

    ``state`` is indexed with ``"datasets_dir"`` (split name -> CSV path),
    ``"dataset_config"`` (``"target_col"`` and ``"metric"``) and ``"task"``.
    """

    # Passed to the estimator as ``time_left_for_this_task``.
    time_limit = 600
    # Passed to the estimator as ``memory_limit``.
    memory_limit = 8192

    def __init__(self, state=None):
        self.state = state
        # NOTE: ``state`` must be subscriptable; the ``None`` default fails here.
        self.datasets = self.state["datasets_dir"]

    def create_autosklearn_scorer(self, metric_name):
        """Wrap ``custom_scorer`` for ``metric_name`` in an auto-sklearn scorer."""
        from autosklearn.metrics import make_scorer

        return make_scorer(name=metric_name, score_func=partial(custom_scorer, metric_name=metric_name))

    def run(self):
        """Fit the estimator matching the configured metric and predict dev/test.

        ``"rmse"`` selects ``AutoSklearnRegressor``; ``"f1"`` and
        ``"f1 weighted"`` select ``AutoSklearnClassifier``.

        Returns:
            dict: ``{"test_preds": ..., "dev_preds": ...}``.

        Raises:
            ValueError: If the configured metric is not supported. This is
                checked before any data is read or auto-sklearn is imported.
        """
        eval_metric = self.state["dataset_config"]["metric"]
        if eval_metric == "rmse":
            is_regression = True
        elif eval_metric in ("f1", "f1 weighted"):
            is_regression = False
        else:
            raise ValueError(f"Unsupported metric: {eval_metric}")

        import autosklearn.classification
        import autosklearn.regression

        target_col = self.state["dataset_config"]["target_col"]
        train_data = pd.read_csv(self.datasets["train"])
        dev_data = pd.read_csv(self.datasets["dev_wo_target"])
        test_data = pd.read_csv(self.datasets["test_wo_target"])

        X_train = train_data.drop(columns=[target_col])
        y_train = train_data[target_col]

        estimator_cls = (
            autosklearn.regression.AutoSklearnRegressor
            if is_regression
            else autosklearn.classification.AutoSklearnClassifier
        )
        automl = estimator_cls(
            time_left_for_this_task=self.time_limit,
            metric=self.create_autosklearn_scorer(eval_metric),
            memory_limit=self.memory_limit,
            tmp_folder="AutosklearnModels/as-{}-{}".format(
                self.state["task"], datetime.now().strftime("%y%m%d_%H%M")
            ),
            n_jobs=-1,
        )
        automl.fit(X_train, y_train)

        dev_preds = automl.predict(dev_data)
        test_preds = automl.predict(test_data)
        return {"test_preds": test_preds, "dev_preds": dev_preds}
class AutoSklearnExperimenter(CustomExperimenter):
    """Experimenter that scores predictions produced by an ``ASRunner``."""

    result_path: str = "results/autosklearn"

    def __init__(self, args, **kwargs):
        super().__init__(args, **kwargs)
        self.framework = ASRunner(self.state)

    async def run_experiment(self):
        """Run auto-sklearn, score the dev and test predictions and save the result."""
        predictions = self.framework.run()
        user_requirement = self.state["requirement"]
        dev_preds, test_preds = predictions["dev_preds"], predictions["test_preds"]

        dev_score = self.evaluate_predictions(dev_preds, "dev")
        test_score = self.evaluate_predictions(test_preds, "test")

        summary = {
            "score_dict": {"dev_score": dev_score, "test_score": test_score},
            "user_requirement": user_requirement,
            "args": vars(self.args),
        }
        # NOTE(review): the leading 0 presumably fills the slot where other
        # experimenters put their summary entry - confirm against consumers.
        self.save_result([0, summary])

View file

@ -0,0 +1,62 @@
import os
import pandas as pd
from sela.evaluation.evaluation import evaluate_score
from sela.experimenter.experimenter import Experimenter
from sela.MCTS import create_initial_state
class CustomExperimenter(Experimenter):
    """Experimenter that scores predictions produced by an external framework.

    Keyword arguments read from ``kwargs``: ``framework`` (object exposing
    ``run``), ``task``, ``low_is_better`` and ``name``; the middle two fall
    back to the corresponding ``args`` attributes.
    """

    result_path: str = "results/custom"

    def __init__(self, args, **kwargs):
        super().__init__(args, **kwargs)
        self.framework = kwargs.get("framework", None)  # TODO: still a stub in the original
        self.task = kwargs.get("task", self.args.task)
        self.low_is_better = kwargs.get("low_is_better", self.args.low_is_better)
        self.name = kwargs.get("name", "")
        # NOTE(review): this instance attribute shadows the class-level
        # ``result_path`` of subclasses as well - confirm that is intended.
        self.result_path = f"results/custom_{self.name}"
        # Rebuild the state because ``self.task`` may differ from ``args.task``.
        self.state = create_initial_state(
            self.task,
            start_task_id=1,
            data_config=self.data_config,
            args=self.args,
        )

    def run_experiment(self):
        """Run the framework on the requirement, score both splits and save the result.

        NOTE(review): ``Experimenter.run_experiment`` is a coroutine while this
        override is synchronous - confirm how callers invoke it.
        """
        user_requirement = self.state["requirement"]
        preds = self.framework.run(user_requirement)
        test_preds = preds["test_preds"]
        dev_preds = preds["dev_preds"]
        score_dict = {
            "dev_score": self.evaluate_predictions(dev_preds, "dev"),
            "test_score": self.evaluate_predictions(test_preds, "test"),
        }
        # ``save_result`` calls ``list.insert`` on its argument, so the payload
        # must be a list (a bare dict raised AttributeError). The leading 0
        # mirrors the layout used by the sibling experimenters.
        results = [0, {"score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}]
        self.save_result(results)

    def evaluate_pred_files(self, dev_pred_path, test_pred_path):
        """Score prediction CSVs (column ``"target"``) for the dev and test splits.

        Returns:
            dict: ``{"dev_score": ..., "test_score": ...}``.
        """
        dev_preds = pd.read_csv(dev_pred_path)["target"]
        test_preds = pd.read_csv(test_pred_path)["target"]
        # Fixed: the original called ``self.evaluate_score``, which does not exist.
        return {
            "dev_score": self.evaluate_predictions(dev_preds, "dev"),
            "test_score": self.evaluate_predictions(test_preds, "test"),
        }

    def evaluate_predictions(self, preds, split):
        """Score ``preds`` against the ground-truth ``"target"`` column of ``split``."""
        metric = self.state["dataset_config"]["metric"]
        gt_path = self.state["datasets_dir"][f"{split}_target"]
        gt = pd.read_csv(gt_path)["target"]
        return evaluate_score(preds, gt, metric)

    def load_datasets(self):
        """Return the ``train``, ``dev`` and ``test`` splits as DataFrames."""
        paths = self.state["datasets_dir"]
        train = pd.read_csv(paths["train"])
        dev = pd.read_csv(paths["dev"])
        test = pd.read_csv(paths["test"])
        return train, dev, test

View file

@ -0,0 +1,135 @@
import datetime
import json
import os
import numpy as np
import pandas as pd
from sela.evaluation.evaluation import evaluate_score
from sela.MCTS import create_initial_state
from sela.research_assistant import ResearchAssistant
from sela.utils import DATA_CONFIG, save_notebook
class Experimenter:
    """Base experiment runner.

    Runs ``ResearchAssistant`` instances on the task requirement, scores the
    prediction files they leave behind and writes the results to
    ``<result_path>/<save_name>.json``.
    """

    result_path: str = "results/base"
    data_config = DATA_CONFIG
    start_task_id = 1

    def __init__(self, args, **kwargs):
        self.args = args
        self.start_time_raw = datetime.datetime.now()
        self.start_time = self.start_time_raw.strftime("%Y%m%d%H%M")
        self.state = create_initial_state(
            self.args.task,
            start_task_id=self.start_task_id,
            data_config=self.data_config,
            args=self.args,
        )

    async def run_di(self, di, user_requirement, run_idx):
        """Run ``di`` on the requirement, retrying up to three attempts in total.

        The notebook is saved once the attempts are over, whether or not one
        succeeded.

        Returns:
            dict: The evaluated score dict, or a dict with every score set to
            ``-1`` when all attempts failed.
        """
        max_retries = 3
        num_runs = 1
        run_finished = False
        score_dict = None
        while num_runs <= max_retries and not run_finished:
            try:
                await di.run(user_requirement)
                score_dict = await di.get_score()
                score_dict = self.evaluate(score_dict, self.state)
                run_finished = True
            except Exception as e:
                # Deliberate best-effort: report the failure and retry.
                print(f"Error: {e}")
                num_runs += 1

        save_name = self.get_save_name()
        save_notebook(role=di, save_dir=self.result_path, name=f"{save_name}_{run_idx}")

        if not run_finished:
            score_dict = {"train_score": -1, "dev_score": -1, "test_score": -1, "score": -1}
        return score_dict

    def summarize_results(self, results):
        """Insert a summary entry at index 0 of ``results`` and return the list.

        A score of ``-1`` marks a failed run. In low-is-better mode failed
        runs are ignored when picking the best score; if every run failed the
        best scores are ``np.inf``, ``best_dev_score_idx`` is ``None`` and
        ``best_dev_test_score`` is ``-1``.
        """
        dev_scores = [result["score_dict"]["dev_score"] for result in results]
        test_scores = [result["score_dict"]["test_score"] for result in results]

        if self.args.low_is_better:
            best_dev_score = min([score for score in dev_scores if score != -1] + [np.inf])
            global_best_score = min(
                [test for dev, test in zip(dev_scores, test_scores) if dev != -1] + [np.inf]
            )
        else:
            best_dev_score = max(dev_scores)
            global_best_score = max(test_scores)

        if best_dev_score in dev_scores:
            best_score_idx = dev_scores.index(best_dev_score)
            best_dev_test_score = test_scores[best_score_idx]
        else:
            # Only reachable when every run failed in low-is-better mode; the
            # original raised ValueError from ``list.index`` here.
            best_score_idx = None
            best_dev_test_score = -1

        avg_score = sum(test_scores) / len(test_scores)

        results.insert(
            0,
            {
                "best_dev_score": best_dev_score,
                "best_dev_score_idx": best_score_idx,
                "best_dev_test_score": best_dev_test_score,
                "avg_test_score": avg_score,
                "global_best_test_score": global_best_score,
            },
        )
        return results

    async def run_experiment(self):
        """Run ``args.num_experiments`` assistants and save the summarized results."""
        state = self.state
        user_requirement = state["requirement"]
        results = []

        for i in range(self.args.num_experiments):
            di = ResearchAssistant(
                node_id="0", use_reflection=self.args.reflection, role_timeout=self.args.role_timeout
            )
            score_dict = await self.run_di(di, user_requirement, run_idx=i)
            results.append(
                {"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}
            )
            self.save_result(results)  # save intermediate results
        results = self.summarize_results(results)
        self.save_result(results)

    def evaluate_prediction(self, split, state):
        """Score ``<work_dir>/<task>/<split>_predictions.csv`` against the ground truth.

        The last column of the prediction file is used as the prediction. It
        is archived into ``state["node_dir"]`` and the original file is
        removed afterwards.
        """
        pred_path = os.path.join(state["work_dir"], state["task"], f"{split}_predictions.csv")
        os.makedirs(state["node_dir"], exist_ok=True)
        pred_node_path = os.path.join(state["node_dir"], f"{self.start_time}-{split}_predictions.csv")
        gt_path = state["datasets_dir"][f"{split}_target"]

        preds = pd.read_csv(pred_path)
        preds = preds[preds.columns.tolist()[-1]]
        preds.to_csv(pred_node_path, index=False)

        gt = pd.read_csv(gt_path)["target"]
        metric = state["dataset_config"]["metric"]
        os.remove(pred_path)
        return evaluate_score(preds, gt, metric)

    def evaluate(self, score_dict, state):
        """Update ``score_dict`` with freshly computed dev and test scores and return it."""
        scores = {
            "dev_score": self.evaluate_prediction("dev", state),
            "test_score": self.evaluate_prediction("test", state),
        }
        score_dict.update(scores)
        return score_dict

    def get_save_name(self):
        """Return the ``<exp_mode>-<task>_<start_time>`` stem used for output files."""
        return f"{self.args.exp_mode}-{self.args.task}_{self.start_time}"

    def save_result(self, result):
        """Write a copy of ``result`` (a list), prefixed with timing info, as JSON."""
        end_time_raw = datetime.datetime.now()
        end_time = end_time_raw.strftime("%Y%m%d%H%M")
        time_info = {
            "start_time": self.start_time,
            "end_time": end_time,
            # ``timedelta.seconds`` drops whole days; use the full duration.
            "duration (seconds)": int((end_time_raw - self.start_time_raw).total_seconds()),
        }
        # Copy first so the caller's list is not mutated.
        result = result.copy()
        result.insert(0, time_info)

        save_name = self.get_save_name()
        os.makedirs(self.result_path, exist_ok=True)
        with open(f"{self.result_path}/{save_name}.json", "w") as f:
            json.dump(result, f, indent=4)

81
sela/experimenter/mcts.py Normal file
View file

@ -0,0 +1,81 @@
import shutil
from sela.evaluation.evaluation import (
node_evaluate_score_mlebench,
node_evaluate_score_sela,
)
from sela.evaluation.visualize_mcts import get_tree_text
from sela.experimenter.experimenter import Experimenter
from sela.Greedy import Greedy, Random
from sela.MCTS import MCTS
class MCTSExperimenter(Experimenter):
    """Experimenter that explores solutions with a tree search.

    ``tree_mode`` selects the search strategy: ``"greedy"``, ``"random"``, or
    anything else for ``MCTS``.
    """

    result_path: str = "results/mcts"
    # Grader applied to the dev-best node; assigned in ``__init__`` when
    # ``args.eval_func`` is recognised.
    eval_func = None

    def __init__(self, args, tree_mode=None, **kwargs):
        # ``start_task_id`` must be set before ``super().__init__`` because the
        # base constructor reads it to build the initial state.
        if args.special_instruction == "image":
            self.start_task_id = 1  # start from datapreprocessing if it is image task
        else:
            self.start_task_id = args.start_task_id

        if args.eval_func == "sela":
            self.eval_func = node_evaluate_score_sela
        elif args.eval_func == "mlebench":
            self.eval_func = node_evaluate_score_mlebench

        super().__init__(args, **kwargs)
        self.tree_mode = tree_mode

    async def run_experiment(self):
        """Run the search, grade the dev-best node and save results, notebooks and tree text.

        Raises:
            ValueError: If no grader was configured. This is checked before the
                search so a bad ``args.eval_func`` does not waste a full run.
        """
        if self.eval_func is None:
            raise ValueError(f"Unsupported eval_func: {self.args.eval_func}")

        use_fixed_insights = self.args.use_fixed_insights
        depth = self.args.max_depth
        if self.tree_mode == "greedy":
            mcts = Greedy(root_node=None, max_depth=depth, use_fixed_insights=use_fixed_insights)
        elif self.tree_mode == "random":
            mcts = Random(root_node=None, max_depth=depth, use_fixed_insights=use_fixed_insights)
        else:
            mcts = MCTS(root_node=None, max_depth=depth, use_fixed_insights=use_fixed_insights)

        best_nodes = await mcts.search(state=self.state, args=self.args)
        best_node = best_nodes["global_best"]
        dev_best_node = best_nodes["dev_best"]
        score_dict = best_nodes["scores"]
        additional_scores = {"grader": self.eval_func(dev_best_node)}

        text, num_generated_codes = get_tree_text(mcts.root_node)
        text += f"Generated {num_generated_codes} unique codes.\n"
        text += f"Best node: {best_node.id}, score: {best_node.raw_reward}\n"
        text += f"Dev best node: {dev_best_node.id}, score: {dev_best_node.raw_reward}\n"
        text += f"Grader score: {additional_scores['grader']}\n"
        print(text)

        results = [
            {
                "best_node": best_node.id,
                "best_node_score": best_node.raw_reward,
                "dev_best_node": dev_best_node.id,
                "dev_best_node_score": dev_best_node.raw_reward,
                "num_generated_codes": num_generated_codes,
                "user_requirement": best_node.state["requirement"],
                "tree_text": text,
                "args": vars(self.args),
                "scores": score_dict,
                "additional_scores": additional_scores,
            }
        ]
        # ``save_result`` creates ``result_path``; the copies below rely on that.
        self.save_result(results)
        self.copy_notebook(best_node, "best")
        self.copy_notebook(dev_best_node, "dev_best")
        self.save_tree(text)

    def copy_notebook(self, node, name):
        """Copy the node's notebook to ``<result_path>/<save_name>_<name>.ipynb``."""
        node_dir = node.get_node_dir()
        node_nb_dir = f"{node_dir}/Node-{node.id}.ipynb"
        save_name = self.get_save_name()
        copy_nb_dir = f"{self.result_path}/{save_name}_{name}.ipynb"
        shutil.copy(node_nb_dir, copy_nb_dir)

    def save_tree(self, tree_text):
        """Write ``tree_text`` to ``<result_path>/<save_name>_tree.txt``."""
        save_name = self.get_save_name()
        fpath = f"{self.result_path}/{save_name}_tree.txt"
        with open(fpath, "w") as f:
            f.write(tree_text)

View file

@ -0,0 +1,48 @@
# Prompt fragment describing the local submission-validation endpoint.
# The constant name is misspelled; it is kept so existing imports keep working.
SUBMISSION_INSTURCTION = """
- **Submission checker**: We provide a tool for checking the validity of your submission file, in the form of a server endpoint at `http://localhost:5000/validate`. If you post a submission file to the server (e.g. `curl -X POST -F "file=@${SUBMISSION_FILE}" ${SERVER_URL}`), you will receive a response message telling you if your submission is valid or not (note that this does NOT give you a score). For your convenience, we provide a script `/home/validate_submission.sh` which can help you make this submission to the validation server.
"""
# Correctly spelled alias for new code.
SUBMISSION_INSTRUCTION = SUBMISSION_INSTURCTION
# Main benchmark prompt. Contains the ``{dataset_dir}`` and ``{output_dir}``
# placeholders - presumably filled with ``str.format`` by the caller (confirm).
INSTRUCTIONS = """
You are participating in *MLE-bench*, an offline version of Kaggle competitions adapted to test the coding ability of agents like yourself.
You will be given a machine learning task. You must solve the task by training a model and running the model on the test set to produce a submission file. You may not hand-label the data to produce a submission file.
BENCHMARK INSTRUCTIONS
------
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `sela.experimenter.mle_bench.instructions.py`.
- **Competition-specific instructions**: The instructions for the competition are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the competition you are currently solving.
- **Dataset**: The dataset files for the competition are available in `{dataset_dir}`.
- **Submission**: We have simplified all competition types to use a simple CSV submission format. Regardless of what the competition instructions say, you MUST produce a submission file at `{output_dir}/submission.csv`. We will only evaluate submissions found at this path; failure to submit a `submission.csv` at this path will result in a failed attempt.
- **Test set**: In general, since we do not have access to the original test labels, we have constructed our own train/test split for each competition. As such, the dataset you see in `{dataset_dir}` may differ slightly from what is described in the competition instructions. Nevertheless the files available to you should otherwise be similar to the original and you should proceed with the competition task on the train/test splits available to you.
- **Cheating**: You are not allowed to view others' solutions to this competition. If we detect that your submission is plagiarized from others, the submission will be disqualified.
Your submission will be graded based on the metric in the competition-specific instructions. Your goal is to achieve the best score possible on the competition task.
Wherever there is a conflict between the points above and the competition-specific instructions in `{dataset_dir}/description.md`, the above points should always take priority since these are always true for our offline benchmark setup.
"""
# Variant of the benchmark prompt that says "task" instead of "competition"
# and omits the Kaggle reference. Uses the ``{dataset_dir}`` and
# ``{output_dir}`` placeholders - presumably filled with ``str.format`` (confirm).
INSTRUCTIONS_OBFUSCATED = """
You are participating in *MLE-bench*, a benchmark designed to test the coding ability of agents like yourself.
You will be given a machine learning task. You must solve the task by training a model and running the model on the test set to produce a submission file. You may not hand-label the data to produce a submission file.
BENCHMARK INSTRUCTIONS
------
First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `sela.experimenter.mle_bench.instructions.py`.
- **Task-specific instructions**: The instructions for the task are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the task you are currently solving.
- **Dataset**: The dataset files for the task are available in `{dataset_dir}/`.
- **Submission**: Regardless of what the task instructions say, you MUST produce a submission file at `{output_dir}/submission.csv`. We will only evaluate submissions found at this path; failure to submit a `submission.csv` at this path will result in a failed attempt.
- **Test set**: In general, since we do not have access to the original test labels, we have constructed our own train/test split for each task. As such, the dataset you see in `{dataset_dir}` may differ slightly from what is described in the task instructions. Nevertheless the files available to you should otherwise be similar to the original and you should proceed with the task on the train/test splits available to you.
- **Cheating**: You are not allowed to view others' solutions to this task. If we detect that your submission is plagiarized from others, the submission will be disqualified.
Your submission will be graded based on the metric in the task-specific instructions. Your goal is to achieve the best score possible on the task.
Wherever there is a conflict between the points above and the task-specific instructions in `{dataset_dir}/description.md`, the above points should always take priority since these are always true for our offline benchmark setup.
"""
# Extra prompt section describing the compute environment; has no placeholders.
ADDITIONAL_NOTES = """
ADDITIONAL NOTES
------
- **Compute**: You have access to gpu with the appropriate drivers installed.
"""