copy notebook to result after mcts

This commit is contained in:
Yizhou Chi 2024-09-20 15:53:10 +08:00
parent 32fc96cf71
commit 8dbcd46bfc
6 changed files with 47 additions and 17 deletions

View file

@ -87,6 +87,9 @@ class Node:
def get_depth(self):
    """Return the depth value stored on this node (``self.depth``)."""
    return self.depth
def get_node_dir(self):
    """Return the value stored under ``"node_dir"`` in ``self.state``.

    Raises:
        KeyError: if ``self.state`` has no ``"node_dir"`` entry.
    """
    return self.state["node_dir"]
def generate_depth(self):
if self.parent is None:
return 0

View file

@ -2,6 +2,7 @@ import datetime
import json
import os
import numpy as np
import pandas as pd
from expo.evaluation.evaluation import evaluate_score
@ -58,17 +59,21 @@ class Experimenter:
{"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)}
)
self.save_result(results) # save intermediate results
dev_scores = [
result["score_dict"]["dev_score"] for result in results if result["score_dict"]["dev_score"] != -1
]
best_dev_score = max(dev_scores) if not self.args.low_is_better else min(dev_scores)
dev_scores = [result["score_dict"]["dev_score"] for result in results]
best_dev_score = (
max(dev_scores)
if not self.args.low_is_better
else min([score for score in dev_scores if score != -1] + [np.inf])
)
best_score_idx = dev_scores.index(best_dev_score)
test_scores = [
result["score_dict"]["test_score"] for result in results if result["score_dict"]["dev_score"] != -1
]
test_scores = [result["score_dict"]["test_score"] for result in results]
avg_score = sum(test_scores) / len(test_scores)
global_best_score = max(test_scores) if not self.args.low_is_better else min(test_scores)
global_best_score = (
max(test_scores)
if not self.args.low_is_better
else min([score for i, score in enumerate(test_scores) if dev_scores[i] != -1] + [np.inf])
)
results.insert(
0,
@ -103,6 +108,9 @@ class Experimenter:
score_dict.update(scores)
return score_dict
def get_save_name(self):
    """Return the base file name used for this run's result artifacts.

    The name is ``"<exp_mode>-<task>_<start_time>"``, built from
    ``self.args.exp_mode``, ``self.args.task`` and ``self.start_time``.
    It carries no directory and no file extension; callers append those.
    """
    return f"{self.args.exp_mode}-{self.args.task}_{self.start_time}"
def save_result(self, result):
end_time_raw = datetime.datetime.now()
end_time = end_time_raw.strftime("%Y%m%d%H%M")
@ -113,6 +121,7 @@ class Experimenter:
}
result = result.copy()
result.insert(0, time_info)
save_name = self.get_save_name()
os.makedirs(self.result_path, exist_ok=True)
with open(f"{self.result_path}/{self.args.exp_mode}-{self.args.task}_{self.start_time}.json", "w") as f:
with open(f"{self.result_path}/{save_name}.json", "w") as f:
json.dump(result, f, indent=4)

View file

@ -1,3 +1,5 @@
import shutil
from expo.evaluation.visualize_mcts import get_tree_text
from expo.experimenter.experimenter import Experimenter
from expo.Greedy import Greedy, Random
@ -28,6 +30,9 @@ class MCTSExperimenter(Experimenter):
best_node = best_nodes["global_best"]
dev_best_node = best_nodes["dev_best"]
self.copy_notebook(best_node, "best")
self.copy_notebook(dev_best_node, "dev_best")
text, num_generated_codes = get_tree_text(mcts.root_node)
text += f"Generated {num_generated_codes} unique codes.\n"
text += f"Best node: {best_node.id}, score: {best_node.raw_reward}\n"
@ -49,7 +54,15 @@ class MCTSExperimenter(Experimenter):
]
self.save_result(results)
def copy_notebook(self, node, name):
    """Copy a node's notebook into the result directory.

    The source is ``<node_dir>/Node-<node.id>.ipynb`` and the destination is
    ``<result_path>/<save_name>_<name>.ipynb``, where ``save_name`` comes
    from ``self.get_save_name()``.

    Args:
        node: object exposing ``get_node_dir()`` and an ``id`` attribute.
        name: suffix appended to the save name to label the copy
            (e.g. ``"best"`` or ``"dev_best"``).

    Raises:
        FileNotFoundError: if the node's notebook file does not exist.
    """
    import os  # function-scope import keeps the block self-contained

    node_dir = node.get_node_dir()
    node_nb_dir = f"{node_dir}/Node-{node.id}.ipynb"
    save_name = self.get_save_name()
    copy_nb_dir = f"{self.result_path}/{save_name}_{name}.ipynb"
    # The result directory may not exist yet when this runs before any
    # result file has been written; shutil.copy does not create parents.
    os.makedirs(self.result_path, exist_ok=True)
    shutil.copy(node_nb_dir, copy_nb_dir)
def save_tree(self, tree_text):
    """Write the textual tree report next to the other result artifacts.

    The file is ``<result_path>/<save_name>_tree.txt``, where ``save_name``
    comes from ``self.get_save_name()``. An existing file is overwritten.

    Args:
        tree_text: the report text to write.
    """
    import os  # function-scope import keeps the block self-contained

    save_name = self.get_save_name()
    fpath = f"{self.result_path}/{save_name}_tree.txt"
    # Ensure the output directory exists; open(..., "w") does not create it.
    os.makedirs(self.result_path, exist_ok=True)
    # Explicit UTF-8 so non-ASCII report text does not depend on the
    # platform's default encoding.
    with open(fpath, "w", encoding="utf-8") as f:
        f.write(tree_text)

View file

@ -111,6 +111,8 @@ class ResearchAssistant(DataInterpreter):
if int(current_task.task_id) == self.start_task_id + 1:
# fe_id = current_task.dependent_task_ids
self.save_state()
save_notebook(role=self, save_dir=self.role_dir, name=self.get_node_name(), save_to_depth=True)
else:
save_notebook(role=self, save_dir=self.role_dir, name=self.get_node_name())
return task_result

View file

@ -91,19 +91,21 @@ def process_cells(nb: NotebookNode) -> NotebookNode:
return nb
def save_notebook(role: Role, save_dir: str = "", name: str = "", save_to_depth=False):
    """Write the role's executed notebook, and optionally a code-only copy.

    The processed notebook (``process_cells(role.execute_code.nb)``) is
    always written to ``<save_dir>/<name>.ipynb``. When ``save_to_depth`` is
    true, a second notebook containing one code cell per non-empty
    ``task.code`` of the role's plan is written to
    ``<save_dir>/<name>_clean.ipynb``.

    Args:
        role: object providing ``planner.plan.tasks`` and ``execute_code.nb``.
        save_dir: target directory; created if missing.
        name: base file name without extension.
        save_to_depth: whether to also write the ``_clean`` notebook.
    """
    save_dir = Path(save_dir)
    tasks = role.planner.plan.tasks
    nb = process_cells(role.execute_code.nb)
    os.makedirs(save_dir, exist_ok=True)
    file_path = save_dir / f"{name}.ipynb"
    nbformat.write(nb, file_path)

    if save_to_depth:
        # The code-only notebook is built solely on request, so the flag
        # actually controls whether the extra file is produced.
        clean_file_path = save_dir / f"{name}_clean.ipynb"
        clean_nb = nbformat.v4.new_notebook()
        clean_nb.cells.extend(
            nbformat.v4.new_code_cell(task.code) for task in tasks if task.code
        )
        nbformat.write(clean_nb, clean_file_path)
async def load_execute_notebook(role):

View file

@ -37,6 +37,7 @@ The current task is about training a model, please ensure high performance:
- For tabular datasets - you have access to XGBoost, CatBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression, etc.
- For image datasets - you have access to ResNet, VGG, Inception, MobileNet, DenseNet, EfficientNet, etc.
- For text datasets - you have access to BERT, GPT-2, RoBERTa, DistilBERT, T5, etc.
- Avoid the use of SVM because of its high training time.
- Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc.
- If non-numeric columns exist, perform label encode together with all steps.
- Use the data from previous task result directly, do not mock or reload data yourself.