diff --git a/expo/MCTS.py b/expo/MCTS.py index 228671e2c..aa4ade944 100644 --- a/expo/MCTS.py +++ b/expo/MCTS.py @@ -87,6 +87,9 @@ class Node: def get_depth(self): return self.depth + def get_node_dir(self): + return self.state["node_dir"] + def generate_depth(self): if self.parent is None: return 0 diff --git a/expo/experimenter/experimenter.py b/expo/experimenter/experimenter.py index 155108f8d..77cb5fa45 100644 --- a/expo/experimenter/experimenter.py +++ b/expo/experimenter/experimenter.py @@ -2,6 +2,7 @@ import datetime import json import os +import numpy as np import pandas as pd from expo.evaluation.evaluation import evaluate_score @@ -58,17 +59,21 @@ class Experimenter: {"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)} ) self.save_result(results) # save intermediate results - dev_scores = [ - result["score_dict"]["dev_score"] for result in results if result["score_dict"]["dev_score"] != -1 - ] - best_dev_score = max(dev_scores) if not self.args.low_is_better else min(dev_scores) + dev_scores = [result["score_dict"]["dev_score"] for result in results] + best_dev_score = ( + max(dev_scores) + if not self.args.low_is_better + else min([score for score in dev_scores if score != -1] + [np.inf]) + ) best_score_idx = dev_scores.index(best_dev_score) - test_scores = [ - result["score_dict"]["test_score"] for result in results if result["score_dict"]["dev_score"] != -1 - ] + test_scores = [result["score_dict"]["test_score"] for result in results] avg_score = sum(test_scores) / len(test_scores) - global_best_score = max(test_scores) if not self.args.low_is_better else min(test_scores) + global_best_score = ( + max(test_scores) + if not self.args.low_is_better + else min([score for i, score in enumerate(test_scores) if dev_scores[i] != -1] + [np.inf]) + ) results.insert( 0, @@ -103,6 +108,9 @@ class Experimenter: score_dict.update(scores) return score_dict + def get_save_name(self): + return f"{self.args.exp_mode}-{self.args.task}_{self.start_time}" + def save_result(self, result): end_time_raw = datetime.datetime.now() end_time = end_time_raw.strftime("%Y%m%d%H%M") @@ -113,6 +121,7 @@ class Experimenter: } result = result.copy() result.insert(0, time_info) + save_name = self.get_save_name() os.makedirs(self.result_path, exist_ok=True) - with open(f"{self.result_path}/{self.args.exp_mode}-{self.args.task}_{self.start_time}.json", "w") as f: + with open(f"{self.result_path}/{save_name}.json", "w") as f: json.dump(result, f, indent=4) diff --git a/expo/experimenter/mcts.py b/expo/experimenter/mcts.py index 89f362b6b..5fb00ca8d 100644 --- a/expo/experimenter/mcts.py +++ b/expo/experimenter/mcts.py @@ -1,3 +1,5 @@ +import shutil + from expo.evaluation.visualize_mcts import get_tree_text from expo.experimenter.experimenter import Experimenter from expo.Greedy import Greedy, Random @@ -28,6 +30,9 @@ class MCTSExperimenter(Experimenter): best_node = best_nodes["global_best"] dev_best_node = best_nodes["dev_best"] + self.copy_notebook(best_node, "best") + self.copy_notebook(dev_best_node, "dev_best") + text, num_generated_codes = get_tree_text(mcts.root_node) text += f"Generated {num_generated_codes} unique codes.\n" text += f"Best node: {best_node.id}, score: {best_node.raw_reward}\n" @@ -49,7 +54,15 @@ class MCTSExperimenter(Experimenter): ] self.save_result(results) + def copy_notebook(self, node, name): + node_dir = node.get_node_dir() + node_nb_dir = f"{node_dir}/Node-{node.id}.ipynb" + save_name = self.get_save_name() + copy_nb_dir = f"{self.result_path}/{save_name}_{name}.ipynb" + shutil.copy(node_nb_dir, copy_nb_dir) + def save_tree(self, tree_text): - fpath = f"{self.result_path}/{self.args.task}_tree_{self.args.name}.txt" + save_name = self.get_save_name() + fpath = f"{self.result_path}/{save_name}_tree.txt" with open(fpath, "w") as f: f.write(tree_text) diff --git a/expo/research_assistant.py b/expo/research_assistant.py index b21fc1a55..51de188d3 100644 --- a/expo/research_assistant.py +++ b/expo/research_assistant.py @@ -111,6 +111,8 @@ class ResearchAssistant(DataInterpreter): if int(current_task.task_id) == self.start_task_id + 1: # fe_id = current_task.dependent_task_ids self.save_state() + save_notebook(role=self, save_dir=self.role_dir, name=self.get_node_name(), save_to_depth=True) + else: save_notebook(role=self, save_dir=self.role_dir, name=self.get_node_name()) return task_result diff --git a/expo/utils.py b/expo/utils.py index f3c0c392d..56f3c21b9 100644 --- a/expo/utils.py +++ b/expo/utils.py @@ -91,19 +91,21 @@ def process_cells(nb: NotebookNode) -> NotebookNode: return nb -def save_notebook(role: Role, save_dir: str = "", name: str = ""): +def save_notebook(role: Role, save_dir: str = "", name: str = "", save_to_depth=False): save_dir = Path(save_dir) tasks = role.planner.plan.tasks - codes = [task.code for task in tasks if task.code] - clean_nb = nbformat.v4.new_notebook() - for code in codes: - clean_nb.cells.append(nbformat.v4.new_code_cell(code)) nb = process_cells(role.execute_code.nb) os.makedirs(save_dir, exist_ok=True) file_path = save_dir / f"{name}.ipynb" - clean_file_path = save_dir / f"{name}_clean.ipynb" nbformat.write(nb, file_path) - nbformat.write(clean_nb, clean_file_path) + + if save_to_depth: + clean_file_path = save_dir / f"{name}_clean.ipynb" + codes = [task.code for task in tasks if task.code] + clean_nb = nbformat.v4.new_notebook() + for code in codes: + clean_nb.cells.append(nbformat.v4.new_code_cell(code)) + nbformat.write(clean_nb, clean_file_path) async def load_execute_notebook(role): diff --git a/metagpt/prompts/task_type.py b/metagpt/prompts/task_type.py index e670fe088..97666874d 100644 --- a/metagpt/prompts/task_type.py +++ b/metagpt/prompts/task_type.py @@ -37,6 +37,7 @@ The current task is about training a model, please ensure high performance: - For tabular datasets - you have access to XGBoost, CatBoost, random forest, extremely randomized trees, k-nearest neighbors, linear regression, etc. - For image datasets - you have access to ResNet, VGG, Inception, MobileNet, DenseNet, EfficientNet, etc. - For text datasets - you have access to BERT, GPT-2, RoBERTa, DistilBERT, T5, etc. +- Avoid the use of SVM because of its high training time. - Keep in mind that your user prioritizes results and is highly focused on model performance. So, when needed, feel free to use models of any complexity to improve effectiveness, such as XGBoost, CatBoost, etc. - If non-numeric columns exist, perform label encode together with all steps. - Use the data from previous task result directly, do not mock or reload data yourself.