diff --git a/expo/MCTS.py b/expo/MCTS.py index 14f2c4e4b..dd4ad50b1 100644 --- a/expo/MCTS.py +++ b/expo/MCTS.py @@ -240,7 +240,7 @@ class MCTS(): all_children = [child for children in self.children.values() for child in children] return max(all_children, key=uct) - async def expand(self, node : Node, max_children=4): + async def expand(self, node : Node, max_children=5): await node.expand(max_children) if node not in self.children or not self.children[node]: self.children[node] = node.children @@ -273,7 +273,7 @@ class MCTS(): return best_score, best_child for child in self.children[node]: score = child.normalized_reward[split] - print(child.id, score) + print(child.id, split, score) if score > best_score: best_score = score best_child = child diff --git a/expo/dataset.py b/expo/dataset.py index 62665d297..a43f1292a 100644 --- a/expo/dataset.py +++ b/expo/dataset.py @@ -20,7 +20,7 @@ TASK_PROMPT = """\ **Attention** 1. Please do not leak the target label in any form during training. 2. Dev and Test sets do not have the target column. -3. You should perform transformations on all sets at the same step. +3. You should perform transformations on train, dev, and test sets at the same time (it's a good idea to define functions for this and avoid code repetition). 4. If labels are transformed during training, they should be transformed back to the original format before saving the predictions. ## Saving Dev and Test Predictions @@ -38,9 +38,9 @@ print("Train score:", train_score) ``` # Data dir -training: {train_path} -dev: {dev_path} -testing: {test_path} +training (with labels): {train_path} +dev (without labels): {dev_path} +testing (without labels): {test_path} # Output dir {output_dir} diff --git a/expo/evaluation/evaluation.py b/expo/evaluation/evaluation.py index 20a35aa27..886bc036d 100644 --- a/expo/evaluation/evaluation.py +++ b/expo/evaluation/evaluation.py @@ -5,7 +5,7 @@ def evaluate_score(pred, gt, metric): if metric == "accuracy": return accuracy_score(gt, pred) elif metric == "f1": - unique_classes = np.unique(gt) + unique_classes = sorted(list(np.unique(gt))) if 1 in unique_classes and 0 in unique_classes: pos_label = 1 else: diff --git a/expo/evaluation/visualize_mcts.py b/expo/evaluation/visualize_mcts.py index 6e38576e2..4199def0e 100644 --- a/expo/evaluation/visualize_mcts.py +++ b/expo/evaluation/visualize_mcts.py @@ -48,7 +48,9 @@ def get_tree_text(node : Node): for child in node.children: text += textwrap.indent(visualize_tree(child, depth+1, previous_plans), "\t") return text - - return visualize_tree(node), len(code_set) + num_simulations = node.visited + text = f"Number of simulations: {num_simulations}\n" + text += visualize_tree(node) + return text, len(code_set) diff --git a/expo/experimenter/mcts.py b/expo/experimenter/mcts.py index 0159abe24..43c5f9868 100644 --- a/expo/experimenter/mcts.py +++ b/expo/experimenter/mcts.py @@ -22,19 +22,18 @@ class MCTSExperimenter(Experimenter): text += f"Best node: {best_node}, score: {best_node.raw_reward}\n" text += f"Dev best node: {dev_best_node}, score: {dev_best_node.raw_reward}\n" print(text) - if self.args.rollouts > 0: - self.save_tree(text) + self.save_tree(text) - results = { - "best_node": best_node.id, - "best_node_score": best_node.raw_reward, - "dev_best_node": dev_best_node.id, - "dev_best_node_score": dev_best_node.raw_reward, - "num_generated_codes": num_generated_codes, - "user_requirement": best_node.state["requirement"], - "args": vars(self.args) - } - self.save_result(results) + results = { + "best_node": best_node.id, + "best_node_score": best_node.raw_reward, + "dev_best_node": dev_best_node.id, + "dev_best_node_score": dev_best_node.raw_reward, + "num_generated_codes": num_generated_codes, + "user_requirement": best_node.state["requirement"], + "args": vars(self.args) + } + self.save_result(results)