From 6344046c31d0d4c673ddfcd6c9b041f0dbfc3e6f Mon Sep 17 00:00:00 2001 From: Yizhou Chi Date: Tue, 24 Sep 2024 20:48:44 +0800 Subject: [PATCH] update aug result summarization --- expo/experimenter/aug.py | 6 +----- expo/experimenter/experimenter.py | 29 +++++++++++++++++------------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/expo/experimenter/aug.py b/expo/experimenter/aug.py index ffe0d04c5..97b819802 100644 --- a/expo/experimenter/aug.py +++ b/expo/experimenter/aug.py @@ -49,9 +49,5 @@ class AugExperimenter(Experimenter): "args": vars(self.args), } ) - scores = [result["score_dict"]["test_score"] for result in results] - avg_score = sum(scores) / len(scores) - best_score = max(scores) if not self.args.low_is_better else min(scores) - best_score_idx = scores.index(best_score) - results.insert(0, {"avg_score": avg_score, "best_score": best_score, "best_score_idx": best_score_idx}) + results = self.summarize_results(results) self.save_result(results) diff --git a/expo/experimenter/experimenter.py b/expo/experimenter/experimenter.py index 77cb5fa45..c6ead281b 100644 --- a/expo/experimenter/experimenter.py +++ b/expo/experimenter/experimenter.py @@ -47,18 +47,7 @@ class Experimenter: score_dict = {"train_score": -1, "dev_score": -1, "test_score": -1, "score": -1} return score_dict - async def run_experiment(self): - state = self.state - user_requirement = state["requirement"] - results = [] - - for i in range(self.args.num_experiments): - di = ResearchAssistant(node_id="0", use_reflection=self.args.reflection) - score_dict = await self.run_di(di, user_requirement, run_idx=i) - results.append( - {"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)} - ) - self.save_result(results) # save intermediate results + def summarize_results(self, results): dev_scores = [result["score_dict"]["dev_score"] for result in results] best_dev_score = ( max(dev_scores) @@ -85,6 +74,22 @@ class Experimenter: "global_best_test_score": global_best_score, }, ) + return results + + async def run_experiment(self): + state = self.state + user_requirement = state["requirement"] + results = [] + + for i in range(self.args.num_experiments): + di = ResearchAssistant(node_id="0", use_reflection=self.args.reflection) + score_dict = await self.run_di(di, user_requirement, run_idx=i) + results.append( + {"idx": i, "score_dict": score_dict, "user_requirement": user_requirement, "args": vars(self.args)} + ) + self.save_result(results) # save intermediate results + results = self.summarize_results(results) + self.save_result(results) def evaluate_prediction(self, split, state):