# refactor: eval for good-team vote; fix regex that extracts the folder name; feat: vote_1 probability

2026-06-05 14:55:18 +02:00 · 2023-10-20 10:01:05 +08:00 · 2023-10-20 10:01:05 +08:00 · b8a30671d5
commit b8a30671d5
parent 9ad69e957f
2 changed files with 77 additions and 67 deletions
--- a/examples/werewolf_game/evals/eval.py
+++ b/examples/werewolf_game/evals/eval.py
@ -1,26 +1,16 @@
 '''
 Filename: MetaGPT/examples/werewolf_game/evals/eval.py
 Created Date: Oct 18, 2023
-Updated Date: Oct 19, 2023
+Revised Date: Oct 20, 2023
 Author: [Aria](https://github.com/ariafyy)
 Info: eval the vote correct probability of non_werewolves
-Files Tree:
-    evals
-    ├── 01-10
-    │         ├── ....txt
-    ├── 11-20
-    │         ├── ....txt
-    ├── 21-30
-    │         ├── ....txt
-    ├── outputs
-    │         ├──# 01-10_....txt
 '''

-from metagpt.const import PROJECT_ROOT
+from metagpt.const import WORKSPACE_ROOT, PROJECT_ROOT
 from pathlib import Path
 import pandas as pd
+import numpy as np
 import re
-import json
 import os, glob
 from tqdm import tqdm
 from utils import Utils
@ -28,30 +18,25 @@ from utils import Utils


 class Eval:
-    """Evaluation"""
+    """Vote Evaluation"""
    def __init__(self):
-        self.OUT_PATH = PROJECT_ROOT / "examples/werewolf_game/evals/outputs"
+        self.OUT_PATH = WORKSPACE_ROOT / "outputs"
        os.makedirs(self.OUT_PATH, exist_ok=True)
        self.SUB_FOLDER_LIST = ["01-10", "11-20", "21-30"]

-    def get_all_vote_fileslist(self):
+    def _get_log_fileslist(self, IN_PATH) -> list[str]:
        files_list = []
        for SUB_FOLDER in self.SUB_FOLDER_LIST:
-            ROOT_PATH = PROJECT_ROOT / ("examples/werewolf_game/evals/{}/").format(SUB_FOLDER)
-            tmp_files_list = Utils().get_file_list(ROOT_PATH)
-            files_list.extend(tmp_files_list)
+            files_list.extend(glob.glob(str(IN_PATH / SUB_FOLDER / '*.txt')))
        return files_list

-    def inlogfile_to_votelog(self, files_list):
-        for i in tqdm(range(0, len(files_list))):
-            in_logfile = files_list[i]
+    def extract_votes_from_logs(self, files_list: list):
+        for in_logfile in tqdm(files_list):
            SUB_FOLDER = (Path(in_logfile).parent).stem
            out_txtfile = self.OUT_PATH / "# {0}_{1}.txt".format(SUB_FOLDER, Path(in_logfile).stem)
            Utils().pick_vote_log(in_logfile, out_txtfile)
-
-    def get_picked_vote_texts(self):
-        files_list = self.get_all_vote_fileslist()
-        self.inlogfile_to_votelog(files_list)
+        votefiles_list = Utils().get_file_list(self.OUT_PATH)
+        return votefiles_list

    @staticmethod
    def parse_vote_text2chunks(text: str):
@ -141,10 +126,10 @@ class Eval:
            res = []
            for k, v in chunks.items():
                if v != "":
-                    chunksList = list(chunks.keys())
-                    vote_count = len(chunksList) - 1
+                    chunks_list = list(chunks.keys())
+                    vote_count = len(chunks_list) - 1
                    good_probability = Eval().get_vote_probability(v)
-                    folder = Utils().filename_to_folder(out_txtfile)
+                    folder = Utils().filename_to_foldername(out_txtfile)
                    result = {
                        "folder": folder,
                        "file": Path(out_txtfile).stem + ".txt",
@ -156,32 +141,52 @@ class Eval:
        df = pd.DataFrame(res)
        return df

-    def get_avg_prob_df(self):
+    def calc_avg_prob(self, IN_PATH) -> pd.DataFrame:
        """
        get avg_prob for each game
        avg_prob : the good_prob/total number of votes in the game
        """
-        out_txtfile_list = Utils().get_file_list(self.OUT_PATH)
-        df_list = []
-        for i in tqdm(range(0, len(out_txtfile_list))):
-            out_txtfile = out_txtfile_list[i]
-            file_df = Eval().get_result_df(out_txtfile)
-            df_list.append(file_df)
+        infiles_list = self._get_log_fileslist(IN_PATH)
+        votefiles_list = self.extract_votes_from_logs(infiles_list)
+        df_list = [self._load_df_from_file(file) for file in votefiles_list]
        combined_df = pd.concat(df_list, ignore_index=True)
-
        # calculate the average good_prob for each file
-        mean_probs = combined_df.groupby('file')['good_prob'].mean()
+        mean_probs = self._calculate_mean_probs(combined_df)
        combined_df['avg_prob'] = combined_df['file'].map(mean_probs)
-        combined_df['avg_prob'] = combined_df['avg_prob'].round(2)
-        combined_df['good_prob'] = combined_df['good_prob'].apply(lambda x: Utils()._float_to_percent(x))
-        combined_df['avg_prob'] = combined_df['avg_prob'].apply(lambda x: Utils()._float_to_percent(x))
+        # calculate vote1 prob
+        vote1_probs = self._calc_vote1_probs(combined_df)
+        combined_df['vote1_prob'] = combined_df['folder'].map(vote1_probs.set_index('folder')['good_prob'])
+        combined_df.loc[combined_df['votes'] != 'vote_1', 'vote1_prob'] = np.nan
+        combined_df['vote1_prob'] = combined_df['vote1_prob'].apply(self._format_probs)
+        combined_df['good_prob'] = combined_df['good_prob'].apply(self._format_probs)
+        combined_df['avg_prob'] = combined_df['avg_prob'].apply(self._format_probs)
+        combined_df.sort_values(['folder'], ascending=True, inplace=True)
        return combined_df

-    def get_result_csv(self):
-        Eval().get_picked_vote_texts()
-        combined_df = self.get_avg_prob_df()
-        combined_df.to_csv(self.OUT_PATH / 'goodteam_vote_probability.csv', index=False)
+    def _calc_vote1_probs(self, df):
+        df_vote1 = df[df['votes'] == 'vote_1']
+        vote1_probs = df_vote1.groupby('folder')['good_prob'].mean().reset_index()
+        return vote1_probs
+
+    def _load_df_from_file(self, file):
+        return self.get_result_df(file)
+
+    def _calculate_mean_probs(self, df):
+        return df.groupby('file')['good_prob'].mean()
+
+    def _format_probs(self, s):
+        return Utils().float_to_percent(s)
+
+    def get_eval_csv(self, IN_PATH, EVAL_RESULT):
+        """
+        IN_PATH : parent folder of ["01-10", "11-20", "21-30"]
+        EVAL_RESULT : output csv file path
+        """
+        combined_df = self.calc_avg_prob(IN_PATH)
+        combined_df.to_csv(EVAL_RESULT, index=False)


 if __name__ == '__main__':
-    Eval().get_result_csv()
+    IN_PATH = PROJECT_ROOT / "examples/werewolf_game/evals"
+    EVAL_RESULT = WORKSPACE_ROOT / "outputs" / 'goodteam_vote_probability.csv'
+    Eval().get_eval_csv(IN_PATH, EVAL_RESULT)
--- a/examples/werewolf_game/evals/utils.py
+++ b/examples/werewolf_game/evals/utils.py
@ -1,11 +1,13 @@
 '''
 Filename: MetaGPT/examples/werewolf_game/evals/utils.py
 Created Date: Oct 11, 2023
+Revised Date: Oct 20, 2023
 Author: [Aria](https://github.com/ariafyy)
 '''
 from metagpt.const import WORKSPACE_ROOT, PROJECT_ROOT
 import re
 import os,glob
+from pathlib import Path

 class Utils:
    """Utils: utils of logs"""
@ -57,29 +59,27 @@ class Utils:
    def pick_vote_log(in_logfile, out_txtfile):
        """
        pick the vote log from the log file.
-        ready to AnnounceGameResult serves as the 'key text' which indicates the end of the game.
+        "ready to AnnounceGameResult" serves as the HINT_TEXT, which indicates the end of the game.
        """
        pattern_vote = r'(Player\d+)\(([A-Za-z]+)\): (\d+) \| (I vote to eliminate Player\d+)'
-        key_text = r"ready to AnnounceGameResult"
+        HINT_TEXT = r"ready to AnnounceGameResult"
        pattern_moderator = r'\[([^\]]+)\]\. Say ONLY: I vote to eliminate ...'
-        with open(in_logfile, "r") as f, open(out_txtfile, "w") as out:
-            lines = f.readlines()
-            start_idx = -1
-            # find the index of key_text
-            for idx, line in enumerate(lines):
-                if key_text in line:
-                    start_idx = idx
-                    break
+        with open(in_logfile, "r") as f:
+            lines = f.read()
+            split_lines = lines.split(HINT_TEXT)

-            # if find the 'key_text'
-            if start_idx >= 0:
-                # start from 'key_text' to the end
-                relevant_lines = lines[start_idx:]
+            if len(split_lines) < 2:
+                print(f"Key text :{HINT_TEXT} not found in {in_logfile}")
+                return
+
+            relevant_lines = split_lines[1].split("\n")
+            with open(out_txtfile, "w") as out:
                for line in relevant_lines:
                    if re.search(pattern_vote, line):
-                        out.write(line)
+                        out.write(line + "\n")
                    if re.search(pattern_moderator, line):
-                        out.write(line.lstrip())
+                        out.write(line.lstrip() + "\n")
+        

    @staticmethod
    def get_file_list(path: str) -> list:
@ -88,17 +88,21 @@ class Utils:
        return files_list

    @staticmethod
-    def _filename_to_folder(out_txtfile: str):
-        """convert filename into its parent folder name"""
+    def filename_to_foldername(out_txtfile: str):
+        """
+        convert filename into its parent folder name
+        input:"....../# 01-10_10132100.txt"
+        output:# 01-10
+        """
        s = Path(out_txtfile).stem
-        pattern_folder = r'(.+)_'
+        pattern_folder = r'([^_]*)_'
        match = re.match(pattern_folder, s)
        if match:
            folder = match.group(1)
            return folder

    @staticmethod
-    def _float_to_percent(decimal: float) -> str:
+    def float_to_percent(decimal: float) -> str:
        """
        input:  1.00
        output: 100.00%
@ -109,4 +113,5 @@ class Utils:
 if __name__ == '__main__':
    in_logfile = PROJECT_ROOT / "logs/log.txt"
    out_txtfile = "input your wish path"
-    Utils().polish_log(in_logfile, out_txtfile)
+    # Utils().polish_log(in_logfile, out_txtfile)
+    Utils().pick_vote_log(in_logfile, out_txtfile)