From 9ad69e957f4040c149e4a2fd91c64c958416f9a4 Mon Sep 17 00:00:00 2001
From: Aria F <51890782+ariafyy@users.noreply.github.com>
Date: Thu, 19 Oct 2023 12:25:02 +0800
Subject: [PATCH 1/5] # feat: eval good team vote correct probability

---
 examples/werewolf_game/evals/eval.py  | 187 ++++++++++++++++++++++++++
 examples/werewolf_game/evals/utils.py |  54 +++++++-
 2 files changed, 240 insertions(+), 1 deletion(-)
 create mode 100644 examples/werewolf_game/evals/eval.py

diff --git a/examples/werewolf_game/evals/eval.py b/examples/werewolf_game/evals/eval.py
new file mode 100644
index 000000000..ba14c48c5
--- /dev/null
+++ b/examples/werewolf_game/evals/eval.py
@@ -0,0 +1,187 @@
+'''
+Filename: MetaGPT/examples/werewolf_game/evals/eval.py
+Created Date: Oct 18, 2023
+Updated Date: Oct 19, 2023
+Author: [Aria](https://github.com/ariafyy)
+Info: eval the vote correct probability of non_werewolves
+Files Tree:
+    evals
+    ├── 01-10
+    │         ├── ....txt
+    ├── 11-20
+    │         ├── ....txt
+    ├── 21-30
+    │         ├── ....txt
+    ├── outputs
+    │         ├──# 01-10_....txt
+'''
+
+from metagpt.const import PROJECT_ROOT
+from pathlib import Path
+import pandas as pd
+import re
+import json
+import os, glob
+from tqdm import tqdm
+from utils import Utils
+
+
+
+class Eval:
+    """Evaluation"""
+    def __init__(self):
+        self.OUT_PATH = PROJECT_ROOT / "examples/werewolf_game/evals/outputs"
+        os.makedirs(self.OUT_PATH, exist_ok=True)
+        self.SUB_FOLDER_LIST = ["01-10", "11-20", "21-30"]
+
+    def get_all_vote_fileslist(self):
+        files_list = []
+        for SUB_FOLDER in self.SUB_FOLDER_LIST:
+            ROOT_PATH = PROJECT_ROOT / ("examples/werewolf_game/evals/{}/").format(SUB_FOLDER)
+            tmp_files_list = Utils().get_file_list(ROOT_PATH)
+            files_list.extend(tmp_files_list)
+        return files_list
+
+    def inlogfile_to_votelog(self, files_list):
+        for i in tqdm(range(0, len(files_list))):
+            in_logfile = files_list[i]
+            SUB_FOLDER = (Path(in_logfile).parent).stem
+            out_txtfile = self.OUT_PATH / "# {0}_{1}.txt".format(SUB_FOLDER, Path(in_logfile).stem)
+            Utils().pick_vote_log(in_logfile, out_txtfile)
+
+    def get_picked_vote_texts(self):
+        files_list = self.get_all_vote_fileslist()
+        self.inlogfile_to_votelog(files_list)
+
+    @staticmethod
+    def parse_vote_text2chunks(text: str):
+        """
+        parse each game vote log into text chunks
+
+        one chunk example:
+        ['Player1', 'Player2', 'Player3', 'Player5', 'Player6']. Say ONLY: I vote to eliminate ...
+        Player1(Witch): 49 | I vote to eliminate Player5
+        Player2(Villager): 49 | I vote to eliminate Player5
+        Player3(Villager): 49 | I vote to eliminate Player5
+        Player5(Werewolf): 49 | I vote to eliminate Player6
+        Player6(Seer): 49 | I vote to eliminate Player5
+        """
+        pattern = re.compile(r"""\[([^\]]+)\]. Say ONLY: I vote to eliminate ...""")
+        chunks = {}
+        chunk_id = 0
+        last_end = 0
+        for match in pattern.finditer(text):
+            start = match.start()
+            chunk = text[last_end:start]
+            chunks[f'vote_{chunk_id}'] = chunk.strip()
+            last_end = match.end()
+            chunk_id += 1
+        final_chunk = text[last_end:].strip()
+        if final_chunk:
+            chunks[f'vote_{chunk_id}'] = final_chunk
+        return chunks
+
+
+    def get_vote_probability(self, text: str) -> float:
+        """
+        # calculate the probability of goodteam vote werewolves
+        :example:
+
+        input:
+        ['Player1', 'Player2', 'Player3', 'Player5', 'Player6']. Say ONLY: I vote to eliminate ...
+        Player1(Witch): 49 | I vote to eliminate Player5
+        Player2(Villager): 49 | I vote to eliminate Player5
+        Player3(Villager): 49 | I vote to eliminate Player5
+        Player5(Werewolf): 49 | I vote to eliminate Player6
+        Player6(Seer): 49 | I vote to eliminate Player5
+
+        output:
+        werewolves:  ['Player5']
+        non_werewolves: ['Player1', 'Player2', 'Player3', 'Player6']
+        as you can see :Player2(Villager) and   Player3(Villager) vote to eliminate Player5(Werewolf)
+        :return goodteam vote Probability: 100.00%
+        """
+        pattern = re.compile(r'(\w+)\(([^\)]+)\): \d+ \| I vote to eliminate (\w+)')
+        # find all werewolves
+        werewolves = []
+        for match in pattern.finditer(text):
+            if match.group(2) == 'Werewolf':
+                werewolves.append(match.group(1))
+
+        # find all non_werewolves
+        non_werewolves = []
+        for match in pattern.finditer(text):
+            if match.group(2) != 'Werewolf':
+                non_werewolves.append(match.group(1))
+        num_non_werewolves = len(non_werewolves)
+
+        # count players other than werewolves made the correct votes
+        correct_votes = 0
+        for match in pattern.finditer(text):
+            if match.group(2) != 'Werewolf' and match.group(3) in werewolves:
+                correct_votes += 1
+
+        # cal the probability of non_werewolves
+        prob = correct_votes / num_non_werewolves
+        good_probability = round(prob, 2)
+        return good_probability
+
+    def get_result_df(self, out_txtfile: str) -> pd.DataFrame:
+        """
+        folder:  sub folders for evals
+        file: evaluation file, each file represents one game
+        votes: the number of votes, eg. vote_1 represents the first vote of this game,
+        good_prob:the probability of a good person voting against a werewolf, 
+                   correct_votes / the total number of players other than werewolves
+        vote_count:the total number of votes cast
+        """
+        with open(out_txtfile, "r") as out_file:
+            text = out_file.read()
+            chunks = Eval().parse_vote_text2chunks(text)
+            res = []
+            for k, v in chunks.items():
+                if v != "":
+                    chunksList = list(chunks.keys())
+                    vote_count = len(chunksList) - 1
+                    good_probability = Eval().get_vote_probability(v)
+                    folder = Utils().filename_to_folder(out_txtfile)
+                    result = {
+                        "folder": folder,
+                        "file": Path(out_txtfile).stem + ".txt",
+                        "votes": k,
+                        "good_prob": good_probability,
+                        "vote_count": vote_count
+                    }
+                    res.append(result)
+        df = pd.DataFrame(res)
+        return df
+
+    def get_avg_prob_df(self):
+        """
+        get avg_prob for each game
+        avg_prob : the good_prob/total number of votes in the game
+        """
+        out_txtfile_list = Utils().get_file_list(self.OUT_PATH)
+        df_list = []
+        for i in tqdm(range(0, len(out_txtfile_list))):
+            out_txtfile = out_txtfile_list[i]
+            file_df = Eval().get_result_df(out_txtfile)
+            df_list.append(file_df)
+        combined_df = pd.concat(df_list, ignore_index=True)
+
+        # calculate the average good_prob for each file
+        mean_probs = combined_df.groupby('file')['good_prob'].mean()
+        combined_df['avg_prob'] = combined_df['file'].map(mean_probs)
+        combined_df['avg_prob'] = combined_df['avg_prob'].round(2)
+        combined_df['good_prob'] = combined_df['good_prob'].apply(lambda x: Utils()._float_to_percent(x))
+        combined_df['avg_prob'] = combined_df['avg_prob'].apply(lambda x: Utils()._float_to_percent(x))
+        return combined_df
+
+    def get_result_csv(self):
+        Eval().get_picked_vote_texts()
+        combined_df = self.get_avg_prob_df()
+        combined_df.to_csv(self.OUT_PATH / 'goodteam_vote_probability.csv', index=False)
+
+
+if __name__ == '__main__':
+    Eval().get_result_csv()
diff --git a/examples/werewolf_game/evals/utils.py b/examples/werewolf_game/evals/utils.py
index d788496a3..cc90d94cc 100644
--- a/examples/werewolf_game/evals/utils.py
+++ b/examples/werewolf_game/evals/utils.py
@@ -5,7 +5,7 @@ Author: [Aria](https://github.com/ariafyy)
 '''
 from metagpt.const import WORKSPACE_ROOT, PROJECT_ROOT
 import re
-
+import os,glob
 
 class Utils:
     """Utils: utils of logs"""
@@ -53,6 +53,58 @@ class Utils:
                 else:
                     out.write("\n")
 
+    @staticmethod
+    def pick_vote_log(in_logfile, out_txtfile):
+        """
+        pick the vote log from the log file.
+        ready to AnnounceGameResult serves as the 'key text' which indicates the end of the game.
+        """
+        pattern_vote = r'(Player\d+)\(([A-Za-z]+)\): (\d+) \| (I vote to eliminate Player\d+)'
+        key_text = r"ready to AnnounceGameResult"
+        pattern_moderator = r'\[([^\]]+)\]\. Say ONLY: I vote to eliminate ...'
+        with open(in_logfile, "r") as f, open(out_txtfile, "w") as out:
+            lines = f.readlines()
+            start_idx = -1
+            # find the index of key_text
+            for idx, line in enumerate(lines):
+                if key_text in line:
+                    start_idx = idx
+                    break
+
+            # if find the 'key_text'
+            if start_idx >= 0:
+                # start from 'key_text' to the end
+                relevant_lines = lines[start_idx:]
+                for line in relevant_lines:
+                    if re.search(pattern_vote, line):
+                        out.write(line)
+                    if re.search(pattern_moderator, line):
+                        out.write(line.lstrip())
+
+    @staticmethod
+    def get_file_list(path: str) -> list:
+        file_pattern = os.path.join(path, '*.txt')
+        files_list = glob.glob(file_pattern)
+        return files_list
+
+    @staticmethod
+    def _filename_to_folder(out_txtfile: str):
+        """convert filename into its parent folder name"""
+        s = Path(out_txtfile).stem
+        pattern_folder = r'(.+)_'
+        match = re.match(pattern_folder, s)
+        if match:
+            folder = match.group(1)
+            return folder
+
+    @staticmethod
+    def _float_to_percent(decimal: float) -> str:
+        """
+        input:  1.00
+        output: 100.00%
+        """
+        percent = decimal * 100
+        return f"{percent:.2f}%"
 
 if __name__ == '__main__':
     in_logfile = PROJECT_ROOT / "logs/log.txt"

From b8a30671d53676764685664580fa5eb2bc21b7e0 Mon Sep 17 00:00:00 2001
From: Aria F <51890782+ariafyy@users.noreply.github.com>
Date: Fri, 20 Oct 2023 10:01:05 +0800
Subject: [PATCH 2/5] =?UTF-8?q?#=20refactor=EF=BC=9Aeval=20for=20good=20te?=
 =?UTF-8?q?am=20vote;=20fix=20re=20extract=20folder=20name;=20feat=20vote1?=
 =?UTF-8?q?=20prob?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/werewolf_game/evals/eval.py  | 97 ++++++++++++++-------------
 examples/werewolf_game/evals/utils.py | 47 +++++++------
 2 files changed, 77 insertions(+), 67 deletions(-)

diff --git a/examples/werewolf_game/evals/eval.py b/examples/werewolf_game/evals/eval.py
index ba14c48c5..f4adee1d4 100644
--- a/examples/werewolf_game/evals/eval.py
+++ b/examples/werewolf_game/evals/eval.py
@@ -1,26 +1,16 @@
 '''
 Filename: MetaGPT/examples/werewolf_game/evals/eval.py
 Created Date: Oct 18, 2023
-Updated Date: Oct 19, 2023
+Revised Date: Oct 20, 2023
 Author: [Aria](https://github.com/ariafyy)
 Info: eval the vote correct probability of non_werewolves
-Files Tree:
-    evals
-    ├── 01-10
-    │         ├── ....txt
-    ├── 11-20
-    │         ├── ....txt
-    ├── 21-30
-    │         ├── ....txt
-    ├── outputs
-    │         ├──# 01-10_....txt
 '''
 
-from metagpt.const import PROJECT_ROOT
+from metagpt.const import WORKSPACE_ROOT, PROJECT_ROOT
 from pathlib import Path
 import pandas as pd
+import numpy as np
 import re
-import json
 import os, glob
 from tqdm import tqdm
 from utils import Utils
@@ -28,30 +18,25 @@ from utils import Utils
 
 
 class Eval:
-    """Evaluation"""
+    """Vote Evaluation"""
     def __init__(self):
-        self.OUT_PATH = PROJECT_ROOT / "examples/werewolf_game/evals/outputs"
+        self.OUT_PATH = WORKSPACE_ROOT / "outputs"
         os.makedirs(self.OUT_PATH, exist_ok=True)
         self.SUB_FOLDER_LIST = ["01-10", "11-20", "21-30"]
 
-    def get_all_vote_fileslist(self):
+    def _get_log_fileslist(self, IN_PATH) -> list[str]:
         files_list = []
         for SUB_FOLDER in self.SUB_FOLDER_LIST:
-            ROOT_PATH = PROJECT_ROOT / ("examples/werewolf_game/evals/{}/").format(SUB_FOLDER)
-            tmp_files_list = Utils().get_file_list(ROOT_PATH)
-            files_list.extend(tmp_files_list)
+            files_list.extend(glob.glob(str(IN_PATH / SUB_FOLDER / '*.txt')))
         return files_list
 
-    def inlogfile_to_votelog(self, files_list):
-        for i in tqdm(range(0, len(files_list))):
-            in_logfile = files_list[i]
+    def extract_votes_from_logs(self, files_list: list):
+        for in_logfile in tqdm(files_list):
             SUB_FOLDER = (Path(in_logfile).parent).stem
             out_txtfile = self.OUT_PATH / "# {0}_{1}.txt".format(SUB_FOLDER, Path(in_logfile).stem)
             Utils().pick_vote_log(in_logfile, out_txtfile)
-
-    def get_picked_vote_texts(self):
-        files_list = self.get_all_vote_fileslist()
-        self.inlogfile_to_votelog(files_list)
+        votefiles_list = Utils().get_file_list(self.OUT_PATH)
+        return votefiles_list
 
     @staticmethod
     def parse_vote_text2chunks(text: str):
@@ -141,10 +126,10 @@ class Eval:
             res = []
             for k, v in chunks.items():
                 if v != "":
-                    chunksList = list(chunks.keys())
-                    vote_count = len(chunksList) - 1
+                    chunks_list = list(chunks.keys())
+                    vote_count = len(chunks_list) - 1
                     good_probability = Eval().get_vote_probability(v)
-                    folder = Utils().filename_to_folder(out_txtfile)
+                    folder = Utils().filename_to_foldername(out_txtfile)
                     result = {
                         "folder": folder,
                         "file": Path(out_txtfile).stem + ".txt",
@@ -156,32 +141,52 @@ class Eval:
         df = pd.DataFrame(res)
         return df
 
-    def get_avg_prob_df(self):
+    def calc_avg_prob(self, IN_PATH) -> pd.DataFrame:
         """
         get avg_prob for each game
         avg_prob : the good_prob/total number of votes in the game
         """
-        out_txtfile_list = Utils().get_file_list(self.OUT_PATH)
-        df_list = []
-        for i in tqdm(range(0, len(out_txtfile_list))):
-            out_txtfile = out_txtfile_list[i]
-            file_df = Eval().get_result_df(out_txtfile)
-            df_list.append(file_df)
+        infiles_list = self._get_log_fileslist(IN_PATH)
+        votefiles_list = self.extract_votes_from_logs(infiles_list)
+        df_list = [self._load_df_from_file(file) for file in votefiles_list]
         combined_df = pd.concat(df_list, ignore_index=True)
-
         # calculate the average good_prob for each file
-        mean_probs = combined_df.groupby('file')['good_prob'].mean()
+        mean_probs = self._calculate_mean_probs(combined_df)
         combined_df['avg_prob'] = combined_df['file'].map(mean_probs)
-        combined_df['avg_prob'] = combined_df['avg_prob'].round(2)
-        combined_df['good_prob'] = combined_df['good_prob'].apply(lambda x: Utils()._float_to_percent(x))
-        combined_df['avg_prob'] = combined_df['avg_prob'].apply(lambda x: Utils()._float_to_percent(x))
+        # calculate vote1 prob
+        vote1_probs = self._calc_vote1_probs(combined_df)
+        combined_df['vote1_prob'] = combined_df['folder'].map(vote1_probs.set_index('folder')['good_prob'])
+        combined_df.loc[combined_df['votes'] != 'vote_1', 'vote1_prob'] = np.nan
+        combined_df['vote1_prob'] = combined_df['vote1_prob'].apply(self._format_probs)
+        combined_df['good_prob'] = combined_df['good_prob'].apply(self._format_probs)
+        combined_df['avg_prob'] = combined_df['avg_prob'].apply(self._format_probs)
+        combined_df.sort_values(['folder'], ascending=True, inplace=True)
         return combined_df
 
-    def get_result_csv(self):
-        Eval().get_picked_vote_texts()
-        combined_df = self.get_avg_prob_df()
-        combined_df.to_csv(self.OUT_PATH / 'goodteam_vote_probability.csv', index=False)
+    def _calc_vote1_probs(self, df):
+        df_vote1 = df[df['votes'] == 'vote_1']
+        vote1_probs = df_vote1.groupby('folder')['good_prob'].mean().reset_index()
+        return vote1_probs
+
+    def _load_df_from_file(self, file):
+        return self.get_result_df(file)
+
+    def _calculate_mean_probs(self, df):
+        return df.groupby('file')['good_prob'].mean()
+
+    def _format_probs(self, s):
+        return Utils().float_to_percent(s)
+
+    def get_eval_csv(self, IN_PATH, EVAL_RESULT):
+        """
+        IN_PATH : parent folder of ["01-10", "11-20", "21-30"]
+        EVAL_RESULT : output csv file path
+        """
+        combined_df = self.calc_avg_prob(IN_PATH)
+        combined_df.to_csv(EVAL_RESULT, index=False)
 
 
 if __name__ == '__main__':
-    Eval().get_result_csv()
+    IN_PATH = PROJECT_ROOT / "examples/werewolf_game/evals"
+    EVAL_RESULT = WORKSPACE_ROOT / "outputs" / 'goodteam_vote_probability.csv'
+    Eval().get_eval_csv(IN_PATH, EVAL_RESULT)
diff --git a/examples/werewolf_game/evals/utils.py b/examples/werewolf_game/evals/utils.py
index cc90d94cc..8f2b16db5 100644
--- a/examples/werewolf_game/evals/utils.py
+++ b/examples/werewolf_game/evals/utils.py
@@ -1,11 +1,13 @@
 '''
 Filename: MetaGPT/examples/werewolf_game/evals/utils.py
 Created Date: Oct 11, 2023
+Revised Date: Oct 20, 2023
 Author: [Aria](https://github.com/ariafyy)
 '''
 from metagpt.const import WORKSPACE_ROOT, PROJECT_ROOT
 import re
 import os,glob
+from pathlib import Path
 
 class Utils:
     """Utils: utils of logs"""
@@ -57,29 +59,27 @@ class Utils:
     def pick_vote_log(in_logfile, out_txtfile):
         """
         pick the vote log from the log file.
-        ready to AnnounceGameResult serves as the 'key text' which indicates the end of the game.
+        ready to AnnounceGameResult serves as the 'HINT_TEXT ' which indicates the end of the game.
         """
         pattern_vote = r'(Player\d+)\(([A-Za-z]+)\): (\d+) \| (I vote to eliminate Player\d+)'
-        key_text = r"ready to AnnounceGameResult"
+        HINT_TEXT = r"ready to AnnounceGameResult"
         pattern_moderator = r'\[([^\]]+)\]\. Say ONLY: I vote to eliminate ...'
-        with open(in_logfile, "r") as f, open(out_txtfile, "w") as out:
-            lines = f.readlines()
-            start_idx = -1
-            # find the index of key_text
-            for idx, line in enumerate(lines):
-                if key_text in line:
-                    start_idx = idx
-                    break
+        with open(in_logfile, "r") as f:
+            lines = f.read()
+            split_lines = lines.split(HINT_TEXT)
 
-            # if find the 'key_text'
-            if start_idx >= 0:
-                # start from 'key_text' to the end
-                relevant_lines = lines[start_idx:]
+            if len(split_lines) < 2:
+                print(f"Key text :{HINT_TEXT} not found in {in_logfile}")
+                return
+
+            relevant_lines = split_lines[1].split("\n")
+            with open(out_txtfile, "w") as out:
                 for line in relevant_lines:
                     if re.search(pattern_vote, line):
-                        out.write(line)
+                        out.write(line + "\n")
                     if re.search(pattern_moderator, line):
-                        out.write(line.lstrip())
+                        out.write(line.lstrip() + "\n")
+        
 
     @staticmethod
     def get_file_list(path: str) -> list:
@@ -88,17 +88,21 @@ class Utils:
         return files_list
 
     @staticmethod
-    def _filename_to_folder(out_txtfile: str):
-        """convert filename into its parent folder name"""
+    def filename_to_foldername(out_txtfile: str):
+        """
+        convert filename into its parent folder name
+        input:"....../# 01-10_10132100.txt"
+        output:# 01-10
+        """
         s = Path(out_txtfile).stem
-        pattern_folder = r'(.+)_'
+        pattern_folder = r'([^_]*)_'
         match = re.match(pattern_folder, s)
         if match:
             folder = match.group(1)
             return folder
 
     @staticmethod
-    def _float_to_percent(decimal: float) -> str:
+    def float_to_percent(decimal: float) -> str:
         """
         input:  1.00
         output: 100.00%
@@ -109,4 +113,5 @@ class Utils:
 if __name__ == '__main__':
     in_logfile = PROJECT_ROOT / "logs/log.txt"
     out_txtfile = "input your wish path"
-    Utils().polish_log(in_logfile, out_txtfile)
+    # Utils().polish_log(in_logfile, out_txtfile)
+    Utils().pick_vote_log(in_logfile, out_txtfile)

From 3b958fee9209dcd49090909f75881042f068630c Mon Sep 17 00:00:00 2001
From: Aria F <51890782+ariafyy@users.noreply.github.com>
Date: Fri, 20 Oct 2023 21:53:08 +0800
Subject: [PATCH 3/5] =?UTF-8?q?#feat:calc=20votewolf=5Fdifficulty;=20refac?=
 =?UTF-8?q?tor=EF=BC=9Avariable=20name;fix:ignore=20reflection=20vote?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 examples/werewolf_game/evals/eval.py  | 69 +++++++++++++++++----------
 examples/werewolf_game/evals/utils.py | 14 ++++--
 2 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/examples/werewolf_game/evals/eval.py b/examples/werewolf_game/evals/eval.py
index f4adee1d4..f16a357e4 100644
--- a/examples/werewolf_game/evals/eval.py
+++ b/examples/werewolf_game/evals/eval.py
@@ -17,7 +17,7 @@ from utils import Utils
 
 
 
-class Eval:
+class Vote:
     """Vote Evaluation"""
     def __init__(self):
         self.OUT_PATH = WORKSPACE_ROOT / "outputs"
@@ -67,9 +67,12 @@ class Eval:
         return chunks
 
 
-    def get_vote_probability(self, text: str) -> float:
+    def get_vote_prob_difficulity(self, text: str) -> float:
         """
         # calculate the probability of goodteam vote werewolves
+        # vote_wolf_difficulty: num_voted_wolfs / num_living_players
+        sometimes werewolf will camouflage as a good person and vote wolf
+
         :example:
 
         input:
@@ -85,6 +88,7 @@ class Eval:
         non_werewolves: ['Player1', 'Player2', 'Player3', 'Player6']
         as you can see :Player2(Villager) and   Player3(Villager) vote to eliminate Player5(Werewolf)
         :return goodteam vote Probability: 100.00%
+        :return vote_wolf_difficulty: 4 / 5
         """
         pattern = re.compile(r'(\w+)\(([^\)]+)\): \d+ \| I vote to eliminate (\w+)')
         # find all werewolves
@@ -108,71 +112,88 @@ class Eval:
 
         # cal the probability of non_werewolves
         prob = correct_votes / num_non_werewolves
-        good_probability = round(prob, 2)
-        return good_probability
+        good_vote_prob = round(prob, 2)
+
+        # count the num of living players voting wolfs, ignore their positions
+        vote2eliminate_wolfs = []
+        for match in pattern.finditer(text):
+            if match.group(2) != 'Werewolf' and match.group(3) in werewolves:
+                correct_votes += 1
+            if match.group(3) in werewolves:
+                vote2eliminate_wolfs.append(match.group(3))
+        num_living_players = len(werewolves) + len(non_werewolves)
+        num_vote2eliminate_wolfs = len(set(vote2eliminate_wolfs))
+        votewolf_difficulty = "{0} / {1}".format(num_vote2eliminate_wolfs, num_living_players)
+        return good_vote_prob, votewolf_difficulty
+
 
     def get_result_df(self, out_txtfile: str) -> pd.DataFrame:
         """
         folder:  sub folders for evals
         file: evaluation file, each file represents one game
         votes: the number of votes, eg. vote_1 represents the first vote of this game,
-        good_prob:the probability of a good person voting against a werewolf, 
+        good_vote_prob:the probability of a good person voting against a werewolf, 
                    correct_votes / the total number of players other than werewolves
-        vote_count:the total number of votes cast
+        total_votes:the total number of votes cast
+        vote_wolf_difficulty: num_voted_wolfs / num_living_players
+        sometimes werewolf will camouflage as a good person and vote wolf
         """
         with open(out_txtfile, "r") as out_file:
             text = out_file.read()
-            chunks = Eval().parse_vote_text2chunks(text)
+            chunks = self.parse_vote_text2chunks(text)
             res = []
             for k, v in chunks.items():
                 if v != "":
                     chunks_list = list(chunks.keys())
-                    vote_count = len(chunks_list) - 1
-                    good_probability = Eval().get_vote_probability(v)
+                    total_votes = len(chunks_list) - 1
+                    good_vote_prob, votewolf_difficulty = self.get_vote_prob_difficulity(v)
                     folder = Utils().filename_to_foldername(out_txtfile)
                     result = {
                         "folder": folder,
                         "file": Path(out_txtfile).stem + ".txt",
-                        "votes": k,
-                        "good_prob": good_probability,
-                        "vote_count": vote_count
+                        "vote_round": k,
+                        "good_vote_prob": good_vote_prob,
+                        "total_votes": total_votes,
+                        "votewolf_difficulty": votewolf_difficulty
                     }
                     res.append(result)
         df = pd.DataFrame(res)
         return df
+ 
 
     def calc_avg_prob(self, IN_PATH) -> pd.DataFrame:
         """
         get avg_prob for each game
         avg_prob : the good_prob/total number of votes in the game
+        vote1_prob: only check vote round1 , eval the mean of good_vote_prob
         """
         infiles_list = self._get_log_fileslist(IN_PATH)
         votefiles_list = self.extract_votes_from_logs(infiles_list)
         df_list = [self._load_df_from_file(file) for file in votefiles_list]
         combined_df = pd.concat(df_list, ignore_index=True)
-        # calculate the average good_prob for each file
+        # calculate the average good_vote_prob for each file
         mean_probs = self._calculate_mean_probs(combined_df)
-        combined_df['avg_prob'] = combined_df['file'].map(mean_probs)
+        combined_df["avg_prob"] = combined_df["file"].map(mean_probs)
         # calculate vote1 prob
         vote1_probs = self._calc_vote1_probs(combined_df)
-        combined_df['vote1_prob'] = combined_df['folder'].map(vote1_probs.set_index('folder')['good_prob'])
-        combined_df.loc[combined_df['votes'] != 'vote_1', 'vote1_prob'] = np.nan
-        combined_df['vote1_prob'] = combined_df['vote1_prob'].apply(self._format_probs)
-        combined_df['good_prob'] = combined_df['good_prob'].apply(self._format_probs)
-        combined_df['avg_prob'] = combined_df['avg_prob'].apply(self._format_probs)
-        combined_df.sort_values(['folder'], ascending=True, inplace=True)
+        combined_df["vote1_prob"] = combined_df["folder"].map(vote1_probs.set_index("folder")["good_vote_prob"])
+        combined_df.loc[combined_df["vote_round"] != "vote_1", "vote1_prob"] = np.nan
+        combined_df["vote1_prob"] = combined_df["vote1_prob"].apply(self._format_probs)
+        combined_df["good_vote_prob"] = combined_df["good_vote_prob"].apply(self._format_probs)
+        combined_df["avg_prob"] = combined_df["avg_prob"].apply(self._format_probs)
+        combined_df.sort_values(["folder"], ascending=True, inplace=True)
         return combined_df
 
     def _calc_vote1_probs(self, df):
-        df_vote1 = df[df['votes'] == 'vote_1']
-        vote1_probs = df_vote1.groupby('folder')['good_prob'].mean().reset_index()
+        df_vote1 = df[df["vote_round"] == 'vote_1']
+        vote1_probs = df_vote1.groupby("folder")["good_vote_prob"].mean().reset_index()
         return vote1_probs
 
     def _load_df_from_file(self, file):
         return self.get_result_df(file)
 
     def _calculate_mean_probs(self, df):
-        return df.groupby('file')['good_prob'].mean()
+        return df.groupby("file")["good_vote_prob"].mean()
 
     def _format_probs(self, s):
         return Utils().float_to_percent(s)
@@ -189,4 +210,4 @@ class Eval:
 if __name__ == '__main__':
     IN_PATH = PROJECT_ROOT / "examples/werewolf_game/evals"
     EVAL_RESULT = WORKSPACE_ROOT / "outputs" / 'goodteam_vote_probability.csv'
-    Eval().get_eval_csv(IN_PATH, EVAL_RESULT)
+    Vote().get_eval_csv(IN_PATH, EVAL_RESULT)
diff --git a/examples/werewolf_game/evals/utils.py b/examples/werewolf_game/evals/utils.py
index 8f2b16db5..33632063f 100644
--- a/examples/werewolf_game/evals/utils.py
+++ b/examples/werewolf_game/evals/utils.py
@@ -60,10 +60,14 @@ class Utils:
         """
         pick the vote log from the log file.
         ready to AnnounceGameResult serves as the 'HINT_TEXT ' which indicates the end of the game.
+        based on observation and reflection, then discuss is not in the vote session.
         """
         pattern_vote = r'(Player\d+)\(([A-Za-z]+)\): (\d+) \| (I vote to eliminate Player\d+)'
+        ignore_text = """reflection"""
         HINT_TEXT = r"ready to AnnounceGameResult"
         pattern_moderator = r'\[([^\]]+)\]\. Say ONLY: I vote to eliminate ...'
+        in_valid_block = False
+
         with open(in_logfile, "r") as f:
             lines = f.read()
             split_lines = lines.split(HINT_TEXT)
@@ -75,12 +79,16 @@ class Utils:
             relevant_lines = split_lines[1].split("\n")
             with open(out_txtfile, "w") as out:
                 for line in relevant_lines:
-                    if re.search(pattern_vote, line):
-                        out.write(line + "\n")
                     if re.search(pattern_moderator, line):
+                        in_valid_block = True
                         out.write(line.lstrip() + "\n")
-        
 
+                    elif in_valid_block and re.search(pattern_vote, line):
+                        out.write(line + "\n")
+                    elif ignore_text in line:
+                        in_valid_block = False
+
+        
     @staticmethod
     def get_file_list(path: str) -> list:
         file_pattern = os.path.join(path, '*.txt')

From 5778b6ebfdb2b02533e7aeca12eb0747b1806af1 Mon Sep 17 00:00:00 2001
From: Aria F <51890782+ariafyy@users.noreply.github.com>
Date: Tue, 24 Oct 2023 11:40:54 +0800
Subject: [PATCH 4/5] fix: votewolf_difficulty bug; refactor :rename prob into
 rate; perf: py func structure

---
 examples/werewolf_game/evals/eval.py  | 106 +++++++++++++-------------
 examples/werewolf_game/evals/utils.py |   1 -
 2 files changed, 54 insertions(+), 53 deletions(-)

diff --git a/examples/werewolf_game/evals/eval.py b/examples/werewolf_game/evals/eval.py
index f16a357e4..7d8a0e58e 100644
--- a/examples/werewolf_game/evals/eval.py
+++ b/examples/werewolf_game/evals/eval.py
@@ -3,7 +3,7 @@ Filename: MetaGPT/examples/werewolf_game/evals/eval.py
 Created Date: Oct 18, 2023
 Revised Date: Oct 20, 2023
 Author: [Aria](https://github.com/ariafyy)
-Info: eval the vote correct probability of non_werewolves
+Info: eval the Voting Accuracy Rate of non_werewolves and Vote Difficulty
 '''
 
 from metagpt.const import WORKSPACE_ROOT, PROJECT_ROOT
@@ -66,13 +66,9 @@ class Vote:
             chunks[f'vote_{chunk_id}'] = final_chunk
         return chunks
 
-
-    def get_vote_prob_difficulity(self, text: str) -> float:
+    def _vote_rate_players(self, text: str):
         """
-        # calculate the probability of goodteam vote werewolves
-        # vote_wolf_difficulty: num_voted_wolfs / num_living_players
-        sometimes werewolf will camouflage as a good person and vote wolf
-
+        # calculate the rateability of goodteam vote werewolves
         :example:
 
         input:
@@ -87,13 +83,13 @@ class Vote:
         werewolves:  ['Player5']
         non_werewolves: ['Player1', 'Player2', 'Player3', 'Player6']
         as you can see :Player2(Villager) and   Player3(Villager) vote to eliminate Player5(Werewolf)
-        :return goodteam vote Probability: 100.00%
-        :return vote_wolf_difficulty: 4 / 5
+        :return goodteam vote rate: 100.00%
         """
         pattern = re.compile(r'(\w+)\(([^\)]+)\): \d+ \| I vote to eliminate (\w+)')
         # find all werewolves
         werewolves = []
         for match in pattern.finditer(text):
+
             if match.group(2) == 'Werewolf':
                 werewolves.append(match.group(1))
 
@@ -110,33 +106,37 @@ class Vote:
             if match.group(2) != 'Werewolf' and match.group(3) in werewolves:
                 correct_votes += 1
 
-        # cal the probability of non_werewolves
-        prob = correct_votes / num_non_werewolves
-        good_vote_prob = round(prob, 2)
+        # calculate the rate of non_werewolves
+        rate = correct_votes / num_non_werewolves
+        good_vote_rate = round(rate, 2)
+        return {"good_vote_rate": good_vote_rate, "werewolves": werewolves, "non_werewolves": non_werewolves}
 
-        # count the num of living players voting wolfs, ignore their positions
-        vote2eliminate_wolfs = []
-        for match in pattern.finditer(text):
-            if match.group(2) != 'Werewolf' and match.group(3) in werewolves:
-                correct_votes += 1
-            if match.group(3) in werewolves:
-                vote2eliminate_wolfs.append(match.group(3))
+    def get_goodteam_vote_rate(self, text: str) -> float:
+        goodteam_vote_rate = self._vote_rate_players(text)["good_vote_rate"]
+        return goodteam_vote_rate
+
+    def get_werewolves(self, text: str) -> list:
+        werewolves_list = self._vote_rate_players(text)["werewolves"]
+        return werewolves_list
+
+    def get_non_werewolves(self, text: str) -> list:
+        non_werewolves_list = self._vote_rate_players(text)["non_werewolves"]
+        return non_werewolves_list
+
+    def get_votewolf_difficulty(self, werewolves: list, non_werewolves: list) -> str:
+        num_living_wolfs = len(werewolves)
         num_living_players = len(werewolves) + len(non_werewolves)
-        num_vote2eliminate_wolfs = len(set(vote2eliminate_wolfs))
-        votewolf_difficulty = "{0} / {1}".format(num_vote2eliminate_wolfs, num_living_players)
-        return good_vote_prob, votewolf_difficulty
-
+        votewolf_difficulty = "_{0} / {1}".format(num_living_wolfs, num_living_players)
+        return votewolf_difficulty
 
     def get_result_df(self, out_txtfile: str) -> pd.DataFrame:
         """
         folder:  sub folders for evals
         file: evaluation file, each file represents one game
         votes: the number of votes, eg. vote_1 represents the first vote of this game,
-        good_vote_prob:the probability of a good person voting against a werewolf, 
+        good_vote_rate: the rate of a good person voting against a werewolf,
                    correct_votes / the total number of players other than werewolves
         total_votes:the total number of votes cast
-        vote_wolf_difficulty: num_voted_wolfs / num_living_players
-        sometimes werewolf will camouflage as a good person and vote wolf
         """
         with open(out_txtfile, "r") as out_file:
             text = out_file.read()
@@ -146,56 +146,58 @@ class Vote:
                 if v != "":
                     chunks_list = list(chunks.keys())
                     total_votes = len(chunks_list) - 1
-                    good_vote_prob, votewolf_difficulty = self.get_vote_prob_difficulity(v)
+                    werewolves = self.get_werewolves(v)
+                    non_werewolves = self.get_non_werewolves(v)
+                    good_vote_rate = self.get_goodteam_vote_rate(v)
+                    votewolf_difficulty = self.get_votewolf_difficulty(werewolves, non_werewolves)
                     folder = Utils().filename_to_foldername(out_txtfile)
                     result = {
                         "folder": folder,
                         "file": Path(out_txtfile).stem + ".txt",
                         "vote_round": k,
-                        "good_vote_prob": good_vote_prob,
+                        "good_vote_rate": good_vote_rate,
                         "total_votes": total_votes,
                         "votewolf_difficulty": votewolf_difficulty
                     }
                     res.append(result)
         df = pd.DataFrame(res)
         return df
- 
 
-    def calc_avg_prob(self, IN_PATH) -> pd.DataFrame:
+    def calc_avg_rate(self, IN_PATH) -> pd.DataFrame:
         """
-        get avg_prob for each game
-        avg_prob : the good_prob/total number of votes in the game
-        vote1_prob: only check vote round1 , eval the mean of good_vote_prob
+        get avg_rate for each game
+        avg_rate : the good_rate/total number of votes in the game
+        vote1_rate: First Round Voting Accuracy Rate
         """
         infiles_list = self._get_log_fileslist(IN_PATH)
         votefiles_list = self.extract_votes_from_logs(infiles_list)
         df_list = [self._load_df_from_file(file) for file in votefiles_list]
         combined_df = pd.concat(df_list, ignore_index=True)
-        # calculate the average good_vote_prob for each file
-        mean_probs = self._calculate_mean_probs(combined_df)
-        combined_df["avg_prob"] = combined_df["file"].map(mean_probs)
-        # calculate vote1 prob
-        vote1_probs = self._calc_vote1_probs(combined_df)
-        combined_df["vote1_prob"] = combined_df["folder"].map(vote1_probs.set_index("folder")["good_vote_prob"])
-        combined_df.loc[combined_df["vote_round"] != "vote_1", "vote1_prob"] = np.nan
-        combined_df["vote1_prob"] = combined_df["vote1_prob"].apply(self._format_probs)
-        combined_df["good_vote_prob"] = combined_df["good_vote_prob"].apply(self._format_probs)
-        combined_df["avg_prob"] = combined_df["avg_prob"].apply(self._format_probs)
-        combined_df.sort_values(["folder"], ascending=True, inplace=True)
+        # calculate the average good_vote_rate for each file
+        mean_rates = self._calculate_mean_rates(combined_df)
+        combined_df["avg_rate"] = combined_df["file"].map(mean_rates)
+        # calculate vote1 rate
+        vote1_rates = self._calc_vote1_rates(combined_df)
+        combined_df["vote1_rate"] = combined_df["folder"].map(vote1_rates.set_index("folder")["good_vote_rate"])
+        combined_df.loc[combined_df["vote_round"] != "vote_1", "vote1_rate"] = np.nan
+        combined_df["vote1_rate"] = combined_df["vote1_rate"].apply(self._format_rates)
+        combined_df["good_vote_rate"] = combined_df["good_vote_rate"].apply(self._format_rates)
+        combined_df["avg_rate"] = combined_df["avg_rate"].apply(self._format_rates)
+        combined_df.sort_values(["file"], ascending=True, inplace=True)
         return combined_df
 
-    def _calc_vote1_probs(self, df):
+    def _calc_vote1_rates(self, df):
         df_vote1 = df[df["vote_round"] == 'vote_1']
-        vote1_probs = df_vote1.groupby("folder")["good_vote_prob"].mean().reset_index()
-        return vote1_probs
+        vote1_rates = df_vote1.groupby("folder")["good_vote_rate"].mean().reset_index()
+        return vote1_rates
 
     def _load_df_from_file(self, file):
         return self.get_result_df(file)
 
-    def _calculate_mean_probs(self, df):
-        return df.groupby("file")["good_vote_prob"].mean()
+    def _calculate_mean_rates(self, df):
+        return df.groupby("file")["good_vote_rate"].mean()
 
-    def _format_probs(self, s):
+    def _format_rates(self, s):
         return Utils().float_to_percent(s)
 
     def get_eval_csv(self, IN_PATH, EVAL_RESULT):
@@ -203,11 +205,11 @@ class Vote:
         IN_PATH : parent folder of ["01-10", "11-20", "21-30"]
         EVAL_RESULT : output csv file path
         """
-        combined_df = self.calc_avg_prob(IN_PATH)
+        combined_df = self.calc_avg_rate(IN_PATH)
         combined_df.to_csv(EVAL_RESULT, index=False)
 
 
 if __name__ == '__main__':
     IN_PATH = PROJECT_ROOT / "examples/werewolf_game/evals"
-    EVAL_RESULT = WORKSPACE_ROOT / "outputs" / 'goodteam_vote_probability.csv'
+    EVAL_RESULT = WORKSPACE_ROOT / "outputs" / 'goodteam_vote_rate.csv'
     Vote().get_eval_csv(IN_PATH, EVAL_RESULT)
diff --git a/examples/werewolf_game/evals/utils.py b/examples/werewolf_game/evals/utils.py
index 33632063f..a3a5c539a 100644
--- a/examples/werewolf_game/evals/utils.py
+++ b/examples/werewolf_game/evals/utils.py
@@ -88,7 +88,6 @@ class Utils:
                     elif ignore_text in line:
                         in_valid_block = False
 
-        
     @staticmethod
     def get_file_list(path: str) -> list:
         file_pattern = os.path.join(path, '*.txt')

From c0d6afc6d7b3c50d7ecedb306d4805a2df71fa10 Mon Sep 17 00:00:00 2001
From: Aria F <51890782+ariafyy@users.noreply.github.com>
Date: Tue, 24 Oct 2023 11:51:17 +0800
Subject: [PATCH 5/5] style: fix typo

---
 examples/werewolf_game/evals/eval.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/werewolf_game/evals/eval.py b/examples/werewolf_game/evals/eval.py
index 7d8a0e58e..8734f438d 100644
--- a/examples/werewolf_game/evals/eval.py
+++ b/examples/werewolf_game/evals/eval.py
@@ -1,7 +1,7 @@
 '''
 Filename: MetaGPT/examples/werewolf_game/evals/eval.py
 Created Date: Oct 18, 2023
-Revised Date: Oct 20, 2023
+Updated Date: Oct 24, 2023
 Author: [Aria](https://github.com/ariafyy)
 Info: eval the Voting Accuracy Rate of non_werewolves and Vote Difficulity 
 '''
@@ -68,7 +68,7 @@ class Vote:
 
     def _vote_rate_players(self, text: str):
         """
-        # calculate the rateability of goodteam vote werewolves
+        # calculate the rate of goodteam vote werewolves
         :example:
 
         input: