'''
Filename: MetaGPT/examples/werewolf_game/evals/eval.py
Created Date: Oct 18, 2023
Updated Date: Oct 24, 2023
Author: [Aria](https://github.com/ariafyy)
Info: eval the Voting Accuracy Rate of non_werewolves and Vote Difficulty

NOTE(review): this listing is a plain-Python reconstruction of a
whitespace-collapsed diff that creates ``evals/eval.py`` (class ``Vote``) and
adds four static helpers to ``evals/utils.py`` (class ``Utils``, revised
Oct 20, 2023).  Only the ``Utils`` members added by that diff are reproduced
here; anything else in ``utils.py`` (e.g. ``polish_log``) lies outside the
visible hunks and is not part of this listing.
'''

import glob
import os
import re
from pathlib import Path

import numpy as np
import pandas as pd

# Moderator prompt that opens every voting round.  The trailing "..." is kept
# as in the original patterns (three arbitrary characters).
_MODERATOR_PROMPT = re.compile(r'\[([^\]]+)\]\. Say ONLY: I vote to eliminate ...')

# One vote line as read by the evaluator: "<player>(<role>): <n> | I vote to eliminate <target>".
_VOTE_RECORD = re.compile(r'(\w+)\(([^\)]+)\): \d+ \| I vote to eliminate (\w+)')

# Column order of the per-round result rows built by ``Vote.get_result_df``.
_RESULT_COLUMNS = (
    "folder",
    "file",
    "vote_round",
    "good_vote_rate",
    "total_votes",
    "votewolf_difficulty",
)


class Utils:
    """Utils: utils of logs"""

    @staticmethod
    def pick_vote_log(in_logfile, out_txtfile):
        """
        Pick the vote log from a game log file and write it to ``out_txtfile``.

        The log is split on the marker text "ready to AnnounceGameResult" and
        only the segment right after its first occurrence (up to the next
        occurrence or the end of the file) is scanned.  Within that segment:

        * a line containing the moderator prompt
          ("[...]. Say ONLY: I vote to eliminate ...") opens a vote block and is
          written with its leading whitespace removed;
        * while a block is open, lines containing a player vote are written
          unchanged;
        * any other line containing "reflection" closes the block, because
          observation/reflection/discussion text is not part of a vote session.

        If the marker text is missing, a message is printed and no output file
        is written.

        Script usage in utils.py:
            Utils().pick_vote_log(PROJECT_ROOT / "logs/log.txt", out_txtfile)

        :param in_logfile: path of the game log to read
        :param out_txtfile: path of the vote-only text file to write
        :return: None
        """
        pattern_vote = re.compile(
            r'(Player\d+)\(([A-Za-z]+)\): (\d+) \| (I vote to eliminate Player\d+)'
        )
        ignore_text = """reflection"""
        HINT_TEXT = r"ready to AnnounceGameResult"

        with open(in_logfile, "r") as f:
            segments = f.read().split(HINT_TEXT)

        if len(segments) < 2:
            print(f"Key text :{HINT_TEXT} not found in {in_logfile}")
            return

        in_valid_block = False
        with open(out_txtfile, "w") as out:
            for line in segments[1].split("\n"):
                if _MODERATOR_PROMPT.search(line):
                    in_valid_block = True
                    out.write(line.lstrip() + "\n")
                elif in_valid_block and pattern_vote.search(line):
                    out.write(line + "\n")
                elif ignore_text in line:
                    in_valid_block = False

    @staticmethod
    def get_file_list(path: str) -> list:
        """Return the paths of all ``*.txt`` files directly inside ``path``."""
        return glob.glob(os.path.join(path, '*.txt'))

    @staticmethod
    def filename_to_foldername(out_txtfile: str):
        """
        convert filename into its parent folder name
        input:"....../# 01-10_10132100.txt"
        output:# 01-10

        :return: the part of the file stem before the first underscore, or
                 None when the stem contains no underscore
        """
        match = re.match(r'([^_]*)_', Path(out_txtfile).stem)
        return match.group(1) if match else None

    @staticmethod
    def float_to_percent(decimal: float) -> str:
        """
        input: 1.00
        output: 100.00%
        """
        percent = decimal * 100
        return f"{percent:.2f}%"


class Vote:
    """Vote Evaluation"""

    def __init__(self, out_path=None, sub_folders=None):
        """
        :param out_path: folder that receives the extracted vote files; when
                         omitted, ``WORKSPACE_ROOT / "outputs"`` is used
        :param sub_folders: names of the sub folders holding the game logs;
                            defaults to ["01-10", "11-20", "21-30"]
        """
        if out_path is None:
            # Imported lazily so the parsing helpers stay importable without metagpt.
            from metagpt.const import WORKSPACE_ROOT
            self.OUT_PATH = WORKSPACE_ROOT / "outputs"
        else:
            self.OUT_PATH = Path(out_path)
        os.makedirs(self.OUT_PATH, exist_ok=True)
        if sub_folders is None:
            self.SUB_FOLDER_LIST = ["01-10", "11-20", "21-30"]
        else:
            self.SUB_FOLDER_LIST = list(sub_folders)

    def _get_log_fileslist(self, IN_PATH) -> list[str]:
        """Collect the ``*.txt`` logs found in every sub folder of ``IN_PATH``."""
        base = Path(IN_PATH)
        files_list = []
        for sub_folder in self.SUB_FOLDER_LIST:
            files_list.extend(glob.glob(str(base / sub_folder / '*.txt')))
        return files_list

    def extract_votes_from_logs(self, files_list: list):
        """
        Run ``Utils.pick_vote_log`` on every log and return the vote files.

        Each output is named "# <sub folder>_<log stem>.txt" inside OUT_PATH.
        The returned list is every ``*.txt`` file present in OUT_PATH, so it
        also contains files left there by earlier runs.
        """
        from tqdm import tqdm  # only needed for the progress bar of this step

        for in_logfile in tqdm(files_list):
            log_path = Path(in_logfile)
            out_txtfile = self.OUT_PATH / "# {0}_{1}.txt".format(log_path.parent.stem, log_path.stem)
            Utils.pick_vote_log(in_logfile, out_txtfile)
        return Utils.get_file_list(self.OUT_PATH)

    @staticmethod
    def parse_vote_text2chunks(text: str):
        """
        parse each game vote log into text chunks

        The text is cut at every moderator prompt
        "[...]. Say ONLY: I vote to eliminate ...".  The prompt itself is not
        kept: ``vote_0`` is the (stripped) text before the first prompt and
        ``vote_k`` is the text following the k-th prompt.  A trailing chunk is
        only stored when it is not empty.

        votes following one prompt, for example:
        Player1(Witch): 49 | I vote to eliminate Player5
        Player2(Villager): 49 | I vote to eliminate Player5
        Player3(Villager): 49 | I vote to eliminate Player5
        Player5(Werewolf): 49 | I vote to eliminate Player6
        Player6(Seer): 49 | I vote to eliminate Player5

        :return: dict mapping "vote_<i>" to the chunk text
        """
        chunks = {}
        chunk_id = 0
        last_end = 0
        for match in _MODERATOR_PROMPT.finditer(text):
            chunks[f'vote_{chunk_id}'] = text[last_end:match.start()].strip()
            last_end = match.end()
            chunk_id += 1
        final_chunk = text[last_end:].strip()
        if final_chunk:
            chunks[f'vote_{chunk_id}'] = final_chunk
        return chunks

    def _vote_rate_players(self, text: str):
        """
        # calculate the rate of goodteam vote werewolves
        :example:

        input:
        Player1(Witch): 49 | I vote to eliminate Player5
        Player2(Villager): 49 | I vote to eliminate Player5
        Player3(Villager): 49 | I vote to eliminate Player5
        Player5(Werewolf): 49 | I vote to eliminate Player6
        Player6(Seer): 49 | I vote to eliminate Player5

        output:
        werewolves:  ['Player5']
        non_werewolves: ['Player1', 'Player2', 'Player3', 'Player6']
        every non-werewolf voted to eliminate Player5(Werewolf)
        good_vote_rate: 1.0

        Only players that cast a vote in ``text`` are counted.  When no
        non-werewolf voted, the rate is 0.0 instead of dividing by zero.

        :return: dict with keys "good_vote_rate" (rounded to 2 decimals),
                 "werewolves" and "non_werewolves"
        """
        werewolves = []
        non_werewolves = []
        good_team_targets = []
        for player, role, target in _VOTE_RECORD.findall(text):
            if role == 'Werewolf':
                werewolves.append(player)
            else:
                non_werewolves.append(player)
                good_team_targets.append(target)

        # a vote is correct when a non-werewolf targets one of the voting werewolves
        wolf_names = set(werewolves)
        correct_votes = sum(target in wolf_names for target in good_team_targets)

        rate = correct_votes / len(non_werewolves) if non_werewolves else 0.0
        return {
            "good_vote_rate": round(rate, 2),
            "werewolves": werewolves,
            "non_werewolves": non_werewolves,
        }

    def get_goodteam_vote_rate(self, text: str) -> float:
        """Return the share of non-werewolf voters that voted for a werewolf."""
        return self._vote_rate_players(text)["good_vote_rate"]

    def get_werewolves(self, text: str) -> list:
        """Return the voters in ``text`` whose role is Werewolf."""
        return self._vote_rate_players(text)["werewolves"]

    def get_non_werewolves(self, text: str) -> list:
        """Return the voters in ``text`` whose role is not Werewolf."""
        return self._vote_rate_players(text)["non_werewolves"]

    def get_votewolf_difficulty(self, werewolves: list, non_werewolves: list) -> str:
        """Format "_<number of werewolves> / <number of all listed players>"."""
        num_living_wolfs = len(werewolves)
        num_living_players = num_living_wolfs + len(non_werewolves)
        return "_{0} / {1}".format(num_living_wolfs, num_living_players)

    def get_result_df(self, out_txtfile: str) -> pd.DataFrame:
        """
        Build one result row per voting round of a vote file.

        folder: sub folders for evals
        file: evaluation file, each file represents one game
        vote_round: the chunk key, eg. vote_1 represents the first vote of this game
        good_vote_rate: the rate of a good person voting against a werewolf,
                        correct_votes / the total number of players other than werewolves
        total_votes: number of chunks in the file minus one (chunk vote_0 is
                     the text before the first moderator prompt)
        votewolf_difficulty: "_<werewolves> / <voters>" of the round

        Chunks that contain no vote line at all are skipped.
        """
        with open(out_txtfile, "r") as out_file:
            text = out_file.read()
        chunks = self.parse_vote_text2chunks(text)

        # identical for every round of the file, so computed once
        total_votes = len(chunks) - 1
        folder = Utils.filename_to_foldername(out_txtfile)
        file_name = Path(out_txtfile).stem + ".txt"

        res = []
        for vote_round, chunk in chunks.items():
            stats = self._vote_rate_players(chunk)
            werewolves = stats["werewolves"]
            non_werewolves = stats["non_werewolves"]
            if not werewolves and not non_werewolves:
                continue
            res.append({
                "folder": folder,
                "file": file_name,
                "vote_round": vote_round,
                "good_vote_rate": stats["good_vote_rate"],
                "total_votes": total_votes,
                "votewolf_difficulty": self.get_votewolf_difficulty(werewolves, non_werewolves),
            })
        return pd.DataFrame(res, columns=list(_RESULT_COLUMNS))

    def calc_avg_rate(self, IN_PATH) -> pd.DataFrame:
        """
        get avg_rate for each game
        avg_rate : mean good_vote_rate over the rounds of the same file
        vote1_rate: First Round Voting Accuracy Rate, the mean good_vote_rate
                    of the vote_1 rows of a folder; set only on vote_1 rows

        The three rate columns are returned as percent strings.  When no vote
        round is found at all, an empty frame with the same columns is returned.
        """
        infiles_list = self._get_log_fileslist(IN_PATH)
        votefiles_list = self.extract_votes_from_logs(infiles_list)
        loaded = (self._load_df_from_file(file) for file in votefiles_list)
        df_list = [df for df in loaded if not df.empty]
        if not df_list:
            return pd.DataFrame(columns=[*_RESULT_COLUMNS, "avg_rate", "vote1_rate"])
        combined_df = pd.concat(df_list, ignore_index=True)

        # calculate the average good_vote_rate for each file
        mean_rates = self._calculate_mean_rates(combined_df)
        combined_df["avg_rate"] = combined_df["file"].map(mean_rates)

        # calculate vote1 rate
        vote1_rates = self._calc_vote1_rates(combined_df)
        combined_df["vote1_rate"] = combined_df["folder"].map(
            vote1_rates.set_index("folder")["good_vote_rate"]
        )
        combined_df.loc[combined_df["vote_round"] != "vote_1", "vote1_rate"] = np.nan

        for column in ("vote1_rate", "good_vote_rate", "avg_rate"):
            combined_df[column] = combined_df[column].apply(self._format_rates)
        return combined_df.sort_values(["file"], ascending=True)

    def _calc_vote1_rates(self, df):
        """Mean good_vote_rate of the vote_1 rows, one row per folder."""
        df_vote1 = df[df["vote_round"] == 'vote_1']
        return df_vote1.groupby("folder")["good_vote_rate"].mean().reset_index()

    def _load_df_from_file(self, file):
        """Load the per-round result rows of one vote file."""
        return self.get_result_df(file)

    def _calculate_mean_rates(self, df):
        """Mean good_vote_rate per file."""
        return df.groupby("file")["good_vote_rate"].mean()

    def _format_rates(self, s):
        """Format a rate as a percent string."""
        return Utils.float_to_percent(s)

    def get_eval_csv(self, IN_PATH, EVAL_RESULT):
        """
        IN_PATH : parent folder of ["01-10", "11-20", "21-30"]
        EVAL_RESULT : output csv file path
        """
        combined_df = self.calc_avg_rate(IN_PATH)
        combined_df.to_csv(EVAL_RESULT, index=False)


if __name__ == '__main__':
    from metagpt.const import PROJECT_ROOT, WORKSPACE_ROOT

    IN_PATH = PROJECT_ROOT / "examples/werewolf_game/evals"
    EVAL_RESULT = WORKSPACE_ROOT / "outputs" / 'goodteam_vote_rate.csv'
    Vote().get_eval_csv(IN_PATH, EVAL_RESULT)