feat: polish log files for evals

This commit is contained in:
Aria F 2023-10-10 20:29:19 +08:00 committed by GitHub
parent 48043f4f9d
commit c55608cb76
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,120 +1,68 @@
'''
Filename: MetaGPT/examples/werewolf_game/evals/utils.py
Created Date: Oct 2, 2023
Created Date: Oct 10, 2023
Author: [Aria](https://github.com/ariafyy)
'''
from metagpt.const import WORKSPACE_ROOT, PROJECT_ROOT
import re, json
import re
class Utils:
"""Utils: utils of logs"""
def __init__(self):
pass
def _action(self, text: str) -> str:
"""
# get action
input: I vote to eliminate Player3
output: vote
"""
text = text.lower()
if "vote" in text:
action = "vote"
return action
if "verify" in text:
action = "verify"
return action
if "kill" in text:
action = "kill"
return action
else:
action = "chat"
return action
def polish_log(self, in_logfile, out_txtfile):
"""polish logs for evaluation"""
pattern_text = r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) \| (\w+) +\| ([\w\.]+:\w+:\d+) - (.*\S)"
pattern_player = r"(Player(\d{1}): \w+)"
pattern_start = False
json_start = False
def _life(self, text: str) -> str:
"""
# get life
input: Kill Player6
output: dead
"""
text = text.lower()
if re.search(r'\b(eliminated|killed|kill)\b', text, re.I):
life = 'dead'
dead_role = re.findall(r'\[(.*?)\]', text)
if re.search(r'no one was killed', text, re.I):
return "alive", []
else:
return life, dead_role
else:
life = "alive"
return life, []
with open(in_logfile, "r") as f, open(out_txtfile, "w") as out:
for line in f.readlines():
matches = re.match(pattern_text, line)
if matches:
message = matches.group(4).strip()
pattern_start = True
json_start = False
def txt2data(self, file):
"""
input: .txt file
output: data for json format
"""
result = {}
count = 0
day = -1
flag = False
with open(file, "r") as f:
lines = f.readlines()
for line in lines:
if "Moderator(Moderator): 0" in line:
flag = True
if flag:
if "Its dark, everyone close your eyes." in line:
day += 1
count = 0
data = {}
parts = line.split("|")
data["role"] = parts[0].strip().split(":")[0]
data["day"] = day
data["turn"] = count
if len(parts) > 1:
data["text"] = parts[1].strip()
data["action"] = self._action(data["text"])
data["life"], data["dead_role"] = self._life(data["text"])
if "Moderator(Moderator) ready to InstructSpeak" not in message and "Moderator(Moderator) ready to ParseSpeak" not in message and "Total running cost:" not in message:
out.write("- " + message + '\n')
else:
continue
key = "day_{}".format(day)
if key not in result:
result[key] = []
result[key].append(data)
count += 1
return result
out.write('\n')
def data2json(self, in_file):
"""
output examples:
{
"day_0": [
{
"role": "Moderator(Moderator)",
"day": 0,
"turn": 0,
"text": "Its dark, everyone close your eyes. I will talk with you/your team secretly at night.",
"action": "chat",
"life": "alive",
"dead_role": []
},{}]
...
}
"""
elif pattern_start and not matches:
if "gpt-4 may update over time" in line:
line = ""
out.write(line)
result = self.txt2data(in_file)
self._save_json(result)
return result
elif line.strip().startswith("{"):
out.write(line.strip())
json_start = True
def _save_json(self, data):
with open(WORKSPACE_ROOT / 'werewolf_transcript.json', "w", encoding='utf-8') as f:
json.dump(data, f, ensure_ascii=False, indent=2)
f.write('\n')
elif json_start and not line.strip().endswith("}"):
out.write(line.strip())
elif json_start and line.strip().endswith("}"):
out.write(line.strip())
json_start = False
elif line.startswith("(User):"):
out.write(line)
elif line.startswith("********** STEP:"):
out.write(line)
elif re.search(pattern_player, line):
out.write(line)
else:
out.write("\n")
if __name__ == '__main__':
txt_path = WORKSPACE_ROOT / "werewolf_transcript.txt"
log_path = PROJECT_ROOT / "logs/log.txt"
Utils().data2json(txt_path)
in_logfile = PROJECT_ROOT / "logs/log.txt"
out_txtfile = "input your wish path"
Utils().polish_log(in_logfile, out_txtfile)