introduce experience

This commit is contained in:
garylin2099 2023-10-13 21:25:26 +08:00
parent 5026a022e1
commit 4c0ed559cc
8 changed files with 212 additions and 48 deletions

View file

@ -8,11 +8,12 @@ class Speak(Action):
PROMPT_TEMPLATE = """
{
"BACKGROUND": "It's a Werewolf game, you are __profile__, say whatever possible to increase your chance of win"
"BACKGROUND": "It's a Werewolf game, in this game, we have 2 werewolves, 2 villagers, 1 guard, 1 witch, 1 seer. You are __profile__."
,"HISTORY": "You have knowledge to the following conversation: __context__"
,"ATTENTION": "You can NOT VOTE a player who is NOT ALIVE now!"
,"REFLECTION": "__reflection__"
,"STRATEGY": __strategy__
,"PAST_EXPERIENCES": "__experiences__"
,"MODERATOR_INSTRUCTION": __latest_instruction__,
,"RULE": "Please follow the moderator's latest instruction, figure out if you need to speak your opinion or directly to vote:
1. If the instruction is to SPEAK, speak in 200 words. Remember the goal of your role and try to achieve it using your speech;
@ -21,8 +22,10 @@ class Speak(Action):
{
"ROLE": "Your role, in this case, __profile__"
,"PLAYER_NAME": "Your name, in this case, __name__"
,"LIVING_PLAYERS": "List living players based on MODERATOR_INSTRUCTION. Return a LIST datatype."
,"THOUGHTS": "Based on `MODERATOR_INSTRUCTION` and `RULE`, carefully think about what to say or vote so that your chance of win as __profile__ maximizes. Give your step-by-step thought process, you should think no more than 3 steps. For example: My step-by-step thought process:..."
,"LIVING_PLAYERS": "List living players based on MODERATOR_INSTRUCTION. Return a json LIST datatype."
,"THOUGHTS": "Based on `MODERATOR_INSTRUCTION` and `RULE`, carefully think about what to say or vote so that your chance of win as __profile__ maximizes.
If you find similar situation in `PAST_EXPERIENCES`, you may draw lessons from them to refine your strategy, take better vote action, or improve your speech.
Give your step-by-step thought process, you should think no more than 3 steps. For example: My step-by-step thought process:..."
,"RESPONSE": "Based on `MODERATOR_INSTRUCTION`, `RULE`, and the 'THOUGHTS' you had, express your opinion or cast a vote."
}
}
@ -36,21 +39,19 @@ class Speak(Action):
super().__init__(name, context, llm)
@retry(stop=stop_after_attempt(2), wait=wait_fixed(1))
async def run(self, profile: str, name: str, context: str, latest_instruction: str, reflection: str = ""):
async def run(self, profile: str, name: str, context: str, latest_instruction: str, reflection: str = "", experiences: str = ""):
prompt = (
self.PROMPT_TEMPLATE.replace("__context__", context).replace("__profile__", profile)
.replace("__name__", name).replace("__latest_instruction__", latest_instruction)
.replace("__strategy__", self.STRATEGY).replace("__reflection__", reflection)
.replace("__experiences__", experiences)
)
rsp = await self._aask(prompt)
rsp = rsp.replace("\n", " ")
rsp_json = json.loads(rsp)
with open(WORKSPACE_ROOT / 'speak.txt', 'a') as f:
f.write(rsp)
return rsp_json['RESPONSE']
class NighttimeWhispers(Action):
@ -94,18 +95,19 @@ class NighttimeWhispers(Action):
PROMPT_TEMPLATE = """
{
"ROLE": "__profile__"
"BACKGROUND": "It's a Werewolf game, in this game, we have 2 werewolves, 2 villagers, 1 guard, 1 witch, 1 seer. You are __profile__."
,"HISTORY": "You have knowledge to the following conversation: __context__"
,"ACTION": "Choose one living player to __action__."
,"ATTENTION": "1. You can only __action__ a player who is alive this night! And you can not __action__ a player who is dead this night! 2. `HISTORY` is all the information you observed, DONT hallucinate other player actions!"
,"BACKGROUND": "It's a werewolf game and you are a __profile__. Here's the game history: __context__."
,"REFLECTION": "__reflection__"
,"STRATEGY": "__strategy__"
,"PAST_EXPERIENCES": "__experiences__"
,"OUTPUT_FORMAT":
{
"ROLE": "Your role, in this case, __profile__"
,"PLAYER_NAME": "Your name, in this case, __name__"
,"LIVING_PLAYERS": "List the players who is alive based on moderator's latest instruction. Return a LIST datatype."
,"THOUGHTS": "Choose one living player from `LIVING_PLAYERS` to __action__ this night. Return the reason why you choose to __action__ this player. If you observe nothing at first night, DONT imagine unexisting player actions! Give your step-by-step thought process, you should think no more than 3 steps. For example: My step-by-step thought process:..."
,"LIVING_PLAYERS": "List the players who is alive based on moderator's latest instruction. Return a json LIST datatype."
,"THOUGHTS": "Choose one living player from `LIVING_PLAYERS` to __action__ this night. Return the reason why you choose to __action__ this player. If you observe nothing at first night, DONT imagine unexisting player actions! If you find similar situation in `PAST_EXPERIENCES`, you may draw lessons from them to refine your strategy and take better actions. Give your step-by-step thought process, you should think no more than 3 steps. For example: My step-by-step thought process:..."
,"RESPONSE": "As a __profile__, you should choose one living player from `LIVING_PLAYERS` to __action__ this night according to the THOUGHTS you have just now. Return the player name ONLY."
}
}
@ -117,7 +119,7 @@ class NighttimeWhispers(Action):
def __init__(self, name="NightTimeWhispers", context=None, llm=None):
super().__init__(name, context, llm)
def _construct_prompt_json(self, role_profile: str, role_name: str, context: str, reflection: str, **kwargs):
def _construct_prompt_json(self, role_profile: str, role_name: str, context: str, reflection: str, experiences: str, **kwargs):
prompt_template = self.PROMPT_TEMPLATE
def replace_string(prompt_json: dict):
@ -132,6 +134,7 @@ class NighttimeWhispers(Action):
prompt_json[k] = prompt_json[k].replace("__action__", self.name)
prompt_json[k] = prompt_json[k].replace("__strategy__", self.STRATEGY)
prompt_json[k] = prompt_json[k].replace("__reflection__", reflection)
prompt_json[k] = prompt_json[k].replace("__experiences__", experiences)
return prompt_json
@ -139,48 +142,57 @@ class NighttimeWhispers(Action):
prompt_json = replace_string(prompt_json)
prompt_json: dict = self._update_prompt_json(prompt_json, role_profile, role_name, context, reflection, **kwargs)
prompt_json: dict = self._update_prompt_json(prompt_json, role_profile, role_name, context, reflection, experiences, **kwargs)
assert isinstance(prompt_json, dict)
prompt: str = json.dumps(prompt_json, indent=4, separators=(',', ': '), ensure_ascii=False)
prompt: str = json.dumps(prompt_json, indent=4, ensure_ascii=False)
return prompt
def _update_prompt_json(self, prompt_json: dict, role_profile: str, role_name: str, context: str, reflection: str) -> dict:
def _update_prompt_json(
self, prompt_json: dict, role_profile: str, role_name: str, context: str, reflection: str, experiences: str
) -> dict:
# one can modify the prompt_json dictionary here
return prompt_json
@retry(stop=stop_after_attempt(2), wait=wait_fixed(1))
async def run(self, context: str, profile: str, name: str, reflection: str = ""):
async def run(self, context: str, profile: str, name: str, reflection: str = "", experiences: str = ""):
prompt = self._construct_prompt_json(
role_profile=profile, role_name=name, context=context, reflection=reflection
role_profile=profile, role_name=name, context=context, reflection=reflection, experiences=experiences
)
rsp = await self._aask(prompt)
rsp = rsp.replace("\n", " ")
rsp_json = json.loads(rsp)
with open(WORKSPACE_ROOT / f'{self.name}.txt', 'a') as f:
f.write(rsp)
return f"{self.name} " + str(rsp_json["RESPONSE"])
return f"{self.name} " + rsp_json["RESPONSE"]
class Reflect(Action):
PROMPT_TEMPLATE = """
{
"BACKGROUND": "It's a Werewolf game, you are __profile__"
"BACKGROUND": "It's a Werewolf game, in this game, we have 2 werewolves, 2 villagers, 1 guard, 1 witch, 1 seer. You are __profile__."
,"HISTORY": "You have knowledge to the following conversation: __context__"
,"MODERATOR_INSTRUCTION": __latest_instruction__,
,"OUTPUT_FORMAT":
,"OUTPUT_FORMAT" (a json):
{
"ROLE": "Your role, in this case, __profile__"
,"PLAYER_NAME": "Your name, in this case, __name__"
,"LIVING_PLAYERS": "List living players based on MODERATOR_INSTRUCTION. Return a LIST datatype."
,"REFLECTION": "You are about to follow `MODERATOR_INSTRUCTION`, but before taking any action, think about
what insights you can draw from `HISTORY` for achieving your objective?
Try to figure out the role of each player including living or dead, and summarize the game states. Give your reflection in no more than three sentences."
,"STRATEGY": Based on your reflection, think at high level what strategy you will take, in one sentence.
"GAME_STATES": "You are about to follow `MODERATOR_INSTRUCTION`, but before taking any action, analyze each player, including the living and the dead, and summarize the game states.
For each player, your reflection should be a ONE-LINE json covering the following dimension, return a LIST of jsons (return an empty LIST for the first night):
[
{"TARGET": "the player you will analyze, if the player is yourself or your werewolf partner, indicate it" ,"STATUS": "living or dead, if dead, how was he/she possibly killed?", "CLAIMED_ROLE": "claims a role or not, if so, what role, any contradiction to others? If there is no claim, return 'None'", "SIDE_WITH": "sides with which players? If none, return 'None'", "ACCUSE": "accuses which players? If none, return 'None'"}
,{...}
,...
]"
,"REFLECTION": "Based on the whole `GAME_STATES`, return a json (return an empty string for the first night):
{
"Player1": "the true role (werewolf / special role / villager, living or dead) you infer about him/her, and why is this role? If the player is yourself or your werewolf partner, indicate it."
,...
,"Player7": "the true role (werewolf / special role / villager, living or dead) you infer about him/her, and why is this role? If the player is yourself or your werewolf partner, indicate it."
,"GAME_STATE_SUMMARIZATION": "summarize the current situation from your standpoint in one sentence, your summarization should catch the most important information from your reflection, such as conflicts, number of living werewolves, special roles, and villagers."
}"
}
}
"""
@ -200,4 +212,4 @@ class Reflect(Action):
rsp = rsp.replace("\n", " ")
rsp_json = json.loads(rsp)
return rsp_json['REFLECTION']
return json.dumps(rsp_json['REFLECTION'])

View file

@ -10,9 +10,9 @@ class Impersonate(Speak):
"""Action: werewolf impersonating a good guy in daytime speak"""
STRATEGY = """
Try continuously impersonating a role with special ability, such as a Seer or a Witch, in order to mislead
Try continuously impersonating a role, such as Seer, Guard, Villager, etc., in order to mislead
other players, make them trust you, and thus hiding your werewolf identity. However, pay attention to what your werewolf partner said,
if your werewolf partner has claimed to be a Seer or Witch, DONT claim to be the same role. Remmber NOT to reveal your real identity as a werewolf!
DONT claim the same role as your werewolf partner. Remmber NOT to reveal your real identity as a werewolf!
"""
def __init__(self, name="Impersonate", context=None, llm=None):

View file

@ -5,11 +5,13 @@ class Save(NighttimeWhispers):
def __init__(self, name="Save", context=None, llm=None):
super().__init__(name, context, llm)
def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, reflection: str, **kwargs):
def _update_prompt_json(
self, prompt_json: dict, role_profile: str, role_name: str, context: str, reflection: str, experiences: str
) -> dict:
del prompt_json['ACTION']
del prompt_json['ATTENTION']
prompt_json["OUTPUT_FORMAT"]["THOUGHTS"] = "It is night time. Return the thinking steps of your decision of whether to save the player JUST be killed at this night."
prompt_json["OUTPUT_FORMAT"]["THOUGHTS"] = "It is night time. Return the thinking steps of your decision of whether to save the player JUST killed this night."
prompt_json["OUTPUT_FORMAT"]["RESPONSE"] = "Follow the Moderator's instruction, decide whether you want to save that person or not. Return SAVE or PASS."
return prompt_json
@ -28,10 +30,10 @@ class Poison(NighttimeWhispers):
def __init__(self, name="Poison", context=None, llm=None):
super().__init__(name, context, llm)
def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, reflection: str, **kwargs):
def _update_prompt_json(
self, prompt_json: dict, role_profile: str, role_name: str, context: str, reflection: str, experiences: str
) -> dict:
prompt_json["OUTPUT_FORMAT"]["RESPONSE"] += "Or if you want to PASS, return PASS."
return prompt_json
async def run(self, *args, **kwargs):

View file

@ -4,7 +4,8 @@ from metagpt.roles import Role
from metagpt.schema import Message
from metagpt.logs import logger
from examples.werewolf_game.actions import ACTIONS, InstructSpeak, Speak, Reflect, NighttimeWhispers
from examples.werewolf_game.actions.experience_operation import AddNewExperiences, RetrieveExperiences
from examples.werewolf_game.schema import RoleExperience
class BasePlayer(Role):
def __init__(
@ -12,6 +13,9 @@ class BasePlayer(Role):
name: str = "PlayerXYZ",
profile: str = "BasePlayer",
special_action_names: list[str] = [],
use_reflection: bool = True,
use_experience: bool = False,
use_memory_selection: bool = False,
**kwargs,
):
super().__init__(name, profile, **kwargs)
@ -25,6 +29,16 @@ class BasePlayer(Role):
self._init_actions(capable_actions) # 给角色赋予行动技能
self.special_actions = special_actions
self.use_reflection = use_reflection
if not self.use_reflection and use_experience:
logger.warning("You must enable use_reflection before using experience")
self.use_experience = False
else:
self.use_experience = use_experience
self.use_memory_selection = use_memory_selection
self.experiences = []
async def _observe(self) -> int:
if self.status == 1:
# 死者不再参与游戏
@ -60,18 +74,21 @@ class BasePlayer(Role):
reflection = await Reflect().run(
profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction
)
) if self.use_reflection else ""
experiences = RetrieveExperiences().run(query=reflection, profile=self.profile) \
if self.use_experience else ""
# 根据自己定义的角色Action对应地去runrun的入参可能不同
if isinstance(todo, Speak):
rsp = await todo.run(
profile=self.profile, name=self.name, context=memories,
latest_instruction=latest_instruction, reflection=reflection
)
latest_instruction=latest_instruction, reflection=reflection, experiences=experiences)
restricted_to = ""
elif isinstance(todo, NighttimeWhispers):
rsp = await todo.run(profile=self.profile, name=self.name, context=memories, reflection=reflection)
rsp = await todo.run(profile=self.profile, name=self.name, context=memories,
reflection=reflection, experiences=experiences)
restricted_to = f"Moderator,{self.profile}" # 给Moderator发送使用特殊技能的加密消息
msg = Message(
@ -80,6 +97,11 @@ class BasePlayer(Role):
restricted_to=restricted_to
)
self.experiences.append(
RoleExperience(name=self.name, profile=self.profile, reflection=reflection,
instruction=latest_instruction, response=rsp)
)
logger.info(f"{self._setting}: {rsp}")
return msg
@ -97,3 +119,11 @@ class BasePlayer(Role):
def set_status(self, new_status):
self.status = new_status
def record_experiences(self, round_id: str, outcome: str, game_setup: str):
    """Stamp this player's reflected experiences with the game result and store them.

    Only experiences whose ``reflection`` is non-empty are kept. Each kept
    entry gets ``round_id``, ``outcome`` and ``game_setup`` assigned, then the
    whole batch is handed to ``AddNewExperiences().run``.

    Args:
        round_id: Identifier shared by all experiences of the finished game.
        outcome: Result of the game for this player (the moderator passes
            "won" or "lost").
        game_setup: Game setup text to attach to every experience.
    """
    reflected = [exp for exp in self.experiences if exp.reflection]
    if not reflected:
        # Nothing worth persisting (e.g. reflection was disabled, so every
        # entry has an empty reflection); do not submit an empty batch.
        return

    for exp in reflected:
        exp.round_id = round_id
        exp.outcome = outcome
        exp.game_setup = game_setup
    AddNewExperiences().run(reflected)

View file

@ -1,5 +1,6 @@
import re
from collections import Counter
from datetime import datetime
from metagpt.const import WORKSPACE_ROOT
from metagpt.roles import Role
@ -27,6 +28,7 @@ class Moderator(Role):
self.eval_step_idx = []
# game states
self.game_setup = ""
self.living_players = []
self.werewolf_players = []
self.villager_players = []
@ -44,6 +46,7 @@ class Moderator(Role):
self.player_current_dead = []
def _parse_game_setup(self, game_setup: str):
self.game_setup = game_setup
self.living_players = re.findall(r"Player[0-9]+", game_setup)
self.werewolf_players = re.findall(r"Player[0-9]+: Werewolf", game_setup)
self.werewolf_players = [p.replace(": Werewolf", "") for p in self.werewolf_players]
@ -61,6 +64,18 @@ class Moderator(Role):
if player_name in role_setting:
role.set_status(new_status=1) # 更新为死亡
def _record_all_experiences(self):
    """Ask every player in the environment to record its experiences for this game.

    All players share one round id, which is the current local time formatted
    as ``%Y-%m-%d-%H:%M:%S``. A player's outcome is "won" when its side matches
    ``self.winner``: werewolves win when the winner is "werewolf", everyone
    else wins otherwise. The moderator itself is skipped.
    """
    round_id = datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
    werewolves_won = self.winner == "werewolf"

    # Only the role objects are needed; the keys of the role mapping are unused.
    for role in self._rc.env.get_roles().values():
        if role is self:
            # The moderator is not a player and has no experiences to record.
            continue
        is_werewolf = role.name in self.werewolf_players
        outcome = "won" if is_werewolf == werewolves_won else "lost"
        role.record_experiences(round_id=round_id, outcome=outcome, game_setup=self.game_setup)
async def _instruct_speak(self):
print("*" * 10, "STEP: ", self.step_idx, "*" * 10)
step_idx = self.step_idx % len(STEP_INSTRUCTIONS)
@ -87,7 +102,6 @@ class Moderator(Role):
msg_cause_by = latest_msg.cause_by
if msg_cause_by == Hunt:
self.player_hunted = target
# breakpoint()
elif msg_cause_by == Protect:
self.player_protected = target
elif msg_cause_by == Verify:
@ -167,6 +181,8 @@ class Moderator(Role):
elif not living_villagers or not living_special_roles:
self.winner = "werewolf"
self.win_reason = "villagers all dead" if not living_villagers else "special roles all dead"
if self.winner is not None:
self._record_all_experiences()
def _record_game_history(self):
if self.step_idx % len(STEP_INSTRUCTIONS) == 0 or self.winner is not None:

View file

@ -0,0 +1,13 @@
from pydantic import BaseModel
class RoleExperience(BaseModel):
    """One recorded decision of a player, kept as an experience record."""

    # Record identifier; empty unless set by the caller.
    # NOTE(review): presumably assigned when the record is stored - confirm.
    id: str = ""
    # Name of the player that produced the experience (e.g. "Player1").
    name: str = ""
    # Role profile of that player; required.
    profile: str
    # The player's reflection before acting; required. Records with an empty
    # reflection are filtered out before being persisted by the player.
    reflection: str
    # Moderator instruction the player was responding to.
    instruction: str = ""
    # What the player answered / which action it took; required.
    response: str
    # Game result for the player; the moderator fills in "won" or "lost".
    outcome: str = ""
    # Identifier of the game the experience belongs to.
    round_id: str = ""
    # Game setup text of that game.
    game_setup: str = ""
    # NOTE(review): meaning of this version tag is not visible here - confirm.
    version: str = "01-10"

View file

@ -8,7 +8,10 @@ from examples.werewolf_game.werewolf_game import WerewolfGame
from examples.werewolf_game.roles import Moderator, Villager, Werewolf, Guard, Seer, Witch
from examples.werewolf_game.roles.human_player import prepare_human_player
def init_game_setup(shuffle=True, add_human=False):
def init_game_setup(
shuffle=True, add_human=False,
use_reflection=True, use_experience=False, use_memory_selection=False
):
roles = [
Villager,
Villager,
@ -26,7 +29,12 @@ def init_game_setup(shuffle=True, add_human=False):
assigned_role = roles[assigned_role_idx]
roles[assigned_role_idx] = prepare_human_player(assigned_role)
players = [role(name=f"Player{i+1}") for i, role in enumerate(roles)]
players = [
role(
name=f"Player{i+1}",
use_reflection=use_reflection, use_experience=use_experience, use_memory_selection=use_memory_selection
) for i, role in enumerate(roles)
]
if add_human:
logger.info(f"You are assigned {players[assigned_role_idx].name}({players[assigned_role_idx].profile})")
@ -36,22 +44,29 @@ def init_game_setup(shuffle=True, add_human=False):
return game_setup, players
async def start_game(investment: float = 3.0, n_round: int = 5, shuffle : bool = True, add_human: bool = False):
async def start_game(
investment: float = 3.0, n_round: int = 5, shuffle : bool = True, add_human: bool = False,
use_reflection: bool = True, use_experience: bool = False, use_memory_selection: bool = False
):
game = WerewolfGame()
game_setup, players = init_game_setup(shuffle=shuffle, add_human=add_human)
game_setup, players = init_game_setup(shuffle=shuffle, add_human=add_human,
use_reflection=use_reflection, use_experience=use_experience, use_memory_selection=use_memory_selection)
players = [Moderator()] + players
game.hire(players)
game.invest(investment)
game.start_project(game_setup)
await game.run(n_round=n_round)
def main(investment: float = 3.0, n_round: int = 100, shuffle : bool = True, add_human: bool = False):
def main(investment: float = 20.0, n_round: int = 100, shuffle : bool = True, add_human: bool = False,
use_reflection: bool = True, use_experience: bool = False, use_memory_selection: bool = False):
"""
:param investment: contribute a certain dollar amount to watch the debate
:param n_round: maximum rounds of the debate
:return:
"""
asyncio.run(start_game(investment, n_round, shuffle, add_human))
asyncio.run(
start_game(investment, n_round, shuffle, add_human, use_reflection, use_experience, use_memory_selection)
)
if __name__ == '__main__':

View file

@ -0,0 +1,76 @@
import json
import os
import pytest
from metagpt.logs import logger
from metagpt.const import WORKSPACE_ROOT
from examples.werewolf_game.schema import RoleExperience
from examples.werewolf_game.actions.experience_operation import AddNewExperiences, RetrieveExperiences
class TestExperiencesOperation:
    """Tests for the experience actions ``AddNewExperiences`` and ``RetrieveExperiences``."""

    # Round id shared by all samples; also the stem of the file test_add checks.
    test_round_id = "test_01"
    samples_to_add = [
        RoleExperience(
            profile="Witch",
            reflection="The game is intense with two players claiming to be the Witch and one claiming to be the Seer. Player4's behavior is suspicious.",
            response="",
            outcome="",
            round_id=test_round_id,
        ),
        RoleExperience(
            profile="Witch",
            reflection="The game is in a critical state with only three players left, and I need to make a wise decision to save Player7 or not.",
            response="",
            outcome="",
            round_id=test_round_id,
        ),
        RoleExperience(
            profile="Seer",
            reflection="Player1, who is a werewolf, falsely claimed to be a Seer, and Player6, who might be a Witch, sided with him. I, as the real Seer, am under suspicion.",
            response="",
            outcome="",
            round_id=test_round_id,
        ),
    ]

    @pytest.mark.asyncio
    async def test_add(self):
        """Samples can be added to a collection and re-added from the saved file."""
        saved_file = f"{WORKSPACE_ROOT}/werewolf_game/experiences/{self.test_round_id}.json"
        # Start from a clean slate so a file left by an earlier run is not reused.
        if os.path.exists(saved_file):
            os.remove(saved_file)

        action = AddNewExperiences(collection_name="test", delete_existing=True)
        action.run(self.samples_to_add)

        # Every sample must have been inserted into the collection.
        inserted = action.collection.get()
        assert len(inserted["documents"]) == len(self.samples_to_add)

        # The samples must also be loadable from the local file into a
        # freshly created collection.
        action = AddNewExperiences(collection_name="test", delete_existing=True)
        action.add_from_file(saved_file)
        inserted = action.collection.get()
        assert len(inserted["documents"]) == len(self.samples_to_add)

    @pytest.mark.asyncio
    async def test_retrieve(self):
        """Retrieval for the Witch profile returns the two Witch samples."""
        action = RetrieveExperiences(collection_name="test")

        query = "one player claimed to be Seer and the other Witch"
        results = action.run(query, "Witch")
        results = json.loads(results)

        assert len(results) == 2
        assert "The game is intense with two players" in results[0]

    @pytest.mark.asyncio
    async def test_check_experience_pool(self):
        """Print the last five metadata entries of the ``role_reflection`` collection."""
        logger.info("check experience pool")
        action = RetrieveExperiences(collection_name="role_reflection")
        print(*action.collection.get()["metadatas"][-5:], sep="\n")

    @pytest.mark.asyncio
    async def test_retrieve_werewolf_experience(self):
        """Smoke test: run a retrieval for the Werewolf profile without asserting on it."""
        action = RetrieveExperiences(collection_name="role_reflection")

        query = "there are conflicts"

        logger.info(f"test retrieval with {query=}")
        action.run(query, "Werewolf")

    @pytest.mark.asyncio
    async def test_retrieve_villager_experience(self):
        """Smoke test: run a retrieval for the Seer profile without asserting on it."""
        action = RetrieveExperiences(collection_name="role_reflection")

        query = "there are conflicts"

        logger.info(f"test retrieval with {query=}")
        action.run(query, "Seer")