diff --git a/examples/werewolf_game/actions/__init__.py b/examples/werewolf_game/actions/__init__.py index 21a053980..784715907 100644 --- a/examples/werewolf_game/actions/__init__.py +++ b/examples/werewolf_game/actions/__init__.py @@ -1,5 +1,5 @@ from examples.werewolf_game.actions.moderator_actions import InstructSpeak -from examples.werewolf_game.actions.common_actions import Speak, NighttimeWhispers +from examples.werewolf_game.actions.common_actions import Speak, NighttimeWhispers, Reflect from examples.werewolf_game.actions.werewolf_actions import Hunt, Impersonate from examples.werewolf_game.actions.guard_actions import Protect from examples.werewolf_game.actions.seer_actions import Verify diff --git a/examples/werewolf_game/actions/common_actions.py b/examples/werewolf_game/actions/common_actions.py index 42f2223ba..2b9600260 100644 --- a/examples/werewolf_game/actions/common_actions.py +++ b/examples/werewolf_game/actions/common_actions.py @@ -1,7 +1,7 @@ from metagpt.actions import Action import json from metagpt.const import WORKSPACE_ROOT - +from tenacity import retry, stop_after_attempt, wait_fixed class Speak(Action): """Action: Any speak action in a game""" @@ -11,6 +11,7 @@ class Speak(Action): "BACKGROUND": "It's a Werewolf game, you are __profile__, say whatever possible to increase your chance of win" ,"HISTORY": "You have knowledge to the following conversation: __context__" ,"ATTENTION": "You can NOT VOTE a player who is NOT ALIVE now!" + ,"REFLECTION": "__reflection__" ,"STRATEGY": __strategy__ ,"MODERATOR_INSTRUCTION": __latest_instruction__, ,"RULE": "Please follow the moderator's latest instruction, figure out if you need to speak your opinion or directly to vote: @@ -34,23 +35,18 @@ class Speak(Action): def __init__(self, name="Speak", context=None, llm=None): super().__init__(name, context, llm) - async def run(self, profile: str, name: str, context: str, latest_instruction: str): + @retry(stop=stop_after_attempt(2), wait=wait_fixed(1)) + async def run(self, profile: str, name: str, context: str, latest_instruction: str, reflection: str = ""): prompt = ( self.PROMPT_TEMPLATE.replace("__context__", context).replace("__profile__", profile) .replace("__name__", name).replace("__latest_instruction__", latest_instruction) - .replace("__strategy__", self.STRATEGY) + .replace("__strategy__", self.STRATEGY).replace("__reflection__", reflection) ) - re_run = 2 - while re_run > 0: - rsp = await self._aask(prompt) - try: - rsp = rsp.replace("\n", " ") - rsp_json = json.loads(rsp) - break - except: - re_run -= 1 + rsp = await self._aask(prompt) + rsp = rsp.replace("\n", " ") + rsp_json = json.loads(rsp) with open(WORKSPACE_ROOT / 'speak.txt', 'a') as f: f.write(rsp) @@ -101,8 +97,9 @@ class NighttimeWhispers(Action): "ROLE": "__profile__" ,"ACTION": "Choose one living player to __action__." ,"ATTENTION": "1. You can only __action__ a player who is alive this night! And you can not __action__ a player who is dead this night! 2. `HISTORY` is all the information you observed, DONT hallucinate other player actions!" - ,"STRATEGY": "__strategy__" ,"BACKGROUND": "It's a werewolf game and you are a __profile__. Here's the game history: __context__." + ,"REFLECTION": "__reflection__" + ,"STRATEGY": "__strategy__" ,"OUTPUT_FORMAT": { "ROLE": "Your role, in this case, __profile__" @@ -120,7 +117,7 @@ class NighttimeWhispers(Action): def __init__(self, name="NightTimeWhispers", context=None, llm=None): super().__init__(name, context, llm) - def _construct_prompt_json(self, role_profile: str, role_name: str, context: str, **kwargs): + def _construct_prompt_json(self, role_profile: str, role_name: str, context: str, reflection: str, **kwargs): prompt_template = self.PROMPT_TEMPLATE def replace_string(prompt_json: dict): @@ -134,6 +131,7 @@ class NighttimeWhispers(Action): prompt_json[k] = prompt_json[k].replace("__context__", context) prompt_json[k] = prompt_json[k].replace("__action__", self.name) prompt_json[k] = prompt_json[k].replace("__strategy__", self.STRATEGY) + prompt_json[k] = prompt_json[k].replace("__reflection__", reflection) return prompt_json @@ -141,34 +139,65 @@ class NighttimeWhispers(Action): prompt_json = replace_string(prompt_json) - prompt_json: dict = self._update_prompt_json(prompt_json, role_profile, role_name, context, **kwargs) + prompt_json: dict = self._update_prompt_json(prompt_json, role_profile, role_name, context, reflection, **kwargs) assert isinstance(prompt_json, dict) prompt: str = json.dumps(prompt_json, indent=4, separators=(',', ': '), ensure_ascii=False) return prompt - def _update_prompt_json(self, prompt_json: dict, role_profile: str, role_name: str, context: str) -> dict: + def _update_prompt_json(self, prompt_json: dict, role_profile: str, role_name: str, context: str, reflection: str) -> dict: # one can modify the prompt_json dictionary here return prompt_json - async def run(self, context: str, profile: str, name: str): + @retry(stop=stop_after_attempt(2), wait=wait_fixed(1)) + async def run(self, context: str, profile: str, name: str, reflection: str = ""): - final_prompt = self._construct_prompt_json( - role_profile=profile, role_name=name, context=context + prompt = self._construct_prompt_json( + role_profile=profile, role_name=name, context=context, reflection=reflection ) - re_run = 2 - while re_run > 0: - rsp_content = await self._aask(final_prompt) - try: - rsp_content = rsp_content.replace("\n", " ") - rsp = json.loads(rsp_content) - break - except: - re_run -= 1 + rsp = await self._aask(prompt) + rsp = rsp.replace("\n", " ") + rsp_json = json.loads(rsp) with open(WORKSPACE_ROOT / f'{self.name}.txt', 'a') as f: - f.write(rsp_content) + f.write(rsp) - return f"{self.name} " + str(rsp["RESPONSE"]) + return f"{self.name} " + str(rsp_json["RESPONSE"]) + +class Reflect(Action): + PROMPT_TEMPLATE = """ + { + "BACKGROUND": "It's a Werewolf game, you are __profile__" + ,"HISTORY": "You have knowledge to the following conversation: __context__" + ,"MODERATOR_INSTRUCTION": __latest_instruction__, + ,"OUTPUT_FORMAT": + { + "ROLE": "Your role, in this case, __profile__" + ,"PLAYER_NAME": "Your name, in this case, __name__" + ,"LIVING_PLAYERS": "List living players based on MODERATOR_INSTRUCTION. Return a LIST datatype." + ,"REFLECTION": "You are about to follow `MODERATOR_INSTRUCTION`, but before taking any action, think about + what insights you can draw from `HISTORY` for achieving your objective? + Try to figure out the role of each player including living or dead, and summarize the game states. Give your reflection in no more than three sentences." + ,"STRATEGY": Based on your reflection, think at high level what strategy you will take, in one sentence. + } + } + """ + + def __init__(self, name="Reflect", context=None, llm=None): + super().__init__(name, context, llm) + + @retry(stop=stop_after_attempt(2), wait=wait_fixed(1)) + async def run(self, profile: str, name: str, context: str, latest_instruction: str): + + prompt = ( + self.PROMPT_TEMPLATE.replace("__context__", context).replace("__profile__", profile) + .replace("__name__", name).replace("__latest_instruction__", latest_instruction) + ) + + rsp = await self._aask(prompt) + rsp = rsp.replace("\n", " ") + rsp_json = json.loads(rsp) + + return rsp_json['REFLECTION'] diff --git a/examples/werewolf_game/actions/witch_actions.py b/examples/werewolf_game/actions/witch_actions.py index af8032a42..d5d8aa5a2 100644 --- a/examples/werewolf_game/actions/witch_actions.py +++ b/examples/werewolf_game/actions/witch_actions.py @@ -5,7 +5,7 @@ class Save(NighttimeWhispers): def __init__(self, name="Save", context=None, llm=None): super().__init__(name, context, llm) - def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, **kwargs): + def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, reflection: str, **kwargs): del prompt_json['ACTION'] del prompt_json['ATTENTION'] @@ -13,6 +13,11 @@ class Save(NighttimeWhispers): prompt_json["OUTPUT_FORMAT"]["RESPONSE"] = "Follow the Moderator's instruction, decide whether you want to save that person or not. Return SAVE or PASS." return prompt_json + + async def run(self, *args, **kwargs): + rsp = await super().run(*args, **kwargs) + action_name, rsp = rsp.split() + return rsp # 只需回复SAVE或PASS,不需要带上action名 class Poison(NighttimeWhispers): STRATEGY = """ @@ -23,8 +28,14 @@ class Poison(NighttimeWhispers): def __init__(self, name="Poison", context=None, llm=None): super().__init__(name, context, llm) - def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, **kwargs): + def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, reflection: str, **kwargs): prompt_json["OUTPUT_FORMAT"]["RESPONSE"] += "Or if you want to PASS, return PASS." return prompt_json + + async def run(self, *args, **kwargs): + rsp = await super().run(*args, **kwargs) + if "pass" in rsp.lower(): + action_name, rsp = rsp.split() # 带PASS,只需回复PASS,不需要带上action名,否则是Poison PlayerX,无需改动 + return rsp diff --git a/examples/werewolf_game/roles/base_player.py b/examples/werewolf_game/roles/base_player.py index 1a044673d..734228d46 100644 --- a/examples/werewolf_game/roles/base_player.py +++ b/examples/werewolf_game/roles/base_player.py @@ -3,7 +3,7 @@ import re from metagpt.roles import Role from metagpt.schema import Message from metagpt.logs import logger -from examples.werewolf_game.actions import ACTIONS, Speak, InstructSpeak +from examples.werewolf_game.actions import ACTIONS, InstructSpeak, Speak, Reflect, NighttimeWhispers class BasePlayer(Role): @@ -49,7 +49,42 @@ class BasePlayer(Role): async def _act(self): """每个角色要改写此函数以实现该角色的动作""" - raise NotImplementedError + # raise NotImplementedError + + # todo为_think时确定的,有两种情况,Speak或Protect + todo = self._rc.todo + logger.info(f"{self._setting}: ready to {str(todo)}") + + # 可以用这个函数获取该角色的全部记忆和最新的instruction + memories = self.get_all_memories() + latest_instruction = self.get_latest_instruction() + # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10) + + reflection = await Reflect().run( + profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction + ) + + # 根据自己定义的角色Action,对应地去run,run的入参可能不同 + if isinstance(todo, Speak): + rsp = await todo.run( + profile=self.profile, name=self.name, context=memories, + latest_instruction=latest_instruction, reflection=reflection + ) + restricted_to = "" + + elif isinstance(todo, NighttimeWhispers): + rsp = await todo.run(profile=self.profile, name=self.name, context=memories, reflection=reflection) + restricted_to = f"Moderator,{self.profile}" # 给Moderator发送使用特殊技能的加密消息 + + msg = Message( + content=rsp, role=self.profile, sent_from=self.name, + cause_by=type(todo), send_to="", + restricted_to=restricted_to + ) + + logger.info(f"{self._setting}: {rsp}") + + return msg def get_all_memories(self) -> str: memories = self._rc.memory.get() diff --git a/examples/werewolf_game/roles/guard.py b/examples/werewolf_game/roles/guard.py index 580d16cd9..24cfbb7c1 100644 --- a/examples/werewolf_game/roles/guard.py +++ b/examples/werewolf_game/roles/guard.py @@ -1,7 +1,4 @@ from examples.werewolf_game.roles.base_player import BasePlayer -from examples.werewolf_game.actions import Speak, Protect -from metagpt.schema import Message -from metagpt.logs import logger class Guard(BasePlayer): def __init__( @@ -12,33 +9,3 @@ class Guard(BasePlayer): **kwargs, ): super().__init__(name, profile, special_action_names, **kwargs) - - async def _act(self): - # todo为_think时确定的,有两种情况,Speak或Protect - todo = self._rc.todo - logger.info(f"{self._setting}: ready to {str(todo)}") - - # 可以用这个函数获取该角色的全部记忆和最新的instruction - memories = self.get_all_memories() - latest_instruction = self.get_latest_instruction() - # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10) - - # 根据自己定义的角色Action,对应地去run,run的入参可能不同 - if isinstance(todo, Speak): - rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction) - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Speak, send_to="", restricted_to="", - ) - - elif isinstance(todo, Protect): - rsp = await todo.run(profile=self.profile, name=self.name, context=memories) - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Protect, send_to="", - restricted_to=f"Moderator,{self.profile}", # 给Moderator发送守卫要保护的人加密消息 - ) - - logger.info(f"{self._setting}: {rsp}") - - return msg diff --git a/examples/werewolf_game/roles/seer.py b/examples/werewolf_game/roles/seer.py index 54a15689d..769713e8f 100644 --- a/examples/werewolf_game/roles/seer.py +++ b/examples/werewolf_game/roles/seer.py @@ -1,9 +1,4 @@ -from examples.werewolf_game.actions.seer_actions import Verify from examples.werewolf_game.roles.base_player import BasePlayer -from examples.werewolf_game.actions import Speak -from metagpt.schema import Message -from metagpt.logs import logger - class Seer(BasePlayer): def __init__( @@ -14,32 +9,3 @@ class Seer(BasePlayer): **kwargs, ): super().__init__(name, profile, special_action_names, **kwargs) - - async def _act(self): - todo = self._rc.todo - logger.info(f"{self._setting}: ready to {str(todo)}") - - # 可以用这个函数获取该角色的全部记忆和最新的instruction - memories = self.get_all_memories() - latest_instruction = self.get_latest_instruction() - # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10) - - # 基于todo的类型,调用不同的action - if isinstance(todo, Speak): - rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction) - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Speak, send_to="", restricted_to="", - ) - - elif isinstance(todo, Verify): - rsp = await todo.run(profile=self.profile, name=self.name, context=memories) - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Verify, send_to="", - restricted_to=f"Moderator,{self.profile}", - ) - - logger.info(f"{self._setting}: {rsp}") - - return msg diff --git a/examples/werewolf_game/roles/villager.py b/examples/werewolf_game/roles/villager.py index e6e59a51e..7a39071a6 100644 --- a/examples/werewolf_game/roles/villager.py +++ b/examples/werewolf_game/roles/villager.py @@ -1,7 +1,4 @@ from examples.werewolf_game.roles.base_player import BasePlayer -from examples.werewolf_game.actions import Speak -from metagpt.schema import Message -from metagpt.logs import logger class Villager(BasePlayer): def __init__( @@ -12,27 +9,3 @@ class Villager(BasePlayer): **kwargs, ): super().__init__(name, profile, special_action_names, **kwargs) - - async def _act(self): - - # todo为_think时确定的,在村民这里,就只有一种todo,即Speak - todo = self._rc.todo - logger.info(f"{self._setting}: ready to {todo}") - - # 可以用这个函数获取该角色的全部记忆和最新的instruction - memories = self.get_all_memories() - latest_instruction = self.get_latest_instruction() - # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10) - - # 根据自己定义的角色Action,对应地去run - rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction) - - # 返回消息,注意给Moderator发送的加密消息需要用restricted_to="Moderator" - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Speak, send_to="", restricted_to="", - ) - - logger.info(f"{self._setting}: {rsp}") - - return msg diff --git a/examples/werewolf_game/roles/werewolf.py b/examples/werewolf_game/roles/werewolf.py index 786e37691..00e93c96c 100644 --- a/examples/werewolf_game/roles/werewolf.py +++ b/examples/werewolf_game/roles/werewolf.py @@ -1,7 +1,5 @@ from examples.werewolf_game.roles.base_player import BasePlayer -from examples.werewolf_game.actions import Speak, Hunt, Impersonate -from metagpt.schema import Message -from metagpt.logs import logger +from examples.werewolf_game.actions import Speak, Impersonate class Werewolf(BasePlayer): def __init__( @@ -13,33 +11,8 @@ class Werewolf(BasePlayer): ): super().__init__(name, profile, special_action_names, **kwargs) - async def _act(self): - # todo为_think时确定的,有两种情况,Speak或Hunt - todo = self._rc.todo - logger.info(f"{self._setting}: ready to {str(todo)}") - - # 可以用这个函数获取该角色的全部记忆和最新的instruction - memories = self.get_all_memories() - latest_instruction = self.get_latest_instruction() - # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10) - - # 根据自己定义的角色Action,对应地去run,run的入参可能不同 - if isinstance(todo, Speak): - # rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction) - rsp = await Impersonate().run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction) - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Speak, send_to="", restricted_to="", - ) - - elif isinstance(todo, Hunt): - rsp = await todo.run(profile=self.profile, name=self.name, context=memories) - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Hunt, send_to="", - restricted_to=f"Moderator,{self.profile}", # 给Moderator及狼阵营发送要杀的人的加密消息 - ) - - logger.info(f"{self._setting}: {rsp}") - - return msg + async def _think(self): + """狼人白天发言时需要伪装,与其他角色不同,因此需要重写_think""" + await super()._think() + if isinstance(self._rc.todo, Speak): + self._rc.todo = Impersonate() diff --git a/examples/werewolf_game/roles/witch.py b/examples/werewolf_game/roles/witch.py index a570677df..c3f0e24d0 100644 --- a/examples/werewolf_game/roles/witch.py +++ b/examples/werewolf_game/roles/witch.py @@ -1,7 +1,5 @@ from examples.werewolf_game.actions import InstructSpeak, Speak, Save, Poison from examples.werewolf_game.roles.base_player import BasePlayer -from metagpt.schema import Message -from metagpt.logs import logger class Witch(BasePlayer): def __init__( @@ -14,7 +12,7 @@ class Witch(BasePlayer): super().__init__(name, profile, special_action_names, **kwargs) async def _think(self): - # 女巫涉及两个特殊技能,因此在此需要改写_think进行路由 + """女巫涉及两个特殊技能,因此在此需要改写_think进行路由""" news = self._rc.news[0] assert news.cause_by == InstructSpeak # 消息为来自Moderator的指令时,才去做动作 if not news.restricted_to: @@ -30,41 +28,3 @@ class Witch(BasePlayer): self._rc.todo = Poison() else: raise ValueError("Moderator's instructions must include save or poison keyword") - - async def _act(self): - # todo为_think时确定的,有三种情况,Speak或Save或Poison - todo = self._rc.todo - logger.info(f"{self._setting}: ready to {str(todo)}") - - # 可以用这个函数获取该角色的全部记忆 - memories = self.get_all_memories() - latest_instruction = self.get_latest_instruction() - # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10) - - # 根据自己定义的角色Action,对应地去run,run的入参可能不同 - if isinstance(todo, Speak): - rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction) - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Speak, send_to="", restricted_to="", - ) - - elif isinstance(todo, Save): - rsp = await todo.run(profile=self.profile, name=self.name, context=memories) - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Save, send_to="", - restricted_to=f"Moderator,{self.profile}", # 给Moderator发送要救的人的加密消息 - ) - - elif isinstance(todo, Poison): - rsp = await todo.run(profile=self.profile, name=self.name, context=memories) - msg = Message( - content=rsp, role=self.profile, sent_from=self.name, - cause_by=Poison, send_to="", - restricted_to=f"Moderator,{self.profile}", # 给Moderator发送要读的人的加密消息 - ) - - logger.info(f"{self._setting}: {rsp}") - - return msg