introduce simple reflection & abstract common _act

2026-05-15 11:02:36 +02:00 · 2023-10-11 16:30:07 +08:00 · 2023-10-11 16:30:07 +08:00 · d015c81fc4
commit d015c81fc4
parent ef1a9a4609
9 changed files with 117 additions and 203 deletions
--- a/examples/werewolf_game/actions/init.py
+++ b/examples/werewolf_game/actions/init.py
@ -1,5 +1,5 @@
 from examples.werewolf_game.actions.moderator_actions import InstructSpeak
-from examples.werewolf_game.actions.common_actions import Speak, NighttimeWhispers
+from examples.werewolf_game.actions.common_actions import Speak, NighttimeWhispers, Reflect
 from examples.werewolf_game.actions.werewolf_actions import Hunt, Impersonate
 from examples.werewolf_game.actions.guard_actions import Protect
 from examples.werewolf_game.actions.seer_actions import Verify
--- a/examples/werewolf_game/actions/common_actions.py
+++ b/examples/werewolf_game/actions/common_actions.py
@ -1,7 +1,7 @@
 from metagpt.actions import Action
 import json
 from metagpt.const import WORKSPACE_ROOT
-
+from tenacity import retry, stop_after_attempt, wait_fixed

 class Speak(Action):
    """Action: Any speak action in a game"""
@ -11,6 +11,7 @@ class Speak(Action):
    "BACKGROUND": "It's a Werewolf game, you are __profile__, say whatever possible to increase your chance of win"
    ,"HISTORY": "You have knowledge to the following conversation: __context__"
    ,"ATTENTION": "You can NOT VOTE a player who is NOT ALIVE now!"
+    ,"REFLECTION": "__reflection__"
    ,"STRATEGY": __strategy__
    ,"MODERATOR_INSTRUCTION": __latest_instruction__,
    ,"RULE": "Please follow the moderator's latest instruction, figure out if you need to speak your opinion or directly to vote:
@ -34,23 +35,18 @@ class Speak(Action):
    def __init__(self, name="Speak", context=None, llm=None):
        super().__init__(name, context, llm)

-    async def run(self, profile: str, name: str, context: str, latest_instruction: str):
+    @retry(stop=stop_after_attempt(2), wait=wait_fixed(1))
+    async def run(self, profile: str, name: str, context: str, latest_instruction: str, reflection: str = ""):

        prompt = (
            self.PROMPT_TEMPLATE.replace("__context__", context).replace("__profile__", profile)
            .replace("__name__", name).replace("__latest_instruction__", latest_instruction)
-            .replace("__strategy__", self.STRATEGY)
+            .replace("__strategy__", self.STRATEGY).replace("__reflection__", reflection)
        )

-        re_run = 2
-        while re_run > 0:
-            rsp = await self._aask(prompt)
-            try:
-                rsp = rsp.replace("\n", " ")
-                rsp_json = json.loads(rsp)
-                break
-            except:
-                re_run -= 1
+        rsp = await self._aask(prompt)
+        rsp = rsp.replace("\n", " ")
+        rsp_json = json.loads(rsp)

        with open(WORKSPACE_ROOT / 'speak.txt', 'a') as f:
            f.write(rsp)
@ -101,8 +97,9 @@ class NighttimeWhispers(Action):
    "ROLE": "__profile__"
    ,"ACTION": "Choose one living player to __action__."
    ,"ATTENTION": "1. You can only __action__ a player who is alive this night! And you can not __action__ a player who is dead this night!  2. `HISTORY` is all the information you observed, DONT hallucinate other player actions!"
-    ,"STRATEGY": "__strategy__"
    ,"BACKGROUND": "It's a werewolf game and you are a __profile__. Here's the game history: __context__."
+    ,"REFLECTION": "__reflection__"
+    ,"STRATEGY": "__strategy__"
    ,"OUTPUT_FORMAT":
        {
        "ROLE": "Your role, in this case, __profile__"
@ -120,7 +117,7 @@ class NighttimeWhispers(Action):
    def __init__(self, name="NightTimeWhispers", context=None, llm=None):
        super().__init__(name, context, llm)

-    def _construct_prompt_json(self, role_profile: str, role_name: str, context: str, **kwargs):
+    def _construct_prompt_json(self, role_profile: str, role_name: str, context: str, reflection: str, **kwargs):
        prompt_template = self.PROMPT_TEMPLATE

        def replace_string(prompt_json: dict):
@ -134,6 +131,7 @@ class NighttimeWhispers(Action):
                prompt_json[k] = prompt_json[k].replace("__context__", context)
                prompt_json[k] = prompt_json[k].replace("__action__", self.name)
                prompt_json[k] = prompt_json[k].replace("__strategy__", self.STRATEGY)
+                prompt_json[k] = prompt_json[k].replace("__reflection__", reflection)

            return prompt_json
        
@ -141,34 +139,65 @@ class NighttimeWhispers(Action):

        prompt_json = replace_string(prompt_json)

-        prompt_json: dict = self._update_prompt_json(prompt_json, role_profile, role_name, context, **kwargs)
+        prompt_json: dict = self._update_prompt_json(prompt_json, role_profile, role_name, context, reflection, **kwargs)
        assert isinstance(prompt_json, dict)

        prompt: str = json.dumps(prompt_json, indent=4, separators=(',', ': '), ensure_ascii=False)
        
        return prompt

-    def _update_prompt_json(self, prompt_json: dict, role_profile: str, role_name: str, context: str) -> dict:
+    def _update_prompt_json(self, prompt_json: dict, role_profile: str, role_name: str, context: str, reflection: str) -> dict:
        # one can modify the prompt_json dictionary here
        return prompt_json

-    async def run(self, context: str, profile: str, name: str):
+    @retry(stop=stop_after_attempt(2), wait=wait_fixed(1))
+    async def run(self, context: str, profile: str, name: str, reflection: str = ""):

-        final_prompt = self._construct_prompt_json(
-            role_profile=profile, role_name=name, context=context
+        prompt = self._construct_prompt_json(
+            role_profile=profile, role_name=name, context=context, reflection=reflection
        )

-        re_run = 2
-        while re_run > 0:
-            rsp_content = await self._aask(final_prompt)
-            try:
-                rsp_content = rsp_content.replace("\n", " ")
-                rsp = json.loads(rsp_content)
-                break
-            except:
-                re_run -= 1
+        rsp = await self._aask(prompt)
+        rsp = rsp.replace("\n", " ")
+        rsp_json = json.loads(rsp)

        with open(WORKSPACE_ROOT / f'{self.name}.txt', 'a') as f:
-            f.write(rsp_content)
+            f.write(rsp)

-        return f"{self.name} " + str(rsp["RESPONSE"])
+        return f"{self.name} " + str(rsp_json["RESPONSE"])
+
+class Reflect(Action):
+    """Action: ask the LLM to reflect on the game history before the player acts.
+
+    The prompt is built from PROMPT_TEMPLATE by plain string substitution of the
+    `__profile__`, `__name__`, `__context__` and `__latest_instruction__`
+    placeholders; the LLM is expected to answer with a JSON object.
+    """
+    PROMPT_TEMPLATE = """
+    {
+    "BACKGROUND": "It's a Werewolf game, you are __profile__"
+    ,"HISTORY": "You have knowledge to the following conversation: __context__"
+    ,"MODERATOR_INSTRUCTION": __latest_instruction__,
+    ,"OUTPUT_FORMAT":
+        {
+        "ROLE": "Your role, in this case, __profile__"
+        ,"PLAYER_NAME": "Your name, in this case, __name__"
+        ,"LIVING_PLAYERS": "List living players based on MODERATOR_INSTRUCTION. Return a LIST datatype."
+        ,"REFLECTION": "You are about to follow `MODERATOR_INSTRUCTION`, but before taking any action, think about 
+                        what insights you can draw from `HISTORY` for achieving your objective?
+                        Try to figure out the role of each player including living or dead, and summarize the game states. Give your reflection in no more than three sentences."
+        ,"STRATEGY": Based on your reflection, think at high level what strategy you will take, in one sentence.
+        }
+    }
+    """
+
+    def __init__(self, name="Reflect", context=None, llm=None):
+        super().__init__(name, context, llm)
+
+    @retry(stop=stop_after_attempt(2), wait=wait_fixed(1))
+    async def run(self, profile: str, name: str, context: str, latest_instruction: str) -> str:
+        """Query the LLM and return the REFLECTION field of its JSON reply as text.
+
+        Args:
+            profile: value substituted for `__profile__` in the prompt.
+            name: value substituted for `__name__` in the prompt.
+            context: value substituted for `__context__` in the prompt.
+            latest_instruction: value substituted for `__latest_instruction__`.
+
+        Returns:
+            The `REFLECTION` entry of the parsed reply, coerced to `str`.
+
+        A reply that is not valid JSON, or that lacks the `REFLECTION` key,
+        raises inside this retried body, so the call is attempted at most
+        twice, one second apart.
+        """
+
+        prompt = (
+            self.PROMPT_TEMPLATE.replace("__context__", context).replace("__profile__", profile)
+            .replace("__name__", name).replace("__latest_instruction__", latest_instruction)
+        )
+
+        rsp = await self._aask(prompt)
+        # Newlines are flattened to spaces before the reply is parsed as JSON.
+        rsp = rsp.replace("\n", " ")
+        rsp_json = json.loads(rsp)
+
+        # Callers splice the reflection into their own prompts with str.replace,
+        # which needs a string; coerce in case the model returned a list or an
+        # object, the same way NighttimeWhispers.run coerces RESPONSE.
+        return str(rsp_json['REFLECTION'])
--- a/examples/werewolf_game/actions/witch_actions.py
+++ b/examples/werewolf_game/actions/witch_actions.py
@ -5,7 +5,7 @@ class Save(NighttimeWhispers):
    def __init__(self, name="Save", context=None, llm=None):
        super().__init__(name, context, llm)

-    def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, **kwargs):
+    def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, reflection: str, **kwargs):
        del prompt_json['ACTION']
        del prompt_json['ATTENTION']

@ -13,6 +13,11 @@ class Save(NighttimeWhispers):
        prompt_json["OUTPUT_FORMAT"]["RESPONSE"] = "Follow the Moderator's instruction, decide whether you want to save that person or not. Return SAVE or PASS."

        return prompt_json
+    
+    async def run(self, *args, **kwargs):
+        """Run the night-time action and return only the decision text.
+
+        The parent `run` returns "<action name> <RESPONSE>"; the Save action is
+        prompted to answer SAVE or PASS, so the action-name prefix is dropped.
+
+        Returns:
+            The text after the action name, stripped of surrounding
+            whitespace, or an empty string when nothing follows the name.
+        """
+        rsp = await super().run(*args, **kwargs)
+        # Split only once: unpacking a bare split() raised ValueError whenever
+        # the RESPONSE was empty or contained more than one word.
+        parts = rsp.split(maxsplit=1)
+        return parts[1].strip() if len(parts) > 1 else ""

 class Poison(NighttimeWhispers):
    STRATEGY = """
@ -23,8 +28,14 @@ class Poison(NighttimeWhispers):
    def __init__(self, name="Poison", context=None, llm=None):
        super().__init__(name, context, llm)

-    def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, **kwargs):
+    def _update_prompt_json(self, prompt_json: dict, profile: str, name: str, context: str, reflection: str, **kwargs):

        prompt_json["OUTPUT_FORMAT"]["RESPONSE"] += "Or if you want to PASS, return PASS."

        return prompt_json
+
+    async def run(self, *args, **kwargs):
+        """Run the night-time action; drop the action-name prefix on a PASS reply.
+
+        The parent `run` returns "<action name> <RESPONSE>". When the result
+        contains "pass" (case-insensitive) only the text after the action name
+        is returned; otherwise the result (e.g. "Poison PlayerX") is returned
+        unchanged.
+        """
+        rsp = await super().run(*args, **kwargs)
+        if "pass" in rsp.lower():
+            # Split only once so a multi-word reply that mentions "pass" no
+            # longer raises ValueError on two-value unpacking.
+            rsp = rsp.split(maxsplit=1)[-1].strip()
+        return rsp
--- a/examples/werewolf_game/roles/base_player.py
+++ b/examples/werewolf_game/roles/base_player.py
@ -3,7 +3,7 @@ import re
 from metagpt.roles import Role
 from metagpt.schema import Message
 from metagpt.logs import logger
-from examples.werewolf_game.actions import ACTIONS, Speak, InstructSpeak
+from examples.werewolf_game.actions import ACTIONS, InstructSpeak, Speak, Reflect, NighttimeWhispers


 class BasePlayer(Role):
@ -49,7 +49,42 @@ class BasePlayer(Role):

    async def _act(self):
-        """每个角色要改写此函数以实现该角色的动作"""
-        raise NotImplementedError
+        """Reflect on the game, run the action chosen in `_think`, and wrap the reply in a Message.
+
+        Common implementation for player roles. A `Speak` todo is run with the
+        latest instruction and yields a message with an empty `restricted_to`;
+        a `NighttimeWhispers` todo yields a message restricted to
+        "Moderator,<profile>".
+
+        Returns:
+            The Message built from the action's reply, with `cause_by` set to
+            the concrete type of the todo.
+
+        Raises:
+            NotImplementedError: if the pending todo is neither a `Speak` nor a
+                `NighttimeWhispers`; a role using other action types must
+                override this method.
+        """
+        # todo was decided in _think: either Speak or a night-time skill
+        todo = self._rc.todo
+        logger.info(f"{self._setting}: ready to {str(todo)}")
+
+        # Reject unsupported todos before spending an LLM call on Reflect;
+        # previously this fell through and failed with UnboundLocalError on `rsp`.
+        if not isinstance(todo, (Speak, NighttimeWhispers)):
+            raise NotImplementedError(
+                f"{self._setting}: unsupported action {todo!r}; override _act to handle it"
+            )
+
+        # All memories of this role plus the latest instruction
+        memories = self.get_all_memories()
+        latest_instruction = self.get_latest_instruction()
+
+        reflection = await Reflect().run(
+            profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction
+        )
+
+        # run() takes different arguments depending on the kind of action
+        if isinstance(todo, Speak):
+            rsp = await todo.run(
+                profile=self.profile, name=self.name, context=memories,
+                latest_instruction=latest_instruction, reflection=reflection
+            )
+            restricted_to = ""
+
+        else:
+            rsp = await todo.run(profile=self.profile, name=self.name, context=memories, reflection=reflection)
+            restricted_to = f"Moderator,{self.profile}" # special-skill message restricted to the Moderator and this profile
+
+        msg = Message(
+            content=rsp, role=self.profile, sent_from=self.name,
+            cause_by=type(todo), send_to="",
+            restricted_to=restricted_to
+        )
+
+        logger.info(f"{self._setting}: {rsp}")
+
+        return msg

    def get_all_memories(self) -> str:
        memories = self._rc.memory.get()
--- a/examples/werewolf_game/roles/guard.py
+++ b/examples/werewolf_game/roles/guard.py
@ -1,7 +1,4 @@
 from examples.werewolf_game.roles.base_player import BasePlayer
-from examples.werewolf_game.actions import Speak, Protect
-from metagpt.schema import Message
-from metagpt.logs import logger

 class Guard(BasePlayer):
    def __init__(
@ -12,33 +9,3 @@ class Guard(BasePlayer):
        **kwargs,
    ):
        super().__init__(name, profile, special_action_names, **kwargs)
-
-    async def _act(self):
-        # todo为_think时确定的，有两种情况，Speak或Protect
-        todo = self._rc.todo
-        logger.info(f"{self._setting}: ready to {str(todo)}")
-
-        # 可以用这个函数获取该角色的全部记忆和最新的instruction
-        memories = self.get_all_memories()
-        latest_instruction = self.get_latest_instruction()
-        # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10)
-
-        # 根据自己定义的角色Action，对应地去run，run的入参可能不同
-        if isinstance(todo, Speak):
-            rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction)
-            msg = Message(
-                content=rsp, role=self.profile, sent_from=self.name,
-                cause_by=Speak, send_to="", restricted_to="",
-            )
-
-        elif isinstance(todo, Protect):
-            rsp = await todo.run(profile=self.profile, name=self.name, context=memories)
-            msg = Message(
-                content=rsp, role=self.profile, sent_from=self.name,
-                cause_by=Protect, send_to="",
-                restricted_to=f"Moderator,{self.profile}", # 给Moderator发送守卫要保护的人加密消息
-            )
-
-        logger.info(f"{self._setting}: {rsp}")
-
-        return msg
--- a/examples/werewolf_game/roles/seer.py
+++ b/examples/werewolf_game/roles/seer.py
@ -1,9 +1,4 @@
-from examples.werewolf_game.actions.seer_actions import Verify
 from examples.werewolf_game.roles.base_player import BasePlayer
-from examples.werewolf_game.actions import Speak
-from metagpt.schema import Message
-from metagpt.logs import logger
-

 class Seer(BasePlayer):
    def __init__(
@ -14,32 +9,3 @@ class Seer(BasePlayer):
            **kwargs,
    ):
        super().__init__(name, profile, special_action_names, **kwargs)
-
-    async def _act(self):
-        todo = self._rc.todo
-        logger.info(f"{self._setting}: ready to {str(todo)}")
-
-        # 可以用这个函数获取该角色的全部记忆和最新的instruction
-        memories = self.get_all_memories()
-        latest_instruction = self.get_latest_instruction()
-        # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10)
-
-        # 基于todo的类型，调用不同的action
-        if isinstance(todo, Speak):
-            rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction)
-            msg = Message(
-                content=rsp, role=self.profile, sent_from=self.name,
-                cause_by=Speak, send_to="", restricted_to="",
-            )
-
-        elif isinstance(todo, Verify):
-            rsp = await todo.run(profile=self.profile, name=self.name, context=memories)
-            msg = Message(
-                content=rsp, role=self.profile, sent_from=self.name,
-                cause_by=Verify, send_to="",
-                restricted_to=f"Moderator,{self.profile}",
-            )
-
-        logger.info(f"{self._setting}: {rsp}")
-
-        return msg
--- a/examples/werewolf_game/roles/villager.py
+++ b/examples/werewolf_game/roles/villager.py
@ -1,7 +1,4 @@
 from examples.werewolf_game.roles.base_player import BasePlayer
-from examples.werewolf_game.actions import Speak
-from metagpt.schema import Message
-from metagpt.logs import logger

 class Villager(BasePlayer):
    def __init__(
@ -12,27 +9,3 @@ class Villager(BasePlayer):
        **kwargs,
    ):
        super().__init__(name, profile, special_action_names, **kwargs)
-
-    async def _act(self):
-
-        # todo为_think时确定的，在村民这里，就只有一种todo，即Speak
-        todo = self._rc.todo
-        logger.info(f"{self._setting}: ready to {todo}")
-
-        # 可以用这个函数获取该角色的全部记忆和最新的instruction
-        memories = self.get_all_memories()
-        latest_instruction = self.get_latest_instruction()
-        # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10)
-
-        # 根据自己定义的角色Action，对应地去run
-        rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction)
-
-        # 返回消息，注意给Moderator发送的加密消息需要用restricted_to="Moderator"
-        msg = Message(
-            content=rsp, role=self.profile, sent_from=self.name,
-            cause_by=Speak, send_to="", restricted_to="",
-        )
-
-        logger.info(f"{self._setting}: {rsp}")
-
-        return msg
--- a/examples/werewolf_game/roles/werewolf.py
+++ b/examples/werewolf_game/roles/werewolf.py
@ -1,7 +1,5 @@
 from examples.werewolf_game.roles.base_player import BasePlayer
-from examples.werewolf_game.actions import Speak, Hunt, Impersonate
-from metagpt.schema import Message
-from metagpt.logs import logger
+from examples.werewolf_game.actions import Speak, Impersonate

 class Werewolf(BasePlayer):
    def __init__(
@ -13,33 +11,8 @@ class Werewolf(BasePlayer):
    ):
        super().__init__(name, profile, special_action_names, **kwargs)

-    async def _act(self):
-        # todo为_think时确定的，有两种情况，Speak或Hunt
-        todo = self._rc.todo
-        logger.info(f"{self._setting}: ready to {str(todo)}")
-
-        # 可以用这个函数获取该角色的全部记忆和最新的instruction
-        memories = self.get_all_memories()
-        latest_instruction = self.get_latest_instruction()
-        # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10)
-
-        # 根据自己定义的角色Action，对应地去run，run的入参可能不同
-        if isinstance(todo, Speak):
-            # rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction)
-            rsp = await Impersonate().run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction)
-            msg = Message(
-                content=rsp, role=self.profile, sent_from=self.name,
-                cause_by=Speak, send_to="", restricted_to="",
-            )
-
-        elif isinstance(todo, Hunt):
-            rsp = await todo.run(profile=self.profile, name=self.name, context=memories)
-            msg = Message(
-                content=rsp, role=self.profile, sent_from=self.name,
-                cause_by=Hunt, send_to="",
-                restricted_to=f"Moderator,{self.profile}", # 给Moderator及狼阵营发送要杀的人的加密消息
-            )
-
-        logger.info(f"{self._setting}: {rsp}")
-
-        return msg
+    async def _think(self):
+        """Werewolves must disguise themselves when speaking, unlike other roles, so `_think` is overridden."""
+        await super()._think()
+        # Only a Speak todo is swapped; any other todo chosen by the base class is kept.
+        if not isinstance(self._rc.todo, Speak):
+            return
+        self._rc.todo = Impersonate()
--- a/examples/werewolf_game/roles/witch.py
+++ b/examples/werewolf_game/roles/witch.py
@ -1,7 +1,5 @@
 from examples.werewolf_game.actions import InstructSpeak, Speak, Save, Poison
 from examples.werewolf_game.roles.base_player import BasePlayer
-from metagpt.schema import Message
-from metagpt.logs import logger

 class Witch(BasePlayer):
    def __init__(
@ -14,7 +12,7 @@ class Witch(BasePlayer):
        super().__init__(name, profile, special_action_names, **kwargs)

    async def _think(self):
-        # 女巫涉及两个特殊技能，因此在此需要改写_think进行路由
+        """女巫涉及两个特殊技能，因此在此需要改写_think进行路由"""
        news = self._rc.news[0]
        assert news.cause_by == InstructSpeak # 消息为来自Moderator的指令时，才去做动作
        if not news.restricted_to:
@ -30,41 +28,3 @@ class Witch(BasePlayer):
                self._rc.todo = Poison()
            else:
                raise ValueError("Moderator's instructions must include save or poison keyword")
-
-    async def _act(self):
-        # todo为_think时确定的，有三种情况，Speak或Save或Poison
-        todo = self._rc.todo
-        logger.info(f"{self._setting}: ready to {str(todo)}")
-
-        # 可以用这个函数获取该角色的全部记忆
-        memories = self.get_all_memories()
-        latest_instruction = self.get_latest_instruction()
-        # print("*" * 10, f"{self._setting}'s current memories: {memories}", "*" * 10)
-
-        # 根据自己定义的角色Action，对应地去run，run的入参可能不同
-        if isinstance(todo, Speak):
-            rsp = await todo.run(profile=self.profile, name=self.name, context=memories, latest_instruction=latest_instruction)
-            msg = Message(
-                content=rsp, role=self.profile, sent_from=self.name,
-                cause_by=Speak, send_to="", restricted_to="",
-            )
-
-        elif isinstance(todo, Save):
-            rsp = await todo.run(profile=self.profile, name=self.name, context=memories)
-            msg = Message(
-                content=rsp, role=self.profile, sent_from=self.name,
-                cause_by=Save, send_to="",
-                restricted_to=f"Moderator,{self.profile}", # 给Moderator发送要救的人的加密消息
-            )
-
-        elif isinstance(todo, Poison):
-            rsp = await todo.run(profile=self.profile, name=self.name, context=memories)
-            msg = Message(
-                content=rsp, role=self.profile, sent_from=self.name,
-                cause_by=Poison, send_to="",
-                restricted_to=f"Moderator,{self.profile}", # 给Moderator发送要读的人的加密消息
-            )
-
-        logger.info(f"{self._setting}: {rsp}")
-
-        return msg