From 250fe83de9deecebecdec1d90b873cfeddeff3fb Mon Sep 17 00:00:00 2001
From: garylin2099 <garylin2099@gmail.com>
Date: Mon, 6 Nov 2023 14:37:29 +0800
Subject: [PATCH 1/2] redefine react and provide multiple react modes

---
 examples/build_customized_agent.py     | 38 ++--------
 metagpt/roles/invoice_ocr_assistant.py | 26 +------
 metagpt/roles/researcher.py            | 19 +----
 metagpt/roles/role.py                  | 98 ++++++++++++++++++++++----
 4 files changed, 94 insertions(+), 87 deletions(-)

diff --git a/examples/build_customized_agent.py b/examples/build_customized_agent.py
index 87d7a9c76..2bc9e31e5 100644
--- a/examples/build_customized_agent.py
+++ b/examples/build_customized_agent.py
@@ -75,10 +75,9 @@ class SimpleCoder(Role):
         logger.info(f"{self._setting}: ready to {self._rc.todo}")
         todo = self._rc.todo
 
-        msg = self._rc.memory.get()[-1] # retrieve the latest memory
-        instruction = msg.content
+        msg = self.get_memories(k=1)[0] # retrieve the single most recent message
 
-        code_text = await SimpleWriteCode().run(instruction)
+        code_text = await SimpleWriteCode().run(msg.content)
         msg = Message(content=code_text, role=self.profile, cause_by=todo)
 
         return msg
@@ -92,43 +91,20 @@ class RunnableCoder(Role):
     ):
         super().__init__(name, profile, **kwargs)
         self._init_actions([SimpleWriteCode, SimpleRunCode])
-
-    async def _think(self) -> None:
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
+        self._set_react_mode(react_mode="by_order")
 
     async def _act(self) -> Message:
         logger.info(f"{self._setting}: ready to {self._rc.todo}")
         todo = self._rc.todo
-        msg = self._rc.memory.get()[-1]
 
-        if isinstance(todo, SimpleWriteCode):
-            instruction = msg.content
-            result = await SimpleWriteCode().run(instruction)
+        msg = self.get_memories(k=1)[0] # retrieve the most recent message (k=1)
+        result = await todo.run(msg.content)
 
-        elif isinstance(todo, SimpleRunCode):
-            code_text = msg.content
-            result = await SimpleRunCode().run(code_text)
-
-        msg = Message(content=result, role=self.profile, cause_by=todo)
+        msg = Message(content=result, role=self.profile, cause_by=type(todo))
         self._rc.memory.add(msg)
         return msg
 
-    async def _react(self) -> Message:
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            await self._act()
-        return Message(content="All job done", role=self.profile)
-
-def main(msg="write a function that calculates the sum of a list"):
+def main(msg="write a function that calculates the product of a list"):
     # role = SimpleCoder()
     role = RunnableCoder()
     logger.info(msg)
diff --git a/metagpt/roles/invoice_ocr_assistant.py b/metagpt/roles/invoice_ocr_assistant.py
index c307b20c0..15f831c97 100644
--- a/metagpt/roles/invoice_ocr_assistant.py
+++ b/metagpt/roles/invoice_ocr_assistant.py
@@ -42,17 +42,7 @@ class InvoiceOCRAssistant(Role):
         self.filename = ""
         self.origin_query = ""
         self.orc_data = None
-
-    async def _think(self) -> None:
-        """Determine the next action to be taken by the role."""
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
+        self._set_react_mode(react_mode="by_order")
 
     async def _act(self) -> Message:
         """Perform an action as determined by the role.
@@ -94,17 +84,3 @@ class InvoiceOCRAssistant(Role):
         msg = Message(content=content, instruct_content=resp)
         self._rc.memory.add(msg)
         return msg
-
-    async def _react(self) -> Message:
-        """Execute the invoice ocr assistant's think and actions.
-
-        Returns:
-            A message containing the final result of the assistant's actions.
-        """
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            msg = await self._act()
-        return msg
-
diff --git a/metagpt/roles/researcher.py b/metagpt/roles/researcher.py
index acb46c718..c5512121a 100644
--- a/metagpt/roles/researcher.py
+++ b/metagpt/roles/researcher.py
@@ -31,20 +31,11 @@ class Researcher(Role):
     ):
         super().__init__(name, profile, goal, constraints, **kwargs)
         self._init_actions([CollectLinks(name), WebBrowseAndSummarize(name), ConductResearch(name)])
+        self._set_react_mode(react_mode="by_order")
         self.language = language
         if language not in ("en-us", "zh-cn"):
             logger.warning(f"The language `{language}` has not been tested, it may not work.")
 
-    async def _think(self) -> None:
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
-
     async def _act(self) -> Message:
         logger.info(f"{self._setting}: ready to {self._rc.todo}")
         todo = self._rc.todo
@@ -73,12 +64,8 @@ class Researcher(Role):
         self._rc.memory.add(ret)
         return ret
 
-    async def _react(self) -> Message:
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            msg = await self._act()
+    async def react(self) -> Message:
+        msg = await super().react()
         report = msg.instruct_content
         self.write_report(report.topic, report.content)
         return msg
diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py
index 44bb3e976..88b98f4b4 100644
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@@ -7,7 +7,7 @@
 """
 from __future__ import annotations
 
-from typing import Iterable, Type
+from typing import Iterable, Type, Union
 
 from pydantic import BaseModel, Field
 
@@ -27,12 +27,15 @@ Please note that only the text between the first and second "===" is information
 {history}
 ===
 
-You can now choose one of the following stages to decide the stage you need to go in the next step:
+Your previous stage: {previous_state}
+
+Now choose one of the following stages you need to go to in the next step:
 {states}
 
 Just answer a number between 0-{n_states}, choose the most suitable stage according to the understanding of the conversation.
 Please note that the answer only needs a number, no need to add any other text.
-If there is no conversation record, choose 0.
+If there is no conversation record or your previous stage is None, choose 0.
+If you think you have completed your goal and don't need to go to any of the stages, return -1.
 Do not answer anything else, and do not add any other information in your answer.
 """
 
@@ -67,7 +70,7 @@ class RoleContext(BaseModel):
     env: 'Environment' = Field(default=None)
     memory: Memory = Field(default_factory=Memory)
     long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory)
-    state: int = Field(default=0)
+    state: int = Field(default=None)
     todo: Action = Field(default=None)
     watch: set[Type[Action]] = Field(default_factory=set)
     news: list[Type[Message]] = Field(default=[])
@@ -100,6 +103,9 @@ class Role:
         self._actions = []
         self._role_id = str(self._setting)
         self._rc = RoleContext()
+        # see `_set_react_mode` function for definitions of the following two attributes
+        self.react_mode = "react"
+        self.max_react_loop = 1
 
     def _reset(self):
         self._states = []
@@ -116,17 +122,37 @@ class Role:
             self._actions.append(i)
             self._states.append(f"{idx}. {action}")
 
+    def _set_react_mode(self, react_mode: str, max_react_loop: int = 1):
+        """Set strategy of the Role reacting to observed Message. Variation lies in how
+        this Role elects action to perform during the _think stage, especially if it is capable of multiple Actions.
+
+        Args:
+            react_mode (str): Mode for choosing action during the _think stage, can be one of
+                        "by_order": switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...;
+                        "react": standard think-act loop in the ReAct paper, alternating thinking and acting to solve the task, i.e. _think -> _act -> _think -> _act -> ... 
+                                 Use llm to select actions in _think dynamically;
+                        "plan_and_act": first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ...
+                                        Use llm to come up with the plan dynamically.
+                        Defaults to "by_order".
+            max_react_loop (int): Maximum react cycles to execute, used to prevent the agent from reacting forever.
+                                  Take effect only when react_mode is react, in which we use llm to choose actions, including termination.
+                                  Defaults to 1, i.e. _think -> _act (-> return result and end)
+        """
+        self.react_mode = react_mode
+        if react_mode == "react":
+            self.max_react_loop = max_react_loop
+
     def _watch(self, actions: Iterable[Type[Action]]):
         """Listen to the corresponding behaviors"""
         self._rc.watch.update(actions)
         # check RoleContext after adding watch actions
         self._rc.check(self._role_id)
 
-    def _set_state(self, state):
+    def _set_state(self, state: Union[int, None]):
         """Update the current state."""
         self._rc.state = state
         logger.debug(self._actions)
-        self._rc.todo = self._actions[self._rc.state]
+        self._rc.todo = self._actions[self._rc.state] if state is not None else None
 
     def set_env(self, env: 'Environment'):
         """Set the environment in which the role works. The role can talk to the environment and can also receive messages by observing."""
@@ -151,13 +177,19 @@ class Role:
             return
         prompt = self._get_prefix()
         prompt += STATE_TEMPLATE.format(history=self._rc.history, states="\n".join(self._states),
-                                        n_states=len(self._states) - 1)
+                                        n_states=len(self._states) - 1, previous_state=self._rc.state)
+        # print(prompt)
         next_state = await self._llm.aask(prompt)
         logger.debug(f"{prompt=}")
-        if not next_state.isdigit() or int(next_state) not in range(len(self._states)):
+        if not next_state.isdigit() or int(next_state) not in range(-1, len(self._states)):
             logger.warning(f'Invalid answer of state, {next_state=}')
-            next_state = "0"
-        self._set_state(int(next_state))
+            next_state = None
+        else:
+            next_state = int(next_state)
+            if next_state == -1:
+                logger.info(f"End actions with {next_state=}")
+                next_state = None
+        self._set_state(next_state)
 
     async def _act(self) -> Message:
         # prompt = self.get_prefix()
@@ -203,10 +235,42 @@ class Role:
         self._rc.env.publish_message(msg)
 
     async def _react(self) -> Message:
-        """Think first, then act"""
-        await self._think()
-        logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
-        return await self._act()
+        """Think first, then act, repeating until _think decides it is time to stop and leaves no todo.
+        This is the standard think-act loop in the ReAct paper, which alternates thinking and acting in task solving, i.e. _think -> _act -> _think -> _act -> ... 
+        Use llm to select actions in _think dynamically
+        """
+        actions_taken = 0
+        while actions_taken < self.max_react_loop:
+            # think
+            await self._think()
+            if self._rc.todo is None:
+                break
+            # act
+            logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
+            rsp = await self._act()
+            actions_taken += 1
+        return rsp # return output from the last action
+
+    async def _act_by_order(self) -> Message:
+        """switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ..."""
+        for i in range(len(self._states)):
+            self._set_state(i)
+            rsp = await self._act()
+        return rsp # return output from the last action
+
+    async def _plan_and_act(self) -> Message:
+        """first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ... Use llm to come up with the plan dynamically."""
+        # TODO: to be implemented
+        return Message("")
+
+    async def react(self) -> Message:
+        """Entry to one of three strategies by which Role reacts to the observed Message"""
+        if self.react_mode == "react":
+            return await self._react()
+        elif self.react_mode == "by_order":
+            return await self._act_by_order()
+        elif self.react_mode == "plan_and_act":
+            return await self._plan_and_act()
 
     def recv(self, message: Message) -> None:
         """add message to history."""
@@ -223,6 +287,10 @@ class Role:
 
         return await self._react()
 
+    def get_memories(self, k=0) -> list[Message]:
+        """A wrapper to return the most recent k memories of this role, return all when k=0"""
+        return self._rc.memory.get(k=k)
+
     async def run(self, message=None):
         """Observe, and think and act based on the results of the observation"""
         if message:
@@ -237,7 +305,7 @@ class Role:
             logger.debug(f"{self._setting}: no news. waiting.")
             return
 
-        rsp = await self._react()
+        rsp = await self.react()
         # Publish the reply to the environment, waiting for the next subscriber to process
         self._publish_message(rsp)
         return rsp

From c6350efd7f9df1409454d3c3bd56039885d0964a Mon Sep 17 00:00:00 2001
From: garylin2099 <garylin2099@gmail.com>
Date: Wed, 8 Nov 2023 14:20:12 +0800
Subject: [PATCH 2/2] default state to -1, mv react_mode to rc, use enum

---
 examples/build_customized_agent.py |  2 +-
 metagpt/roles/role.py              | 61 ++++++++++++++++++------------
 2 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/examples/build_customized_agent.py b/examples/build_customized_agent.py
index 2bc9e31e5..c7069b768 100644
--- a/examples/build_customized_agent.py
+++ b/examples/build_customized_agent.py
@@ -104,7 +104,7 @@ class RunnableCoder(Role):
         self._rc.memory.add(msg)
         return msg
 
-def main(msg="write a function that calculates the product of a list"):
+def main(msg="write a function that calculates the product of a list and run it"):
     # role = SimpleCoder()
     role = RunnableCoder()
     logger.info(msg)
diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py
index 88b98f4b4..0251176f7 100644
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@@ -8,6 +8,7 @@
 from __future__ import annotations
 
 from typing import Iterable, Type, Union
+from enum import Enum
 
 from pydantic import BaseModel, Field
 
@@ -34,7 +35,6 @@ Now choose one of the following stages you need to go to in the next step:
 
 Just answer a number between 0-{n_states}, choose the most suitable stage according to the understanding of the conversation.
 Please note that the answer only needs a number, no need to add any other text.
-If there is no conversation record or your previous stage is None, choose 0.
 If you think you have completed your goal and don't need to go to any of the stages, return -1.
 Do not answer anything else, and do not add any other information in your answer.
 """
@@ -49,6 +49,14 @@ ROLE_TEMPLATE = """Your response should be based on the previous conversation hi
 {name}: {result}
 """
 
+class RoleReactMode(str, Enum):
+    REACT = "react"
+    BY_ORDER = "by_order"
+    PLAN_AND_ACT = "plan_and_act"
+
+    @classmethod
+    def values(cls):
+        return [item.value for item in cls]
 
 class RoleSetting(BaseModel):
     """Role Settings"""
@@ -70,10 +78,12 @@ class RoleContext(BaseModel):
     env: 'Environment' = Field(default=None)
     memory: Memory = Field(default_factory=Memory)
     long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory)
-    state: int = Field(default=None)
+    state: int = Field(default=-1) # -1 indicates initial or termination state where todo is None
     todo: Action = Field(default=None)
     watch: set[Type[Action]] = Field(default_factory=set)
     news: list[Type[Message]] = Field(default=[])
+    react_mode: RoleReactMode = RoleReactMode.REACT # see `Role._set_react_mode` for definitions of this attribute and `max_react_loop` below
+    max_react_loop: int = 1
 
     class Config:
         arbitrary_types_allowed = True
@@ -103,9 +113,6 @@ class Role:
         self._actions = []
         self._role_id = str(self._setting)
         self._rc = RoleContext()
-        # see `_set_react_mode` function for definitions of the following two attributes
-        self.react_mode = "react"
-        self.max_react_loop = 1
 
     def _reset(self):
         self._states = []
@@ -127,20 +134,21 @@ class Role:
         this Role elects action to perform during the _think stage, especially if it is capable of multiple Actions.
 
         Args:
-            react_mode (str): Mode for choosing action during the _think stage, can be one of
-                        "by_order": switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...;
-                        "react": standard think-act loop in the ReAct paper, alternating thinking and acting to solve the task, i.e. _think -> _act -> _think -> _act -> ... 
+            react_mode (str): Mode for choosing action during the _think stage, can be one of:
+                        "react": standard think-act loop in the ReAct paper, alternating thinking and acting to solve the task, i.e. _think -> _act -> _think -> _act -> ...
                                  Use llm to select actions in _think dynamically;
+                        "by_order": switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...;
                         "plan_and_act": first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ...
                                         Use llm to come up with the plan dynamically.
-                        Defaults to "by_order".
+                        Defaults to "react".
             max_react_loop (int): Maximum react cycles to execute, used to prevent the agent from reacting forever.
                                   Take effect only when react_mode is react, in which we use llm to choose actions, including termination.
                                   Defaults to 1, i.e. _think -> _act (-> return result and end)
         """
-        self.react_mode = react_mode
-        if react_mode == "react":
-            self.max_react_loop = max_react_loop
+        assert react_mode in RoleReactMode.values(), f"react_mode must be one of {RoleReactMode.values()}"
+        self._rc.react_mode = react_mode
+        if react_mode == RoleReactMode.REACT:
+            self._rc.max_react_loop = max_react_loop
 
     def _watch(self, actions: Iterable[Type[Action]]):
         """Listen to the corresponding behaviors"""
@@ -148,11 +156,11 @@ class Role:
         # check RoleContext after adding watch actions
         self._rc.check(self._role_id)
 
-    def _set_state(self, state: Union[int, None]):
+    def _set_state(self, state: int):
         """Update the current state."""
         self._rc.state = state
         logger.debug(self._actions)
-        self._rc.todo = self._actions[self._rc.state] if state is not None else None
+        self._rc.todo = self._actions[self._rc.state] if state >= 0 else None
 
     def set_env(self, env: 'Environment'):
         """Set the environment in which the role works. The role can talk to the environment and can also receive messages by observing."""
@@ -181,14 +189,14 @@ class Role:
         # print(prompt)
         next_state = await self._llm.aask(prompt)
         logger.debug(f"{prompt=}")
-        if not next_state.isdigit() or int(next_state) not in range(-1, len(self._states)):
-            logger.warning(f'Invalid answer of state, {next_state=}')
-            next_state = None
+        if (not next_state.isdigit() and next_state != "-1") \
+            or int(next_state) not in range(-1, len(self._states)):
+            logger.warning(f'Invalid answer of state, {next_state=}, will be set to -1')
+            next_state = -1
         else:
             next_state = int(next_state)
             if next_state == -1:
                 logger.info(f"End actions with {next_state=}")
-                next_state = None
         self._set_state(next_state)
 
     async def _act(self) -> Message:
@@ -240,7 +248,8 @@ class Role:
         Use llm to select actions in _think dynamically
         """
         actions_taken = 0
-        while actions_taken < self.max_react_loop:
+        rsp = Message("No actions taken yet") # will be overwritten after Role _act
+        while actions_taken < self._rc.max_react_loop:
             # think
             await self._think()
             if self._rc.todo is None:
@@ -265,12 +274,14 @@ class Role:
 
     async def react(self) -> Message:
         """Entry to one of three strategies by which Role reacts to the observed Message"""
-        if self.react_mode == "react":
-            return await self._react()
-        elif self.react_mode == "by_order":
-            return await self._act_by_order()
-        elif self.react_mode == "plan_and_act":
-            return await self._plan_and_act()
+        if self._rc.react_mode == RoleReactMode.REACT:
+            rsp = await self._react()
+        elif self._rc.react_mode == RoleReactMode.BY_ORDER:
+            rsp = await self._act_by_order()
+        elif self._rc.react_mode == RoleReactMode.PLAN_AND_ACT:
+            rsp = await self._plan_and_act()
+        self._set_state(state=-1) # current reaction is complete, reset state to -1 and todo back to None
+        return rsp
 
     def recv(self, message: Message) -> None:
         """add message to history."""