Merge pull request #479 from garylin2099/general_react

redefine react and provide multiple react modes
2026-04-30 11:26:23 +02:00 · 2023-11-08 19:04:57 +08:00 · 2023-11-08 19:04:57 +08:00 · f404abdeb0
commit f404abdeb0
parent 0a57bad742 c6350efd7f
4 changed files with 106 additions and 88 deletions
--- a/examples/build_customized_agent.py
+++ b/examples/build_customized_agent.py
@ -75,10 +75,9 @@ class SimpleCoder(Role):
        logger.info(f"{self._setting}: ready to {self._rc.todo}")
        todo = self._rc.todo

-        msg = self._rc.memory.get()[-1] # retrieve the latest memory
-        instruction = msg.content
+        msg = self.get_memories(k=1)[0] # retrieve the most recent message

-        code_text = await SimpleWriteCode().run(instruction)
+        code_text = await SimpleWriteCode().run(msg.content)
        msg = Message(content=code_text, role=self.profile, cause_by=todo)

        return msg
@ -92,43 +91,20 @@ class RunnableCoder(Role):
    ):
        super().__init__(name, profile, **kwargs)
        self._init_actions([SimpleWriteCode, SimpleRunCode])
-
-    async def _think(self) -> None:
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
+        self._set_react_mode(react_mode="by_order")

    async def _act(self) -> Message:
        logger.info(f"{self._setting}: ready to {self._rc.todo}")
        todo = self._rc.todo
-        msg = self._rc.memory.get()[-1]

-        if isinstance(todo, SimpleWriteCode):
-            instruction = msg.content
-            result = await SimpleWriteCode().run(instruction)
+        msg = self.get_memories(k=1)[0] # retrieve the most recent message (k=1)
+        result = await todo.run(msg.content)

-        elif isinstance(todo, SimpleRunCode):
-            code_text = msg.content
-            result = await SimpleRunCode().run(code_text)
-
-        msg = Message(content=result, role=self.profile, cause_by=todo)
+        msg = Message(content=result, role=self.profile, cause_by=type(todo))
        self._rc.memory.add(msg)
        return msg

-    async def _react(self) -> Message:
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            await self._act()
-        return Message(content="All job done", role=self.profile)
-
-def main(msg="write a function that calculates the sum of a list"):
+def main(msg="write a function that calculates the product of a list and run it"):
    # role = SimpleCoder()
    role = RunnableCoder()
    logger.info(msg)
--- a/metagpt/roles/invoice_ocr_assistant.py
+++ b/metagpt/roles/invoice_ocr_assistant.py
@ -42,17 +42,7 @@ class InvoiceOCRAssistant(Role):
        self.filename = ""
        self.origin_query = ""
        self.orc_data = None
-
-    async def _think(self) -> None:
-        """Determine the next action to be taken by the role."""
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
+        self._set_react_mode(react_mode="by_order")

    async def _act(self) -> Message:
        """Perform an action as determined by the role.
@ -94,17 +84,3 @@ class InvoiceOCRAssistant(Role):
        msg = Message(content=content, instruct_content=resp)
        self._rc.memory.add(msg)
        return msg
-
-    async def _react(self) -> Message:
-        """Execute the invoice ocr assistant's think and actions.
-
-        Returns:
-            A message containing the final result of the assistant's actions.
-        """
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            msg = await self._act()
-        return msg
-
--- a/metagpt/roles/researcher.py
+++ b/metagpt/roles/researcher.py
@ -31,20 +31,11 @@ class Researcher(Role):
    ):
        super().__init__(name, profile, goal, constraints, **kwargs)
        self._init_actions([CollectLinks(name), WebBrowseAndSummarize(name), ConductResearch(name)])
+        self._set_react_mode(react_mode="by_order")
        self.language = language
        if language not in ("en-us", "zh-cn"):
            logger.warning(f"The language `{language}` has not been tested, it may not work.")

-    async def _think(self) -> None:
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
-
    async def _act(self) -> Message:
        logger.info(f"{self._setting}: ready to {self._rc.todo}")
        todo = self._rc.todo
@ -73,12 +64,8 @@ class Researcher(Role):
        self._rc.memory.add(ret)
        return ret

-    async def _react(self) -> Message:
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            msg = await self._act()
+    async def react(self) -> Message:
+        msg = await super().react()
        report = msg.instruct_content
        self.write_report(report.topic, report.content)
        return msg
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@ -7,7 +7,8 @@
 """
 from __future__ import annotations

-from typing import Iterable, Type
+from typing import Iterable, Type, Union
+from enum import Enum

 from pydantic import BaseModel, Field

@ -27,12 +28,14 @@ Please note that only the text between the first and second "===" is information
 {history}
 ===

-You can now choose one of the following stages to decide the stage you need to go in the next step:
+Your previous stage: {previous_state}
+
+Now choose one of the following stages you need to go to in the next step:
 {states}

 Just answer a number between 0-{n_states}, choose the most suitable stage according to the understanding of the conversation.
 Please note that the answer only needs a number, no need to add any other text.
-If there is no conversation record, choose 0.
+If you think you have completed your goal and don't need to go to any of the stages, return -1.
 Do not answer anything else, and do not add any other information in your answer.
 """

@ -46,6 +49,14 @@ ROLE_TEMPLATE = """Your response should be based on the previous conversation hi
 {name}: {result}
 """

+class RoleReactMode(str, Enum):
+    REACT = "react"
+    BY_ORDER = "by_order"
+    PLAN_AND_ACT = "plan_and_act"
+
+    @classmethod
+    def values(cls):
+        return [item.value for item in cls]

 class RoleSetting(BaseModel):
    """Role Settings"""
@ -67,10 +78,12 @@ class RoleContext(BaseModel):
    env: 'Environment' = Field(default=None)
    memory: Memory = Field(default_factory=Memory)
    long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory)
-    state: int = Field(default=0)
+    state: int = Field(default=-1) # -1 indicates initial or termination state where todo is None
    todo: Action = Field(default=None)
    watch: set[Type[Action]] = Field(default_factory=set)
    news: list[Type[Message]] = Field(default=[])
+    react_mode: RoleReactMode = RoleReactMode.REACT # see `Role._set_react_mode` for the definitions of react_mode and max_react_loop
+    max_react_loop: int = 1

    class Config:
        arbitrary_types_allowed = True
@ -116,17 +129,38 @@ class Role:
            self._actions.append(i)
            self._states.append(f"{idx}. {action}")

+    def _set_react_mode(self, react_mode: str, max_react_loop: int = 1):
+        """Set the strategy by which the Role reacts to an observed Message. The modes differ in how
+        this Role selects the action to perform during the _think stage, especially if it is capable of multiple Actions.
+
+        Args:
+            react_mode (str): Mode for choosing action during the _think stage, can be one of:
+                        "react": standard think-act loop in the ReAct paper, alternating thinking and acting to solve the task, i.e. _think -> _act -> _think -> _act -> ...
+                                 Use llm to select actions in _think dynamically;
+                        "by_order": switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...;
+                        "plan_and_act": first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ...
+                                        Use llm to come up with the plan dynamically.
+                        Defaults to "react".
+            max_react_loop (int): Maximum react cycles to execute, used to prevent the agent from reacting forever.
+                                  Takes effect only when react_mode is "react", in which case the llm chooses actions, including termination.
+                                  Defaults to 1, i.e. _think -> _act (-> return result and end)
+        """
+        assert react_mode in RoleReactMode.values(), f"react_mode must be one of {RoleReactMode.values()}"
+        self._rc.react_mode = react_mode
+        if react_mode == RoleReactMode.REACT:
+            self._rc.max_react_loop = max_react_loop
+
    def _watch(self, actions: Iterable[Type[Action]]):
        """Listen to the corresponding behaviors"""
        self._rc.watch.update(actions)
        # check RoleContext after adding watch actions
        self._rc.check(self._role_id)

-    def _set_state(self, state):
+    def _set_state(self, state: int):
        """Update the current state."""
        self._rc.state = state
        logger.debug(self._actions)
-        self._rc.todo = self._actions[self._rc.state]
+        self._rc.todo = self._actions[self._rc.state] if state >= 0 else None

    def set_env(self, env: 'Environment'):
        """Set the environment in which the role works. The role can talk to the environment and can also receive messages by observing."""
@ -151,13 +185,19 @@ class Role:
            return
        prompt = self._get_prefix()
        prompt += STATE_TEMPLATE.format(history=self._rc.history, states="\n".join(self._states),
-                                        n_states=len(self._states) - 1)
+                                        n_states=len(self._states) - 1, previous_state=self._rc.state)
+        # print(prompt)
        next_state = await self._llm.aask(prompt)
        logger.debug(f"{prompt=}")
-        if not next_state.isdigit() or int(next_state) not in range(len(self._states)):
-            logger.warning(f'Invalid answer of state, {next_state=}')
-            next_state = "0"
-        self._set_state(int(next_state))
+        if (not next_state.isdigit() and next_state != "-1") \
+            or int(next_state) not in range(-1, len(self._states)):
+            logger.warning(f'Invalid answer of state, {next_state=}, will be set to -1')
+            next_state = -1
+        else:
+            next_state = int(next_state)
+            if next_state == -1:
+                logger.info(f"End actions with {next_state=}")
+        self._set_state(next_state)

    async def _act(self) -> Message:
        # prompt = self.get_prefix()
@ -203,10 +243,45 @@ class Role:
        self._rc.env.publish_message(msg)

    async def _react(self) -> Message:
-        """Think first, then act"""
-        await self._think()
-        logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
-        return await self._act()
+        """Think first, then act, until _think decides it is time to stop (no more todo) or max_react_loop actions have been taken.
+        This is the standard think-act loop in the ReAct paper, which alternates thinking and acting in task solving, i.e. _think -> _act -> _think -> _act -> ...
+        Use llm to select actions in _think dynamically.
+        """
+        actions_taken = 0
+        rsp = Message("No actions taken yet") # will be overwritten after Role _act
+        while actions_taken < self._rc.max_react_loop:
+            # think
+            await self._think()
+            if self._rc.todo is None:
+                break
+            # act
+            logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
+            rsp = await self._act()
+            actions_taken += 1
+        return rsp # return output from the last action
+
+    async def _act_by_order(self) -> Message:
+        """Run every action once, in the order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ..."""
+        for i in range(len(self._states)):
+            self._set_state(i)
+            rsp = await self._act()
+        return rsp # return output from the last action
+
+    async def _plan_and_act(self) -> Message:
+        """First plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ... Use llm to come up with the plan dynamically."""
+        # TODO: to be implemented
+        return Message("")
+
+    async def react(self) -> Message:
+        """Entry to one of three strategies by which Role reacts to the observed Message"""
+        if self._rc.react_mode == RoleReactMode.REACT:
+            rsp = await self._react()
+        elif self._rc.react_mode == RoleReactMode.BY_ORDER:
+            rsp = await self._act_by_order()
+        elif self._rc.react_mode == RoleReactMode.PLAN_AND_ACT:
+            rsp = await self._plan_and_act()
+        self._set_state(state=-1) # current reaction is complete, reset state to -1 and todo back to None
+        return rsp

    def recv(self, message: Message) -> None:
        """add message to history."""
@ -223,6 +298,10 @@ class Role:

        return await self._react()

+    def get_memories(self, k=0) -> list[Message]:
+        """A wrapper to return the most recent k memories of this role, return all when k=0"""
+        return self._rc.memory.get(k=k)
+
    async def run(self, message=None):
        """Observe, and think and act based on the results of the observation"""
        if message:
@ -237,7 +316,7 @@ class Role:
            logger.debug(f"{self._setting}: no news. waiting.")
            return

-        rsp = await self._react()
+        rsp = await self.react()
        # Publish the reply to the environment, waiting for the next subscriber to process
        self._publish_message(rsp)
        return rsp