redefine react and provide multiple react modes

2026-06-20 15:38:09 +02:00 · 2023-11-06 14:37:29 +08:00 · 2023-11-06 14:37:29 +08:00 · 250fe83de9
commit 250fe83de9
parent ccc4c9e04d
4 changed files with 94 additions and 87 deletions
--- a/metagpt/roles/invoice_ocr_assistant.py
+++ b/metagpt/roles/invoice_ocr_assistant.py
@ -42,17 +42,7 @@ class InvoiceOCRAssistant(Role):
        self.filename = ""
        self.origin_query = ""
        self.orc_data = None
-
-    async def _think(self) -> None:
-        """Determine the next action to be taken by the role."""
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
+        self._set_react_mode(react_mode="by_order")

    async def _act(self) -> Message:
        """Perform an action as determined by the role.
@ -94,17 +84,3 @@ class InvoiceOCRAssistant(Role):
        msg = Message(content=content, instruct_content=resp)
        self._rc.memory.add(msg)
        return msg
-
-    async def _react(self) -> Message:
-        """Execute the invoice ocr assistant's think and actions.
-
-        Returns:
-            A message containing the final result of the assistant's actions.
-        """
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            msg = await self._act()
-        return msg
-
--- a/metagpt/roles/researcher.py
+++ b/metagpt/roles/researcher.py
@ -31,20 +31,11 @@ class Researcher(Role):
    ):
        super().__init__(name, profile, goal, constraints, **kwargs)
        self._init_actions([CollectLinks(name), WebBrowseAndSummarize(name), ConductResearch(name)])
+        self._set_react_mode(react_mode="by_order")
        self.language = language
        if language not in ("en-us", "zh-cn"):
            logger.warning(f"The language `{language}` has not been tested, it may not work.")

-    async def _think(self) -> None:
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
-
    async def _act(self) -> Message:
        logger.info(f"{self._setting}: ready to {self._rc.todo}")
        todo = self._rc.todo
@ -73,12 +64,8 @@ class Researcher(Role):
        self._rc.memory.add(ret)
        return ret

-    async def _react(self) -> Message:
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            msg = await self._act()
+    async def react(self) -> Message:
+        """Run the base Role react strategy, then write the resulting report via write_report.
+
+        Returns:
+            The message returned by super().react(); its instruct_content supplies the
+            topic and content passed to write_report.
+        """
+        msg = await super().react()
        report = msg.instruct_content
        self.write_report(report.topic, report.content)
        return msg
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@ -7,7 +7,7 @@
 """
 from __future__ import annotations

-from typing import Iterable, Type
+from typing import Iterable, Type, Union

 from pydantic import BaseModel, Field

@ -27,12 +27,15 @@ Please note that only the text between the first and second "===" is information
 {history}
 ===

-You can now choose one of the following stages to decide the stage you need to go in the next step:
+Your previous stage: {previous_state}
+
+Now choose one of the following stages you need to go to in the next step:
 {states}

 Just answer a number between 0-{n_states}, choose the most suitable stage according to the understanding of the conversation.
 Please note that the answer only needs a number, no need to add any other text.
-If there is no conversation record, choose 0.
+If there is no conversation record or your previous stage is None, choose 0.
+If you think you have completed your goal and don't need to go to any of the stages, return -1.
 Do not answer anything else, and do not add any other information in your answer.
 """

@ -67,7 +70,7 @@ class RoleContext(BaseModel):
    env: 'Environment' = Field(default=None)
    memory: Memory = Field(default_factory=Memory)
    long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory)
-    state: int = Field(default=0)
+    state: int = Field(default=None)
    todo: Action = Field(default=None)
    watch: set[Type[Action]] = Field(default_factory=set)
    news: list[Type[Message]] = Field(default=[])
@ -100,6 +103,9 @@ class Role:
        self._actions = []
        self._role_id = str(self._setting)
        self._rc = RoleContext()
+        # see `_set_react_mode` function for definitions of the following two attributes
+        self.react_mode = "react"
+        self.max_react_loop = 1

    def _reset(self):
        self._states = []
@ -116,17 +122,37 @@ class Role:
            self._actions.append(i)
            self._states.append(f"{idx}. {action}")

+    def _set_react_mode(self, react_mode: str, max_react_loop: int = 1):
+        """Set the strategy by which the Role reacts to an observed Message.
+
+        The modes differ in how the Role elects the action to perform, which matters
+        when it is capable of multiple Actions.
+
+        Args:
+            react_mode (str): Required. One of
+                "by_order": switch action each time by order defined in _init_actions,
+                            i.e. _act (Action1) -> _act (Action2) -> ...;
+                "react": standard think-act loop in the ReAct paper, alternating thinking
+                         and acting to solve the task, i.e. _think -> _act -> _think -> _act -> ...
+                         Use llm to select actions in _think dynamically;
+                "plan_and_act": first plan, then execute an action sequence,
+                                i.e. _think (of a plan) -> _act -> _act -> ...
+                                Use llm to come up with the plan dynamically.
+            max_react_loop (int): Maximum react cycles to execute, used to prevent the agent
+                from reacting forever. Only stored when react_mode is "react"; ignored for
+                the other modes. Defaults to 1, i.e. _think -> _act (-> return result and end).
+
+        Raises:
+            ValueError: If react_mode is not one of the three supported modes.
+        """
+        supported_modes = ("react", "by_order", "plan_and_act")
+        if react_mode not in supported_modes:
+            # Fail at configuration time rather than later, when the mode is dispatched on.
+            raise ValueError(f"react_mode must be one of {supported_modes}, got {react_mode!r}")
+        self.react_mode = react_mode
+        if react_mode == "react":
+            self.max_react_loop = max_react_loop
+
    def _watch(self, actions: Iterable[Type[Action]]):
        """Listen to the corresponding behaviors"""
        self._rc.watch.update(actions)
        # check RoleContext after adding watch actions
        self._rc.check(self._role_id)

-    def _set_state(self, state):
+    def _set_state(self, state: Union[int, None]):
        """Update the current state."""
        self._rc.state = state
        logger.debug(self._actions)
-        self._rc.todo = self._actions[self._rc.state]
+        # A None state selects no action: todo is cleared instead of indexing _actions.
+        self._rc.todo = self._actions[self._rc.state] if state is not None else None

    def set_env(self, env: 'Environment'):
        """Set the environment in which the role works. The role can talk to the environment and can also receive messages by observing."""
@ -151,13 +177,19 @@ class Role:
            return
        prompt = self._get_prefix()
        prompt += STATE_TEMPLATE.format(history=self._rc.history, states="\n".join(self._states),
-                                        n_states=len(self._states) - 1)
+                                        n_states=len(self._states) - 1, previous_state=self._rc.state)
+        # print(prompt)
        next_state = await self._llm.aask(prompt)
        logger.debug(f"{prompt=}")
-        if not next_state.isdigit() or int(next_state) not in range(len(self._states)):
+        if not next_state.isdigit() or int(next_state) not in range(-1, len(self._states)):
            logger.warning(f'Invalid answer of state, {next_state=}')
-            next_state = "0"
-        self._set_state(int(next_state))
+            next_state = None
+        else:
+            next_state = int(next_state)
+            if next_state == -1:
+                logger.info(f"End actions with {next_state=}")
+                next_state = None
+        self._set_state(next_state)

    async def _act(self) -> Message:
        # prompt = self.get_prefix()
@ -203,10 +235,42 @@ class Role:
        self._rc.env.publish_message(msg)

    async def _react(self) -> Message:
-        """Think first, then act"""
-        await self._think()
-        logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
-        return await self._act()
+        """Think first, then act, until _think leaves no todo or max_react_loop actions were taken.
+
+        This is the standard think-act loop in the ReAct paper, which alternates thinking and
+        acting in task solving, i.e. _think -> _act -> _think -> _act -> ...
+        The action for each cycle is whatever _think puts into self._rc.todo.
+
+        Returns:
+            The output of the last action performed, or a placeholder Message when no
+            action ran at all (the first _think left no todo, or max_react_loop < 1).
+        """
+        # Bind the result up front: the loop can exit before any _act call, and
+        # returning an unbound name would raise UnboundLocalError.
+        rsp = Message(content="No actions taken yet")
+        actions_taken = 0
+        while actions_taken < self.max_react_loop:
+            # think
+            await self._think()
+            if self._rc.todo is None:
+                break
+            # act
+            logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
+            rsp = await self._act()
+            actions_taken += 1
+        return rsp  # output from the last action, or the placeholder if none ran
+
+    async def _act_by_order(self) -> Message:
+        """Switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...
+
+        Returns:
+            The output of the last action, or a placeholder Message when the role has
+            no states to run.
+        """
+        # Bind the result up front so an empty state list does not leave rsp unbound.
+        rsp = Message(content="No actions taken yet")
+        for i in range(len(self._states)):
+            self._set_state(i)
+            rsp = await self._act()
+        return rsp  # output from the last action, or the placeholder if none ran
+
+    async def _plan_and_act(self) -> Message:
+        """Plan first, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ...
+
+        Intended to use the llm to come up with the plan dynamically. Not implemented
+        yet: currently returns an empty Message without planning or acting.
+        """
+        # TODO: to be implemented
+        placeholder = Message("")
+        return placeholder
+
+    async def react(self) -> Message:
+        """Entry to one of three strategies by which Role reacts to the observed Message.
+
+        Dispatches on self.react_mode: "react" -> _react, "by_order" -> _act_by_order,
+        "plan_and_act" -> _plan_and_act.
+
+        Returns:
+            The message produced by the selected strategy.
+
+        Raises:
+            ValueError: If self.react_mode is none of the three modes above, instead of
+                silently returning None.
+        """
+        if self.react_mode == "react":
+            return await self._react()
+        if self.react_mode == "by_order":
+            return await self._act_by_order()
+        if self.react_mode == "plan_and_act":
+            return await self._plan_and_act()
+        raise ValueError(f"Unsupported react_mode: {self.react_mode!r}")

    def recv(self, message: Message) -> None:
        """add message to history."""
@ -223,6 +287,10 @@ class Role:

        return await self._react()

+    def get_memories(self, k=0) -> list[Message]:
+        """Wrapper around this role's memory: return the most recent k memories, or all of them when k=0."""
+        memory = self._rc.memory
+        return memory.get(k=k)
+
    async def run(self, message=None):
        """Observe, and think and act based on the results of the observation"""
        if message:
@ -237,7 +305,7 @@ class Role:
            logger.debug(f"{self._setting}: no news. waiting.")
            return

-        rsp = await self._react()
+        rsp = await self.react()
        # Publish the reply to the environment, waiting for the next subscriber to process
        self._publish_message(rsp)
        return rsp