diff --git a/examples/build_customized_agent.py b/examples/build_customized_agent.py index 87d7a9c76..c7069b768 100644 --- a/examples/build_customized_agent.py +++ b/examples/build_customized_agent.py @@ -75,10 +75,9 @@ class SimpleCoder(Role): logger.info(f"{self._setting}: ready to {self._rc.todo}") todo = self._rc.todo - msg = self._rc.memory.get()[-1] # retrieve the latest memory - instruction = msg.content + msg = self.get_memories(k=1)[0] # find the most recent messages - code_text = await SimpleWriteCode().run(instruction) + code_text = await SimpleWriteCode().run(msg.content) msg = Message(content=code_text, role=self.profile, cause_by=todo) return msg @@ -92,43 +91,20 @@ class RunnableCoder(Role): ): super().__init__(name, profile, **kwargs) self._init_actions([SimpleWriteCode, SimpleRunCode]) - - async def _think(self) -> None: - if self._rc.todo is None: - self._set_state(0) - return - - if self._rc.state + 1 < len(self._states): - self._set_state(self._rc.state + 1) - else: - self._rc.todo = None + self._set_react_mode(react_mode="by_order") async def _act(self) -> Message: logger.info(f"{self._setting}: ready to {self._rc.todo}") todo = self._rc.todo - msg = self._rc.memory.get()[-1] - if isinstance(todo, SimpleWriteCode): - instruction = msg.content - result = await SimpleWriteCode().run(instruction) + msg = self.get_memories(k=1)[0] # find the most k recent messages + result = await todo.run(msg.content) - elif isinstance(todo, SimpleRunCode): - code_text = msg.content - result = await SimpleRunCode().run(code_text) - - msg = Message(content=result, role=self.profile, cause_by=todo) + msg = Message(content=result, role=self.profile, cause_by=type(todo)) self._rc.memory.add(msg) return msg - async def _react(self) -> Message: - while True: - await self._think() - if self._rc.todo is None: - break - await self._act() - return Message(content="All job done", role=self.profile) - -def main(msg="write a function that calculates the sum of a list"): +def main(msg="write a function that calculates the product of a list and run it"): # role = SimpleCoder() role = RunnableCoder() logger.info(msg) diff --git a/metagpt/roles/invoice_ocr_assistant.py b/metagpt/roles/invoice_ocr_assistant.py index c307b20c0..15f831c97 100644 --- a/metagpt/roles/invoice_ocr_assistant.py +++ b/metagpt/roles/invoice_ocr_assistant.py @@ -42,17 +42,7 @@ class InvoiceOCRAssistant(Role): self.filename = "" self.origin_query = "" self.orc_data = None - - async def _think(self) -> None: - """Determine the next action to be taken by the role.""" - if self._rc.todo is None: - self._set_state(0) - return - - if self._rc.state + 1 < len(self._states): - self._set_state(self._rc.state + 1) - else: - self._rc.todo = None + self._set_react_mode(react_mode="by_order") async def _act(self) -> Message: """Perform an action as determined by the role. @@ -94,17 +84,3 @@ class InvoiceOCRAssistant(Role): msg = Message(content=content, instruct_content=resp) self._rc.memory.add(msg) return msg - - async def _react(self) -> Message: - """Execute the invoice ocr assistant's think and actions. - - Returns: - A message containing the final result of the assistant's actions. - """ - while True: - await self._think() - if self._rc.todo is None: - break - msg = await self._act() - return msg - diff --git a/metagpt/roles/researcher.py b/metagpt/roles/researcher.py index acb46c718..c5512121a 100644 --- a/metagpt/roles/researcher.py +++ b/metagpt/roles/researcher.py @@ -31,20 +31,11 @@ class Researcher(Role): ): super().__init__(name, profile, goal, constraints, **kwargs) self._init_actions([CollectLinks(name), WebBrowseAndSummarize(name), ConductResearch(name)]) + self._set_react_mode(react_mode="by_order") self.language = language if language not in ("en-us", "zh-cn"): logger.warning(f"The language `{language}` has not been tested, it may not work.") - async def _think(self) -> None: - if self._rc.todo is None: - self._set_state(0) - return - - if self._rc.state + 1 < len(self._states): - self._set_state(self._rc.state + 1) - else: - self._rc.todo = None - async def _act(self) -> Message: logger.info(f"{self._setting}: ready to {self._rc.todo}") todo = self._rc.todo @@ -73,12 +64,8 @@ class Researcher(Role): self._rc.memory.add(ret) return ret - async def _react(self) -> Message: - while True: - await self._think() - if self._rc.todo is None: - break - msg = await self._act() + async def react(self) -> Message: + msg = await super().react() report = msg.instruct_content self.write_report(report.topic, report.content) return msg diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index 44bb3e976..0251176f7 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -7,7 +7,8 @@ """ from __future__ import annotations -from typing import Iterable, Type +from typing import Iterable, Type, Union +from enum import Enum from pydantic import BaseModel, Field @@ -27,12 +28,14 @@ Please note that only the text between the first and second "===" is information {history} === -You can now choose one of the following stages to decide the stage you need to go in the next step: +Your previous stage: {previous_state} + +Now choose one of the following stages you need to go to in the next step: {states} Just answer a number between 0-{n_states}, choose the most suitable stage according to the understanding of the conversation. Please note that the answer only needs a number, no need to add any other text. -If there is no conversation record, choose 0. +If you think you have completed your goal and don't need to go to any of the stages, return -1. Do not answer anything else, and do not add any other information in your answer. """ @@ -46,6 +49,14 @@ ROLE_TEMPLATE = """Your response should be based on the previous conversation hi {name}: {result} """ +class RoleReactMode(str, Enum): + REACT = "react" + BY_ORDER = "by_order" + PLAN_AND_ACT = "plan_and_act" + + @classmethod + def values(cls): + return [item.value for item in cls] class RoleSetting(BaseModel): """Role Settings""" @@ -67,10 +78,12 @@ class RoleContext(BaseModel): env: 'Environment' = Field(default=None) memory: Memory = Field(default_factory=Memory) long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory) - state: int = Field(default=0) + state: int = Field(default=-1) # -1 indicates initial or termination state where todo is None todo: Action = Field(default=None) watch: set[Type[Action]] = Field(default_factory=set) news: list[Type[Message]] = Field(default=[]) + react_mode: RoleReactMode = RoleReactMode.REACT # see `Role._set_react_mode` for definitions of the following two attributes + max_react_loop: int = 1 class Config: arbitrary_types_allowed = True @@ -116,17 +129,38 @@ class Role: self._actions.append(i) self._states.append(f"{idx}. {action}") + def _set_react_mode(self, react_mode: str, max_react_loop: int = 1): + """Set strategy of the Role reacting to observed Message. Variation lies in how + this Role elects action to perform during the _think stage, especially if it is capable of multiple Actions. + + Args: + react_mode (str): Mode for choosing action during the _think stage, can be one of: + "react": standard think-act loop in the ReAct paper, alternating thinking and acting to solve the task, i.e. _think -> _act -> _think -> _act -> ... + Use llm to select actions in _think dynamically; + "by_order": switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...; + "plan_and_act": first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ... + Use llm to come up with the plan dynamically. + Defaults to "react". + max_react_loop (int): Maximum react cycles to execute, used to prevent the agent from reacting forever. + Take effect only when react_mode is react, in which we use llm to choose actions, including termination. + Defaults to 1, i.e. _think -> _act (-> return result and end) + """ + assert react_mode in RoleReactMode.values(), f"react_mode must be one of {RoleReactMode.values()}" + self._rc.react_mode = react_mode + if react_mode == RoleReactMode.REACT: + self._rc.max_react_loop = max_react_loop + def _watch(self, actions: Iterable[Type[Action]]): """Listen to the corresponding behaviors""" self._rc.watch.update(actions) # check RoleContext after adding watch actions self._rc.check(self._role_id) - def _set_state(self, state): + def _set_state(self, state: int): """Update the current state.""" self._rc.state = state logger.debug(self._actions) - self._rc.todo = self._actions[self._rc.state] + self._rc.todo = self._actions[self._rc.state] if state >= 0 else None def set_env(self, env: 'Environment'): """Set the environment in which the role works. The role can talk to the environment and can also receive messages by observing.""" @@ -151,13 +185,19 @@ class Role: return prompt = self._get_prefix() prompt += STATE_TEMPLATE.format(history=self._rc.history, states="\n".join(self._states), - n_states=len(self._states) - 1) + n_states=len(self._states) - 1, previous_state=self._rc.state) + # print(prompt) next_state = await self._llm.aask(prompt) logger.debug(f"{prompt=}") - if not next_state.isdigit() or int(next_state) not in range(len(self._states)): - logger.warning(f'Invalid answer of state, {next_state=}') - next_state = "0" - self._set_state(int(next_state)) + if (not next_state.isdigit() and next_state != "-1") \ + or int(next_state) not in range(-1, len(self._states)): + logger.warning(f'Invalid answer of state, {next_state=}, will be set to -1') + next_state = -1 + else: + next_state = int(next_state) + if next_state == -1: + logger.info(f"End actions with {next_state=}") + self._set_state(next_state) async def _act(self) -> Message: # prompt = self.get_prefix() @@ -203,10 +243,45 @@ class Role: self._rc.env.publish_message(msg) async def _react(self) -> Message: - """Think first, then act""" - await self._think() - logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}") - return await self._act() + """Think first, then act, until the Role _think it is time to stop and requires no more todo. + This is the standard think-act loop in the ReAct paper, which alternates thinking and acting in task solving, i.e. _think -> _act -> _think -> _act -> ... + Use llm to select actions in _think dynamically + """ + actions_taken = 0 + rsp = Message("No actions taken yet") # will be overwritten after Role _act + while actions_taken < self._rc.max_react_loop: + # think + await self._think() + if self._rc.todo is None: + break + # act + logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}") + rsp = await self._act() + actions_taken += 1 + return rsp # return output from the last action + + async def _act_by_order(self) -> Message: + """switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...""" + for i in range(len(self._states)): + self._set_state(i) + rsp = await self._act() + return rsp # return output from the last action + + async def _plan_and_act(self) -> Message: + """first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ... Use llm to come up with the plan dynamically.""" + # TODO: to be implemented + return Message("") + + async def react(self) -> Message: + """Entry to one of three strategies by which Role reacts to the observed Message""" + if self._rc.react_mode == RoleReactMode.REACT: + rsp = await self._react() + elif self._rc.react_mode == RoleReactMode.BY_ORDER: + rsp = await self._act_by_order() + elif self._rc.react_mode == RoleReactMode.PLAN_AND_ACT: + rsp = await self._plan_and_act() + self._set_state(state=-1) # current reaction is complete, reset state to -1 and todo back to None + return rsp def recv(self, message: Message) -> None: """add message to history.""" @@ -223,6 +298,10 @@ class Role: return await self._react() + def get_memories(self, k=0) -> list[Message]: + """A wrapper to return the most recent k memories of this role, return all when k=0""" + return self._rc.memory.get(k=k) + async def run(self, message=None): """Observe, and think and act based on the results of the observation""" if message: @@ -237,7 +316,7 @@ class Role: logger.debug(f"{self._setting}: no news. waiting.") return - rsp = await self._react() + rsp = await self.react() # Publish the reply to the environment, waiting for the next subscriber to process self._publish_message(rsp) return rsp