mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-30 11:26:23 +02:00
Merge pull request #479 from garylin2099/general_react
redefine react and provide multiple react modes
This commit is contained in:
commit
f404abdeb0
4 changed files with 106 additions and 88 deletions
|
|
@ -75,10 +75,9 @@ class SimpleCoder(Role):
|
|||
logger.info(f"{self._setting}: ready to {self._rc.todo}")
|
||||
todo = self._rc.todo
|
||||
|
||||
msg = self._rc.memory.get()[-1] # retrieve the latest memory
|
||||
instruction = msg.content
|
||||
msg = self.get_memories(k=1)[0] # find the most recent messages
|
||||
|
||||
code_text = await SimpleWriteCode().run(instruction)
|
||||
code_text = await SimpleWriteCode().run(msg.content)
|
||||
msg = Message(content=code_text, role=self.profile, cause_by=todo)
|
||||
|
||||
return msg
|
||||
|
|
@ -92,43 +91,20 @@ class RunnableCoder(Role):
|
|||
):
|
||||
super().__init__(name, profile, **kwargs)
|
||||
self._init_actions([SimpleWriteCode, SimpleRunCode])
|
||||
|
||||
async def _think(self) -> None:
|
||||
if self._rc.todo is None:
|
||||
self._set_state(0)
|
||||
return
|
||||
|
||||
if self._rc.state + 1 < len(self._states):
|
||||
self._set_state(self._rc.state + 1)
|
||||
else:
|
||||
self._rc.todo = None
|
||||
self._set_react_mode(react_mode="by_order")
|
||||
|
||||
async def _act(self) -> Message:
|
||||
logger.info(f"{self._setting}: ready to {self._rc.todo}")
|
||||
todo = self._rc.todo
|
||||
msg = self._rc.memory.get()[-1]
|
||||
|
||||
if isinstance(todo, SimpleWriteCode):
|
||||
instruction = msg.content
|
||||
result = await SimpleWriteCode().run(instruction)
|
||||
msg = self.get_memories(k=1)[0] # find the most k recent messages
|
||||
result = await todo.run(msg.content)
|
||||
|
||||
elif isinstance(todo, SimpleRunCode):
|
||||
code_text = msg.content
|
||||
result = await SimpleRunCode().run(code_text)
|
||||
|
||||
msg = Message(content=result, role=self.profile, cause_by=todo)
|
||||
msg = Message(content=result, role=self.profile, cause_by=type(todo))
|
||||
self._rc.memory.add(msg)
|
||||
return msg
|
||||
|
||||
async def _react(self) -> Message:
|
||||
while True:
|
||||
await self._think()
|
||||
if self._rc.todo is None:
|
||||
break
|
||||
await self._act()
|
||||
return Message(content="All job done", role=self.profile)
|
||||
|
||||
def main(msg="write a function that calculates the sum of a list"):
|
||||
def main(msg="write a function that calculates the product of a list and run it"):
|
||||
# role = SimpleCoder()
|
||||
role = RunnableCoder()
|
||||
logger.info(msg)
|
||||
|
|
|
|||
|
|
@ -42,17 +42,7 @@ class InvoiceOCRAssistant(Role):
|
|||
self.filename = ""
|
||||
self.origin_query = ""
|
||||
self.orc_data = None
|
||||
|
||||
async def _think(self) -> None:
|
||||
"""Determine the next action to be taken by the role."""
|
||||
if self._rc.todo is None:
|
||||
self._set_state(0)
|
||||
return
|
||||
|
||||
if self._rc.state + 1 < len(self._states):
|
||||
self._set_state(self._rc.state + 1)
|
||||
else:
|
||||
self._rc.todo = None
|
||||
self._set_react_mode(react_mode="by_order")
|
||||
|
||||
async def _act(self) -> Message:
|
||||
"""Perform an action as determined by the role.
|
||||
|
|
@ -94,17 +84,3 @@ class InvoiceOCRAssistant(Role):
|
|||
msg = Message(content=content, instruct_content=resp)
|
||||
self._rc.memory.add(msg)
|
||||
return msg
|
||||
|
||||
async def _react(self) -> Message:
|
||||
"""Execute the invoice ocr assistant's think and actions.
|
||||
|
||||
Returns:
|
||||
A message containing the final result of the assistant's actions.
|
||||
"""
|
||||
while True:
|
||||
await self._think()
|
||||
if self._rc.todo is None:
|
||||
break
|
||||
msg = await self._act()
|
||||
return msg
|
||||
|
||||
|
|
|
|||
|
|
@ -31,20 +31,11 @@ class Researcher(Role):
|
|||
):
|
||||
super().__init__(name, profile, goal, constraints, **kwargs)
|
||||
self._init_actions([CollectLinks(name), WebBrowseAndSummarize(name), ConductResearch(name)])
|
||||
self._set_react_mode(react_mode="by_order")
|
||||
self.language = language
|
||||
if language not in ("en-us", "zh-cn"):
|
||||
logger.warning(f"The language `{language}` has not been tested, it may not work.")
|
||||
|
||||
async def _think(self) -> None:
|
||||
if self._rc.todo is None:
|
||||
self._set_state(0)
|
||||
return
|
||||
|
||||
if self._rc.state + 1 < len(self._states):
|
||||
self._set_state(self._rc.state + 1)
|
||||
else:
|
||||
self._rc.todo = None
|
||||
|
||||
async def _act(self) -> Message:
|
||||
logger.info(f"{self._setting}: ready to {self._rc.todo}")
|
||||
todo = self._rc.todo
|
||||
|
|
@ -73,12 +64,8 @@ class Researcher(Role):
|
|||
self._rc.memory.add(ret)
|
||||
return ret
|
||||
|
||||
async def _react(self) -> Message:
|
||||
while True:
|
||||
await self._think()
|
||||
if self._rc.todo is None:
|
||||
break
|
||||
msg = await self._act()
|
||||
async def react(self) -> Message:
|
||||
msg = await super().react()
|
||||
report = msg.instruct_content
|
||||
self.write_report(report.topic, report.content)
|
||||
return msg
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable, Type
|
||||
from typing import Iterable, Type, Union
|
||||
from enum import Enum
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
|
@ -27,12 +28,14 @@ Please note that only the text between the first and second "===" is information
|
|||
{history}
|
||||
===
|
||||
|
||||
You can now choose one of the following stages to decide the stage you need to go in the next step:
|
||||
Your previous stage: {previous_state}
|
||||
|
||||
Now choose one of the following stages you need to go to in the next step:
|
||||
{states}
|
||||
|
||||
Just answer a number between 0-{n_states}, choose the most suitable stage according to the understanding of the conversation.
|
||||
Please note that the answer only needs a number, no need to add any other text.
|
||||
If there is no conversation record, choose 0.
|
||||
If you think you have completed your goal and don't need to go to any of the stages, return -1.
|
||||
Do not answer anything else, and do not add any other information in your answer.
|
||||
"""
|
||||
|
||||
|
|
@ -46,6 +49,14 @@ ROLE_TEMPLATE = """Your response should be based on the previous conversation hi
|
|||
{name}: {result}
|
||||
"""
|
||||
|
||||
class RoleReactMode(str, Enum):
|
||||
REACT = "react"
|
||||
BY_ORDER = "by_order"
|
||||
PLAN_AND_ACT = "plan_and_act"
|
||||
|
||||
@classmethod
|
||||
def values(cls):
|
||||
return [item.value for item in cls]
|
||||
|
||||
class RoleSetting(BaseModel):
|
||||
"""Role Settings"""
|
||||
|
|
@ -67,10 +78,12 @@ class RoleContext(BaseModel):
|
|||
env: 'Environment' = Field(default=None)
|
||||
memory: Memory = Field(default_factory=Memory)
|
||||
long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory)
|
||||
state: int = Field(default=0)
|
||||
state: int = Field(default=-1) # -1 indicates initial or termination state where todo is None
|
||||
todo: Action = Field(default=None)
|
||||
watch: set[Type[Action]] = Field(default_factory=set)
|
||||
news: list[Type[Message]] = Field(default=[])
|
||||
react_mode: RoleReactMode = RoleReactMode.REACT # see `Role._set_react_mode` for definitions of the following two attributes
|
||||
max_react_loop: int = 1
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
|
|
@ -116,17 +129,38 @@ class Role:
|
|||
self._actions.append(i)
|
||||
self._states.append(f"{idx}. {action}")
|
||||
|
||||
def _set_react_mode(self, react_mode: str, max_react_loop: int = 1):
|
||||
"""Set strategy of the Role reacting to observed Message. Variation lies in how
|
||||
this Role elects action to perform during the _think stage, especially if it is capable of multiple Actions.
|
||||
|
||||
Args:
|
||||
react_mode (str): Mode for choosing action during the _think stage, can be one of:
|
||||
"react": standard think-act loop in the ReAct paper, alternating thinking and acting to solve the task, i.e. _think -> _act -> _think -> _act -> ...
|
||||
Use llm to select actions in _think dynamically;
|
||||
"by_order": switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...;
|
||||
"plan_and_act": first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ...
|
||||
Use llm to come up with the plan dynamically.
|
||||
Defaults to "react".
|
||||
max_react_loop (int): Maximum react cycles to execute, used to prevent the agent from reacting forever.
|
||||
Take effect only when react_mode is react, in which we use llm to choose actions, including termination.
|
||||
Defaults to 1, i.e. _think -> _act (-> return result and end)
|
||||
"""
|
||||
assert react_mode in RoleReactMode.values(), f"react_mode must be one of {RoleReactMode.values()}"
|
||||
self._rc.react_mode = react_mode
|
||||
if react_mode == RoleReactMode.REACT:
|
||||
self._rc.max_react_loop = max_react_loop
|
||||
|
||||
def _watch(self, actions: Iterable[Type[Action]]):
|
||||
"""Listen to the corresponding behaviors"""
|
||||
self._rc.watch.update(actions)
|
||||
# check RoleContext after adding watch actions
|
||||
self._rc.check(self._role_id)
|
||||
|
||||
def _set_state(self, state):
|
||||
def _set_state(self, state: int):
|
||||
"""Update the current state."""
|
||||
self._rc.state = state
|
||||
logger.debug(self._actions)
|
||||
self._rc.todo = self._actions[self._rc.state]
|
||||
self._rc.todo = self._actions[self._rc.state] if state >= 0 else None
|
||||
|
||||
def set_env(self, env: 'Environment'):
|
||||
"""Set the environment in which the role works. The role can talk to the environment and can also receive messages by observing."""
|
||||
|
|
@ -151,13 +185,19 @@ class Role:
|
|||
return
|
||||
prompt = self._get_prefix()
|
||||
prompt += STATE_TEMPLATE.format(history=self._rc.history, states="\n".join(self._states),
|
||||
n_states=len(self._states) - 1)
|
||||
n_states=len(self._states) - 1, previous_state=self._rc.state)
|
||||
# print(prompt)
|
||||
next_state = await self._llm.aask(prompt)
|
||||
logger.debug(f"{prompt=}")
|
||||
if not next_state.isdigit() or int(next_state) not in range(len(self._states)):
|
||||
logger.warning(f'Invalid answer of state, {next_state=}')
|
||||
next_state = "0"
|
||||
self._set_state(int(next_state))
|
||||
if (not next_state.isdigit() and next_state != "-1") \
|
||||
or int(next_state) not in range(-1, len(self._states)):
|
||||
logger.warning(f'Invalid answer of state, {next_state=}, will be set to -1')
|
||||
next_state = -1
|
||||
else:
|
||||
next_state = int(next_state)
|
||||
if next_state == -1:
|
||||
logger.info(f"End actions with {next_state=}")
|
||||
self._set_state(next_state)
|
||||
|
||||
async def _act(self) -> Message:
|
||||
# prompt = self.get_prefix()
|
||||
|
|
@ -203,10 +243,45 @@ class Role:
|
|||
self._rc.env.publish_message(msg)
|
||||
|
||||
async def _react(self) -> Message:
|
||||
"""Think first, then act"""
|
||||
await self._think()
|
||||
logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
|
||||
return await self._act()
|
||||
"""Think first, then act, until the Role _think it is time to stop and requires no more todo.
|
||||
This is the standard think-act loop in the ReAct paper, which alternates thinking and acting in task solving, i.e. _think -> _act -> _think -> _act -> ...
|
||||
Use llm to select actions in _think dynamically
|
||||
"""
|
||||
actions_taken = 0
|
||||
rsp = Message("No actions taken yet") # will be overwritten after Role _act
|
||||
while actions_taken < self._rc.max_react_loop:
|
||||
# think
|
||||
await self._think()
|
||||
if self._rc.todo is None:
|
||||
break
|
||||
# act
|
||||
logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
|
||||
rsp = await self._act()
|
||||
actions_taken += 1
|
||||
return rsp # return output from the last action
|
||||
|
||||
async def _act_by_order(self) -> Message:
|
||||
"""switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ..."""
|
||||
for i in range(len(self._states)):
|
||||
self._set_state(i)
|
||||
rsp = await self._act()
|
||||
return rsp # return output from the last action
|
||||
|
||||
async def _plan_and_act(self) -> Message:
|
||||
"""first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ... Use llm to come up with the plan dynamically."""
|
||||
# TODO: to be implemented
|
||||
return Message("")
|
||||
|
||||
async def react(self) -> Message:
|
||||
"""Entry to one of three strategies by which Role reacts to the observed Message"""
|
||||
if self._rc.react_mode == RoleReactMode.REACT:
|
||||
rsp = await self._react()
|
||||
elif self._rc.react_mode == RoleReactMode.BY_ORDER:
|
||||
rsp = await self._act_by_order()
|
||||
elif self._rc.react_mode == RoleReactMode.PLAN_AND_ACT:
|
||||
rsp = await self._plan_and_act()
|
||||
self._set_state(state=-1) # current reaction is complete, reset state to -1 and todo back to None
|
||||
return rsp
|
||||
|
||||
def recv(self, message: Message) -> None:
|
||||
"""add message to history."""
|
||||
|
|
@ -223,6 +298,10 @@ class Role:
|
|||
|
||||
return await self._react()
|
||||
|
||||
def get_memories(self, k=0) -> list[Message]:
|
||||
"""A wrapper to return the most recent k memories of this role, return all when k=0"""
|
||||
return self._rc.memory.get(k=k)
|
||||
|
||||
async def run(self, message=None):
|
||||
"""Observe, and think and act based on the results of the observation"""
|
||||
if message:
|
||||
|
|
@ -237,7 +316,7 @@ class Role:
|
|||
logger.debug(f"{self._setting}: no news. waiting.")
|
||||
return
|
||||
|
||||
rsp = await self._react()
|
||||
rsp = await self.react()
|
||||
# Publish the reply to the environment, waiting for the next subscriber to process
|
||||
self._publish_message(rsp)
|
||||
return rsp
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue