redefine react and provide multiple react modes

This commit is contained in:
garylin2099 2023-11-06 14:37:29 +08:00
parent ccc4c9e04d
commit 250fe83de9
4 changed files with 94 additions and 87 deletions

View file

@ -42,17 +42,7 @@ class InvoiceOCRAssistant(Role):
self.filename = ""
self.origin_query = ""
self.orc_data = None
async def _think(self) -> None:
    """Select the next action for the role, walking the action list in order.

    With no pending todo the role starts at state 0. Otherwise it advances to
    the following state, or clears the todo once the last state has been passed.
    """
    ctx = self._rc
    if ctx.todo is None:
        # Nothing pending: begin from the first action.
        self._set_state(0)
        return
    upcoming = ctx.state + 1
    if upcoming >= len(self._states):
        # Ran past the last action: signal that there is nothing left to do.
        ctx.todo = None
        return
    self._set_state(upcoming)
self._set_react_mode(react_mode="by_order")
async def _act(self) -> Message:
"""Perform an action as determined by the role.
@ -94,17 +84,3 @@ class InvoiceOCRAssistant(Role):
msg = Message(content=content, instruct_content=resp)
self._rc.memory.add(msg)
return msg
async def _react(self) -> Message:
    """Run the invoice OCR assistant's think/act cycle until no todo remains.

    Returns:
        The message produced by the last action that was executed.
    """
    await self._think()
    while self._rc.todo is not None:
        msg = await self._act()
        # Decide whether another action follows before looping again.
        await self._think()
    return msg

View file

@ -31,20 +31,11 @@ class Researcher(Role):
):
super().__init__(name, profile, goal, constraints, **kwargs)
self._init_actions([CollectLinks(name), WebBrowseAndSummarize(name), ConductResearch(name)])
self._set_react_mode(react_mode="by_order")
self.language = language
if language not in ("en-us", "zh-cn"):
logger.warning(f"The language `{language}` has not been tested, it may not work.")
async def _think(self) -> None:
    """Step to the next action in order, or clear the todo when all states are done."""
    if self._rc.todo is None:
        # No action pending: start from the first state.
        self._set_state(0)
        return
    following = self._rc.state + 1
    has_more = following < len(self._states)
    if has_more:
        self._set_state(following)
    else:
        self._rc.todo = None
async def _act(self) -> Message:
logger.info(f"{self._setting}: ready to {self._rc.todo}")
todo = self._rc.todo
@ -73,12 +64,8 @@ class Researcher(Role):
self._rc.memory.add(ret)
return ret
async def _react(self) -> Message:
while True:
await self._think()
if self._rc.todo is None:
break
msg = await self._act()
async def react(self) -> Message:
    """Run the parent class's react(), then write the resulting report.

    Returns:
        The message returned by the parent react(); its instruct_content is
        read for the `topic` and `content` passed to write_report.
    """
    result = await super().react()
    research = result.instruct_content
    self.write_report(research.topic, research.content)
    return result

View file

@ -7,7 +7,7 @@
"""
from __future__ import annotations
from typing import Iterable, Type
from typing import Iterable, Type, Union
from pydantic import BaseModel, Field
@ -27,12 +27,15 @@ Please note that only the text between the first and second "===" is information
{history}
===
You can now choose one of the following stages to decide the stage you need to go in the next step:
Your previous stage: {previous_state}
Now choose one of the following stages you need to go to in the next step:
{states}
Just answer a number between 0-{n_states}, choose the most suitable stage according to the understanding of the conversation.
Please note that the answer only needs a number, no need to add any other text.
If there is no conversation record, choose 0.
If there is no conversation record or your previous stage is None, choose 0.
If you think you have completed your goal and don't need to go to any of the stages, return -1.
Do not answer anything else, and do not add any other information in your answer.
"""
@ -67,7 +70,7 @@ class RoleContext(BaseModel):
env: 'Environment' = Field(default=None)
memory: Memory = Field(default_factory=Memory)
long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory)
state: int = Field(default=0)
state: int = Field(default=None)
todo: Action = Field(default=None)
watch: set[Type[Action]] = Field(default_factory=set)
news: list[Type[Message]] = Field(default=[])
@ -100,6 +103,9 @@ class Role:
self._actions = []
self._role_id = str(self._setting)
self._rc = RoleContext()
# see `_set_react_mode` function for definitions of the following two attributes
self.react_mode = "react"
self.max_react_loop = 1
def _reset(self):
self._states = []
@ -116,17 +122,37 @@ class Role:
self._actions.append(i)
self._states.append(f"{idx}. {action}")
def _set_react_mode(self, react_mode: str, max_react_loop: int = 1):
    """Set the strategy by which the Role reacts to an observed Message.

    The modes differ in how the Role elects the action to perform, which matters
    when it is capable of multiple Actions.

    Args:
        react_mode (str): Mode for choosing actions. Required (no default); must be one of
            "by_order": switch action each time by the order defined in _init_actions,
                i.e. _act (Action1) -> _act (Action2) -> ...;
            "react": standard think-act loop in the ReAct paper, alternating thinking and acting
                to solve the task, i.e. _think -> _act -> _think -> _act -> ...
                Use llm to select actions in _think dynamically;
            "plan_and_act": first plan, then execute an action sequence,
                i.e. _think (of a plan) -> _act -> _act -> ...
                Use llm to come up with the plan dynamically.
        max_react_loop (int): Maximum react cycles to execute, used to prevent the agent from
            reacting forever. Stored only when react_mode is "react", in which the llm chooses
            actions, including termination; ignored for the other modes.
            Defaults to 1, i.e. _think -> _act (-> return result and end).

    Raises:
        ValueError: If react_mode is not one of the supported modes.
    """
    supported_modes = ("react", "by_order", "plan_and_act")
    if react_mode not in supported_modes:
        # Fail here rather than letting react() later fall through with no matching branch.
        raise ValueError(f"react_mode must be one of {supported_modes}, got {react_mode!r}")
    self.react_mode = react_mode
    if react_mode == "react":
        self.max_react_loop = max_react_loop
def _watch(self, actions: Iterable[Type[Action]]):
    """Register the given action types as ones this role listens for."""
    context = self._rc
    context.watch.update(actions)
    # Re-run the RoleContext check now that the watch set has changed.
    context.check(self._role_id)
def _set_state(self, state: Union[int, None]):
    """Update the current state and the matching todo.

    Args:
        state: Index into self._actions of the action to perform next, or None
            to indicate that there is nothing to do.
    """
    self._rc.state = state
    logger.debug(self._actions)
    # A None state has no action to look up; indexing with it would raise TypeError.
    self._rc.todo = self._actions[self._rc.state] if state is not None else None
def set_env(self, env: 'Environment'):
"""Set the environment in which the role works. The role can talk to the environment and can also receive messages by observing."""
@ -151,13 +177,19 @@ class Role:
return
prompt = self._get_prefix()
prompt += STATE_TEMPLATE.format(history=self._rc.history, states="\n".join(self._states),
n_states=len(self._states) - 1)
n_states=len(self._states) - 1, previous_state=self._rc.state)
# print(prompt)
next_state = await self._llm.aask(prompt)
logger.debug(f"{prompt=}")
if not next_state.isdigit() or int(next_state) not in range(len(self._states)):
if not next_state.isdigit() or int(next_state) not in range(-1, len(self._states)):
logger.warning(f'Invalid answer of state, {next_state=}')
next_state = "0"
self._set_state(int(next_state))
next_state = None
else:
next_state = int(next_state)
if next_state == -1:
logger.info(f"End actions with {next_state=}")
next_state = None
self._set_state(next_state)
async def _act(self) -> Message:
# prompt = self.get_prefix()
@ -203,10 +235,42 @@ class Role:
self._rc.env.publish_message(msg)
async def _react(self) -> Message:
    """Think first, then act, until the Role _think it is time to stop and requires no more todo.

    This is the standard think-act loop in the ReAct paper, which alternates thinking and acting
    in task solving, i.e. _think -> _act -> _think -> _act -> ...
    Use llm to select actions in _think dynamically.

    Returns:
        The output of the last action performed, or a placeholder Message when no action ran
        (the first _think left no todo, or self.max_react_loop is not positive).
    """
    # Placeholder so `return rsp` is always bound; overwritten by the first _act.
    rsp = Message("No actions taken yet")
    actions_taken = 0
    while actions_taken < self.max_react_loop:
        # think
        await self._think()
        if self._rc.todo is None:
            break
        # act
        logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
        rsp = await self._act()
        actions_taken += 1
    return rsp  # return output from the last action
async def _act_by_order(self) -> Message:
    """Switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...

    Returns:
        The output of the last action, or a placeholder Message when the Role has no states
        (the loop body never runs).
    """
    # Placeholder so `return rsp` is always bound even when there are no actions.
    rsp = Message("No actions taken yet")
    for i in range(len(self._states)):
        self._set_state(i)
        rsp = await self._act()
    return rsp  # return output from the last action
async def _plan_and_act(self) -> Message:
    """Plan first, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ...

    The plan is meant to come from the llm dynamically. Not implemented yet:
    currently returns a Message built from an empty string.
    """
    # TODO: to be implemented
    placeholder = Message("")
    return placeholder
async def react(self) -> Message:
    """Entry to one of three strategies by which Role reacts to the observed Message.

    Dispatches on self.react_mode: "react" -> _react, "by_order" -> _act_by_order,
    "plan_and_act" -> _plan_and_act.

    Returns:
        The Message returned by the selected strategy.

    Raises:
        ValueError: If self.react_mode is none of the supported modes, instead of
            falling through and returning None.
    """
    if self.react_mode == "react":
        return await self._react()
    if self.react_mode == "by_order":
        return await self._act_by_order()
    if self.react_mode == "plan_and_act":
        return await self._plan_and_act()
    raise ValueError(f"Unsupported react_mode: {self.react_mode!r}")
def recv(self, message: Message) -> None:
"""add message to history."""
@ -223,6 +287,10 @@ class Role:
return await self._react()
def get_memories(self, k=0) -> list[Message]:
    """Return the most recent k memories of this role; k=0 returns all of them.

    Thin wrapper that forwards k to the role context's memory.
    """
    memory = self._rc.memory
    return memory.get(k=k)
async def run(self, message=None):
"""Observe, and think and act based on the results of the observation"""
if message:
@ -237,7 +305,7 @@ class Role:
logger.debug(f"{self._setting}: no news. waiting.")
return
rsp = await self._react()
rsp = await self.react()
# Publish the reply to the environment, waiting for the next subscriber to process
self._publish_message(rsp)
return rsp