Merge pull request #479 from garylin2099/general_react

redefine react and provide multiple react modes
This commit is contained in:
garylin2099 2023-11-08 19:04:57 +08:00 committed by GitHub
commit f404abdeb0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 106 additions and 88 deletions

View file

@ -75,10 +75,9 @@ class SimpleCoder(Role):
logger.info(f"{self._setting}: ready to {self._rc.todo}")
todo = self._rc.todo
msg = self._rc.memory.get()[-1] # retrieve the latest memory
instruction = msg.content
msg = self.get_memories(k=1)[0] # find the most recent messages
code_text = await SimpleWriteCode().run(instruction)
code_text = await SimpleWriteCode().run(msg.content)
msg = Message(content=code_text, role=self.profile, cause_by=todo)
return msg
@ -92,43 +91,20 @@ class RunnableCoder(Role):
):
super().__init__(name, profile, **kwargs)
self._init_actions([SimpleWriteCode, SimpleRunCode])
async def _think(self) -> None:
if self._rc.todo is None:
self._set_state(0)
return
if self._rc.state + 1 < len(self._states):
self._set_state(self._rc.state + 1)
else:
self._rc.todo = None
self._set_react_mode(react_mode="by_order")
async def _act(self) -> Message:
logger.info(f"{self._setting}: ready to {self._rc.todo}")
todo = self._rc.todo
msg = self._rc.memory.get()[-1]
if isinstance(todo, SimpleWriteCode):
instruction = msg.content
result = await SimpleWriteCode().run(instruction)
msg = self.get_memories(k=1)[0] # find the most k recent messages
result = await todo.run(msg.content)
elif isinstance(todo, SimpleRunCode):
code_text = msg.content
result = await SimpleRunCode().run(code_text)
msg = Message(content=result, role=self.profile, cause_by=todo)
msg = Message(content=result, role=self.profile, cause_by=type(todo))
self._rc.memory.add(msg)
return msg
async def _react(self) -> Message:
while True:
await self._think()
if self._rc.todo is None:
break
await self._act()
return Message(content="All job done", role=self.profile)
def main(msg="write a function that calculates the sum of a list"):
def main(msg="write a function that calculates the product of a list and run it"):
# role = SimpleCoder()
role = RunnableCoder()
logger.info(msg)

View file

@ -42,17 +42,7 @@ class InvoiceOCRAssistant(Role):
self.filename = ""
self.origin_query = ""
self.orc_data = None
async def _think(self) -> None:
"""Determine the next action to be taken by the role."""
if self._rc.todo is None:
self._set_state(0)
return
if self._rc.state + 1 < len(self._states):
self._set_state(self._rc.state + 1)
else:
self._rc.todo = None
self._set_react_mode(react_mode="by_order")
async def _act(self) -> Message:
"""Perform an action as determined by the role.
@ -94,17 +84,3 @@ class InvoiceOCRAssistant(Role):
msg = Message(content=content, instruct_content=resp)
self._rc.memory.add(msg)
return msg
async def _react(self) -> Message:
"""Execute the invoice ocr assistant's think and actions.
Returns:
A message containing the final result of the assistant's actions.
"""
while True:
await self._think()
if self._rc.todo is None:
break
msg = await self._act()
return msg

View file

@ -31,20 +31,11 @@ class Researcher(Role):
):
super().__init__(name, profile, goal, constraints, **kwargs)
self._init_actions([CollectLinks(name), WebBrowseAndSummarize(name), ConductResearch(name)])
self._set_react_mode(react_mode="by_order")
self.language = language
if language not in ("en-us", "zh-cn"):
logger.warning(f"The language `{language}` has not been tested, it may not work.")
async def _think(self) -> None:
if self._rc.todo is None:
self._set_state(0)
return
if self._rc.state + 1 < len(self._states):
self._set_state(self._rc.state + 1)
else:
self._rc.todo = None
async def _act(self) -> Message:
logger.info(f"{self._setting}: ready to {self._rc.todo}")
todo = self._rc.todo
@ -73,12 +64,8 @@ class Researcher(Role):
self._rc.memory.add(ret)
return ret
async def _react(self) -> Message:
while True:
await self._think()
if self._rc.todo is None:
break
msg = await self._act()
async def react(self) -> Message:
msg = await super().react()
report = msg.instruct_content
self.write_report(report.topic, report.content)
return msg

View file

@ -7,7 +7,8 @@
"""
from __future__ import annotations
from typing import Iterable, Type
from typing import Iterable, Type, Union
from enum import Enum
from pydantic import BaseModel, Field
@ -27,12 +28,14 @@ Please note that only the text between the first and second "===" is information
{history}
===
You can now choose one of the following stages to decide the stage you need to go in the next step:
Your previous stage: {previous_state}
Now choose one of the following stages you need to go to in the next step:
{states}
Just answer a number between 0-{n_states}, choose the most suitable stage according to the understanding of the conversation.
Please note that the answer only needs a number, no need to add any other text.
If there is no conversation record, choose 0.
If you think you have completed your goal and don't need to go to any of the stages, return -1.
Do not answer anything else, and do not add any other information in your answer.
"""
@ -46,6 +49,14 @@ ROLE_TEMPLATE = """Your response should be based on the previous conversation hi
{name}: {result}
"""
class RoleReactMode(str, Enum):
REACT = "react"
BY_ORDER = "by_order"
PLAN_AND_ACT = "plan_and_act"
@classmethod
def values(cls):
return [item.value for item in cls]
class RoleSetting(BaseModel):
"""Role Settings"""
@ -67,10 +78,12 @@ class RoleContext(BaseModel):
env: 'Environment' = Field(default=None)
memory: Memory = Field(default_factory=Memory)
long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory)
state: int = Field(default=0)
state: int = Field(default=-1) # -1 indicates initial or termination state where todo is None
todo: Action = Field(default=None)
watch: set[Type[Action]] = Field(default_factory=set)
news: list[Type[Message]] = Field(default=[])
react_mode: RoleReactMode = RoleReactMode.REACT # see `Role._set_react_mode` for definitions of the following two attributes
max_react_loop: int = 1
class Config:
arbitrary_types_allowed = True
@ -116,17 +129,38 @@ class Role:
self._actions.append(i)
self._states.append(f"{idx}. {action}")
def _set_react_mode(self, react_mode: str, max_react_loop: int = 1):
"""Set strategy of the Role reacting to observed Message. Variation lies in how
this Role elects action to perform during the _think stage, especially if it is capable of multiple Actions.
Args:
react_mode (str): Mode for choosing action during the _think stage, can be one of:
"react": standard think-act loop in the ReAct paper, alternating thinking and acting to solve the task, i.e. _think -> _act -> _think -> _act -> ...
Use llm to select actions in _think dynamically;
"by_order": switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...;
"plan_and_act": first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ...
Use llm to come up with the plan dynamically.
Defaults to "react".
max_react_loop (int): Maximum react cycles to execute, used to prevent the agent from reacting forever.
Take effect only when react_mode is react, in which we use llm to choose actions, including termination.
Defaults to 1, i.e. _think -> _act (-> return result and end)
"""
assert react_mode in RoleReactMode.values(), f"react_mode must be one of {RoleReactMode.values()}"
self._rc.react_mode = react_mode
if react_mode == RoleReactMode.REACT:
self._rc.max_react_loop = max_react_loop
def _watch(self, actions: Iterable[Type[Action]]):
"""Listen to the corresponding behaviors"""
self._rc.watch.update(actions)
# check RoleContext after adding watch actions
self._rc.check(self._role_id)
def _set_state(self, state):
def _set_state(self, state: int):
"""Update the current state."""
self._rc.state = state
logger.debug(self._actions)
self._rc.todo = self._actions[self._rc.state]
self._rc.todo = self._actions[self._rc.state] if state >= 0 else None
def set_env(self, env: 'Environment'):
"""Set the environment in which the role works. The role can talk to the environment and can also receive messages by observing."""
@ -151,13 +185,19 @@ class Role:
return
prompt = self._get_prefix()
prompt += STATE_TEMPLATE.format(history=self._rc.history, states="\n".join(self._states),
n_states=len(self._states) - 1)
n_states=len(self._states) - 1, previous_state=self._rc.state)
# print(prompt)
next_state = await self._llm.aask(prompt)
logger.debug(f"{prompt=}")
if not next_state.isdigit() or int(next_state) not in range(len(self._states)):
logger.warning(f'Invalid answer of state, {next_state=}')
next_state = "0"
self._set_state(int(next_state))
if (not next_state.isdigit() and next_state != "-1") \
or int(next_state) not in range(-1, len(self._states)):
logger.warning(f'Invalid answer of state, {next_state=}, will be set to -1')
next_state = -1
else:
next_state = int(next_state)
if next_state == -1:
logger.info(f"End actions with {next_state=}")
self._set_state(next_state)
async def _act(self) -> Message:
# prompt = self.get_prefix()
@ -203,10 +243,45 @@ class Role:
self._rc.env.publish_message(msg)
async def _react(self) -> Message:
"""Think first, then act"""
await self._think()
logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
return await self._act()
"""Think first, then act, until the Role _think it is time to stop and requires no more todo.
This is the standard think-act loop in the ReAct paper, which alternates thinking and acting in task solving, i.e. _think -> _act -> _think -> _act -> ...
Use llm to select actions in _think dynamically
"""
actions_taken = 0
rsp = Message("No actions taken yet") # will be overwritten after Role _act
while actions_taken < self._rc.max_react_loop:
# think
await self._think()
if self._rc.todo is None:
break
# act
logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
rsp = await self._act()
actions_taken += 1
return rsp # return output from the last action
async def _act_by_order(self) -> Message:
"""switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ..."""
for i in range(len(self._states)):
self._set_state(i)
rsp = await self._act()
return rsp # return output from the last action
async def _plan_and_act(self) -> Message:
"""first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ... Use llm to come up with the plan dynamically."""
# TODO: to be implemented
return Message("")
async def react(self) -> Message:
"""Entry to one of three strategies by which Role reacts to the observed Message"""
if self._rc.react_mode == RoleReactMode.REACT:
rsp = await self._react()
elif self._rc.react_mode == RoleReactMode.BY_ORDER:
rsp = await self._act_by_order()
elif self._rc.react_mode == RoleReactMode.PLAN_AND_ACT:
rsp = await self._plan_and_act()
self._set_state(state=-1) # current reaction is complete, reset state to -1 and todo back to None
return rsp
def recv(self, message: Message) -> None:
"""add message to history."""
@ -223,6 +298,10 @@ class Role:
return await self._react()
def get_memories(self, k=0) -> list[Message]:
"""A wrapper to return the most recent k memories of this role, return all when k=0"""
return self._rc.memory.get(k=k)
async def run(self, message=None):
"""Observe, and think and act based on the results of the observation"""
if message:
@ -237,7 +316,7 @@ class Role:
logger.debug(f"{self._setting}: no news. waiting.")
return
rsp = await self._react()
rsp = await self.react()
# Publish the reply to the environment, waiting for the next subscriber to process
self._publish_message(rsp)
return rsp