update locally

This commit is contained in:
stellahsr 2023-10-06 16:16:09 +08:00
parent eb9ea304a5
commit dc0fd134fb
4 changed files with 180 additions and 42 deletions

View file

@ -51,6 +51,7 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
chest_observation: str = Field(default="") # eg: "Chests: None\n\n"
mf_instance: MineflayerEnv = Field(default_factory=MineflayerEnv)
runtime_status: bool = False # equal to action execution status: success or failed
@property
def progress(self):
@ -200,6 +201,10 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
chatlog.add(item)
return "I also need " + ", ".join(chatlog) + "." if chatlog else ""
def reset_block_info(self):
    """Revert all the placing events of the last step (not implemented yet, no-op)."""
    return None
def update_exploration_progress(self, success: bool):
"""
Split task into completed_tasks or failed_tasks
@ -209,6 +214,9 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
"conversations": self.conversations,
}
"""
# update runtime status in game memory
self.runtime_status = success
task = self.current_task
if task.startswith("Deposit useless items into the chest at"):
return

View file

@ -2,6 +2,8 @@
# @Date : 2023/9/23 12:45
# @Author : stellahong (stellahong@fuzhi.ai)
# @Desc :
import copy
from metagpt.logs import logger
from metagpt.roles.minecraft.minecraft_base import Minecraft as Base
from metagpt.schema import Message, HumanMessage, SystemMessage
@ -17,6 +19,8 @@ from metagpt.config import CONFIG
from metagpt.actions.minecraft.control_primitives_context import (
load_skills_code_context,
)
from metagpt.utils.minecraft import fix_and_parse_json
from metagpt.roles.minecraft.critic_agent import CriticReviewer
@agent_registry.register("action_developer")
@ -25,28 +29,31 @@ class ActionDeveloper(Base):
iterative prompting mechanism in paper.
generate action code based on environment observation and plan, as well as skills retrieval results
"""
def __init__(
    self,
    name: str = "Bob",
    profile: str = "Generate code for specified tasks",
    goal: str = "Produce accurate and efficient code solutions in Python and JavaScript",
    constraints: str = "Adhere to coding best practices and style guidelines",
) -> None:
    """Set up the action developer role.

    Args:
        name: Role name forwarded to the base constructor.
        profile: Role profile forwarded to the base constructor.
        goal: Role goal forwarded to the base constructor.
        constraints: Role constraints forwarded to the base constructor.
    """
    super().__init__(name, profile, goal, constraints)

    # Initialize actions specific to the Action role
    self._init_actions([GenerateActionCode])

    # Set events or actions the ActionAgent should watch or be aware of.
    # Its own chest_observation has to be updated according to the events.
    self._watch([RetrieveSkills])

    # Number of rollout iterations performed so far
    # (incremented in runcode_and_evaluate).
    self.rollout_num_iter = 0
    # Iteration limit compared against rollout_num_iter to decide `done`.
    self.task_max_retries = 4
    # Critic role invoked directly from runcode_and_evaluate.
    self.critic_reviewer = CriticReviewer()
def render_system_message(self, skills=[], *args, **kwargs):
"""
According to basic skills context files to genenarate js skill codes.
Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py
"""
action_template = utils.load_prompt("action_template")
base_skills = [
"exploreUntil",
@ -69,21 +76,21 @@ class ActionDeveloper(Base):
system_action_message = SystemMessage(content=system_action_prompt)
assert isinstance(system_action_message, SystemMessage)
return system_action_message
def render_human_message(
self, events, code="", task="", context="", critique="", *args, **kwargs
self, events, code="", task="", context="", critique="", *args, **kwargs
):
"""
Integrate observation about the environment(especially events), add to HumanMessage.
Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py
"""
# Deal with events info
chat_messages = []
error_messages = []
# damage_messages = [] # TODO: try to add damage_messages into prompt later
assert events[-1][0] == "observe", "Last event must be observe"
for i, (event_type, event) in enumerate(events):
if event_type == "onChat":
chat_messages.append(event["onChat"])
@ -101,30 +108,30 @@ class ActionDeveloper(Base):
inventory_used = event["status"]["inventoryUsed"]
inventory = event["inventory"]
assert i == len(events) - 1, "observe must be the last event"
# Collect all the environment information into a str: observation
observation = ""
observation = (
f"Code from the last round:\n{code or 'No code in the first round'}\n\n"
)
if error_messages:
error = "\n".join(error_messages)
observation += f"Execution error:\n{error}\n\n"
else:
observation += f"Execution error: No error\n\n"
if chat_messages:
chat_log = "\n".join(chat_messages)
observation += f"Chat log: {chat_log}\n\n"
else:
observation += f"Chat log: None\n\n"
observation += f"Biome: {biome}\n\n"
observation += f"Time: {time_of_day}\n\n"
observation += f"Nearby blocks: {', '.join(voxels) if voxels else 'None'}\n\n"
if entities:
nearby_entities = [
k for k, v in sorted(entities.items(), key=lambda x: x[1])
@ -132,35 +139,35 @@ class ActionDeveloper(Base):
observation += f"Nearby entities (nearest to farthest): {', '.join(nearby_entities)}\n\n"
else:
observation += f"Nearby entities (nearest to farthest): None\n\n"
observation += f"Health: {health:.1f}/20\n\n"
observation += f"Hunger: {hunger:.1f}/20\n\n"
observation += f"Position: x={position['x']:.1f}, y={position['y']:.1f}, z={position['z']:.1f}\n\n"
observation += f"Equipment: {equipment}\n\n"
observation += f"Inventory ({inventory_used}/36): {'Empty' if not inventory else ', '.join(inventory)}\n\n"
if not (
task == "Place and deposit useless items into a chest"
or task.startswith("Deposit useless items into the chest at")
task == "Place and deposit useless items into a chest"
or task.startswith("Deposit useless items into the chest at")
):
observation += self.game_memory.chest_observation
observation += f"Task: {task}\n\n"
observation += f"Context: {context or 'None'}\n\n"
observation += f"Critique: {critique or 'None'}\n\n"
return HumanMessage(content=observation)
def encapsule_message(
self,
events,
code="",
task="",
context="",
critique="",
skills=[],
*args,
**kwargs,
self,
events,
code="",
task="",
context="",
critique="",
skills=[],
*args,
**kwargs,
):
system_message = self.render_system_message(skills=skills)
human_message = self.render_human_message(
@ -170,7 +177,7 @@ class ActionDeveloper(Base):
"system_msg": [system_message.content],
"human_msg": human_message.content,
}
async def _observe(self) -> int:
await super()._observe()
for msg in self._rc.news:
@ -180,7 +187,109 @@ class ActionDeveloper(Base):
] # only relevant msgs count as observed news
logger.info(len(self._rc.news))
return len(self._rc.news)
async def run_step(self, human_msg, system_msg, *args, **kwargs):
    """Repeat ``runcode_and_evaluate`` until it reports the step as done.

    Args:
        human_msg: Human prompt used for the first attempt.
        system_msg: System prompt used for the first attempt.
        *args: Forwarded unchanged to every attempt.
        **kwargs: Forwarded unchanged to every attempt.

    Returns:
        The ``(messages, reward, done, info)`` tuple of the final attempt.
    """
    # NOTE(review): `_act` sets `.cause_by` on the handler result, which a
    # plain tuple does not support -- confirm the intended caller contract.
    while True:
        # runcode_and_evaluate is a coroutine function: it must be awaited,
        # otherwise the unpacking below fails on the coroutine object.
        messages, reward, done, info = await self.runcode_and_evaluate(
            human_msg, system_msg, *args, **kwargs
        )
        if done:
            break
        # Retry with the prompts rebuilt by the previous attempt, so that the
        # new code, events and critique are taken into account.
        human_msg = messages["human_msg"]
        system_msg = messages["system_msg"]
    return messages, reward, done, info
async def handle_add_new_skills(
    self, task, program_name, program_code, skills, *args, **kwargs
):
    """Run ``AddNewSkills`` for a program and record the result in game memory.

    Args:
        task: Task passed to ``AddNewSkills().run``.
        program_name: Program name passed to ``AddNewSkills().run``.
        program_code: Program code passed to ``AddNewSkills().run``.
        skills: Skills passed to ``AddNewSkills().run``.
        *args: Accepted but unused.
        **kwargs: Accepted but unused.
    """
    description = self.game_memory.skill_desp
    add_action = AddNewSkills()
    added_info = await add_action.run(
        task, program_name, program_code, skills, description
    )
    # Hand the result to game memory through its append_skill callback.
    self.perform_game_info_callback(added_info, self.game_memory.append_skill)
async def retrieve_skills(self, query, skills, *args, **kwargs):
    """Run ``RetrieveSkills`` and store the outcome in game memory.

    Args:
        query: Query passed to ``RetrieveSkills().run``.
        skills: Skills passed to ``RetrieveSkills().run``.
        *args: Accepted but unused.
        **kwargs: Accepted but unused.
    """
    found = await RetrieveSkills().run(query, skills)
    found_count = len(found)
    logger.info(f"Render Action Agent system message with {found_count} skills")
    # No Message is returned here; the result only goes to game memory
    # through its update_retrieve_skills callback.
    self.perform_game_info_callback(found, self.game_memory.update_retrieve_skills)
async def runcode_and_evaluate(self, human_msg, system_msg, *args, **kwargs):
    """Generate action code once, evaluate it and build the next prompt.

    Equal to step() in Voyager.

    Args:
        human_msg: Human prompt passed to ``GenerateActionCode().run``.
        system_msg: System prompt passed to ``GenerateActionCode().run``.
        *args: Forwarded to ``GenerateActionCode().run``.
        **kwargs: Forwarded to ``GenerateActionCode().run``.

    Returns:
        A ``(message, reward, done, info)`` tuple. ``message`` is a dict with
        the keys ``"system_msg"`` and ``"human_msg"``; ``reward`` is always 0;
        ``done`` is true once ``rollout_num_iter`` reaches
        ``task_max_retries`` or game memory reports a successful runtime
        status; ``info`` holds the task, the success flag and the
        conversations.
    """
    task = self.game_memory.current_task
    context = self.game_memory.context

    # Refresh the generated code and the matching program name.
    code, program_name = await GenerateActionCode().run(
        human_msg, system_msg, *args, **kwargs
    )

    if code is not None:
        # FIXME: if a standalone entry point for executing Minecraft code
        # exists, use that dedicated function instead.
        events = await self._obtain_events()
        # These events are the ones observed after the new action function
        # has been executed. The critic then updates the evaluation result
        # and calls the latest environment info back into game memory.
        # TODO: moving the critic's event update here seems more reasonable.
        await self.critic_reviewer._act()
        critique = self.game_memory.critique
        event_summary = self.game_memory.event_summary
        skills = self.game_memory.skills

        if not self.game_memory.runtime_status:
            # TODO: callback game memory reset block info
            logger.info("Not success, reset block info !")

        # NOTE(review): `self.messages` is not defined in the visible part of
        # this class -- confirm the base class provides it.
        logger.info(
            f"\033[32m****Action Agent human message****\n{self.messages[-1].content}\033[0m"
        )

        # TODO: add new skills no matter success or not; the call to
        # handle_add_new_skills(task, program_name, code, skills) is disabled.
        new_skill_info = {"query": context + "\n\n" + event_summary, "skills": skills}
        await self.retrieve_skills(**new_skill_info)
        retrieve_skills = self.game_memory.retrieve_skills

        message = self.encapsule_message(
            events=events,
            code=code,
            task=task,
            context=context,
            critique=critique,
            skills=retrieve_skills,
        )
    else:
        # No code was produced: hand the same prompts back for another try.
        # `_act` passes the output of `encapsule_message` (a plain string and
        # a list of strings), which has no `.content`; message objects are
        # still unwrapped as before.
        message = {
            "system_msg": (
                system_msg if isinstance(system_msg, list) else [system_msg.content]
            ),
            "human_msg": (
                human_msg if isinstance(human_msg, str) else human_msg.content
            ),
        }
        logger.info("\033[34m Trying again!\033[0m")

    self.rollout_num_iter += 1
    done = (
        self.rollout_num_iter >= self.task_max_retries
        or self.game_memory.runtime_status
    )
    # NOTE(review): `self.task` and `self.conversations` are not defined in
    # the visible part of this class -- confirm the base class provides them.
    info = {
        "task": self.task,
        "success": self.game_memory.runtime_status,
        "conversations": self.conversations,
    }

    # Publish the generated code and program name to game memory.
    self.perform_game_info_callback(code, self.game_memory.update_code)
    self.perform_game_info_callback(
        program_name, self.game_memory.update_program_name
    )
    return message, 0, done, info
async def generate_action_code(self, human_msg, system_msg, *args, **kwargs):
code, program_name = await GenerateActionCode().run(
human_msg, system_msg, *args, **kwargs
@ -198,11 +307,12 @@ class ActionDeveloper(Base):
)
# logger.info(msg)
return msg
async def _act(self) -> Message:
todo = self._rc.todo
logger.debug(f"Todo is {todo}")
self.maintain_actions(todo)
# 获取最新的游戏周边信息
events = await self._obtain_events()
self.perform_game_info_callback(events, self.game_memory.update_event)
@ -211,7 +321,8 @@ class ActionDeveloper(Base):
code = self.game_memory.code
critique = self.game_memory.critique
retrieve_skills = self.game_memory.retrieve_skills
# 对自己所需的环境信息进行处理
message = self.encapsule_message(
events=events,
code=code,
@ -222,11 +333,11 @@ class ActionDeveloper(Base):
)
logger.info(todo)
handler_map = {
GenerateActionCode: self.generate_action_code,
GenerateActionCode: self.run_step#self.generate_action_code,
}
handler = handler_map.get(type(todo))
logger.info(handler)
if handler:
msg = await handler(**message)
msg.cause_by = type(todo)
@ -234,5 +345,7 @@ class ActionDeveloper(Base):
logger.info(msg.send_to)
self._publish_message(msg)
return msg
raise ValueError(f"Unknown todo type: {type(todo)}")
# await self.run_step()

View file

@ -28,12 +28,28 @@ class CriticReviewer(Base):
) -> None:
super().__init__(name, profile, goal, constraints)
# Initialize actions specific to the CriticReviewer role
# self._init_actions([VerifyTask])
self._init_actions([VerifyTask])
# Set events or actions the CriticReviewer should watch or be aware of
# 需要获取最新的events来进行评估
self._watch([GenerateActionCode, AddNewSkills])
async def run(self, message=None):
    """Observe, only get the observation.

    Args:
        message: Optional input. A str is wrapped into a ``Message`` and
            received, a ``Message`` is received as is, and a list is joined
            with newlines into one ``Message`` and received. When it is
            falsy, ``_observe`` is awaited instead.

    Returns:
        None. Returns early, without setting a todo, when there is no
        message and ``_observe`` reports nothing new.
    """
    if message:
        if isinstance(message, str):
            message = Message(message)
        if isinstance(message, Message):
            self.recv(message)
        if isinstance(message, list):
            self.recv(Message("\n".join(message)))
    elif not await self._observe():
        # If there is no new information, suspend and wait
        logger.info(f"{self._setting}: no news. waiting.")
        return
    # NOTE(review): this assigns the VerifyTask class itself, not an
    # instance -- confirm that consumers of `_rc.todo` expect a class.
    self._rc.todo = VerifyTask
def render_system_message(self):
    """Return a ``SystemMessage`` whose content is the loaded "critic" prompt."""
    return SystemMessage(content=load_prompt("critic"))
@ -119,6 +135,7 @@ class CriticReviewer(Base):
self.perform_game_info_callback(
success, self.game_memory.update_exploration_progress
)
self.perform_game_info_callback(critique, self.game_memory.update_critique)
return Message(
content=f"{critique}",
instruct_content="verify_task",

View file

@ -19,7 +19,7 @@ async def learn(task="Start", investment: float = 50.0, n_round: int = 3):
[
CurriculumDesigner(),
ActionDeveloper(),
CriticReviewer(),
# CriticReviewer(),
SkillManager(),
]