mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-17 15:35:21 +02:00
update locally
This commit is contained in:
parent
eb9ea304a5
commit
dc0fd134fb
4 changed files with 180 additions and 42 deletions
|
|
@ -51,6 +51,7 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
|
|||
chest_observation: str = Field(default="") # eg: "Chests: None\n\n"
|
||||
|
||||
mf_instance: MineflayerEnv = Field(default_factory=MineflayerEnv)
|
||||
runtime_status: bool = False # equal to action execution status: success or failed
|
||||
|
||||
@property
|
||||
def progress(self):
|
||||
|
|
@ -200,6 +201,10 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
|
|||
chatlog.add(item)
|
||||
return "I also need " + ", ".join(chatlog) + "." if chatlog else ""
|
||||
|
||||
def reset_block_info(self):
    """Placeholder for reverting all placing events of the last step.

    Not implemented yet: calling it does nothing and returns ``None``.
    """
|
||||
|
||||
def update_exploration_progress(self, success: bool):
|
||||
"""
|
||||
Split task into completed_tasks or failed_tasks
|
||||
|
|
@ -209,6 +214,9 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
|
|||
"conversations": self.conversations,
|
||||
}
|
||||
"""
|
||||
# update runtime status in game memory
|
||||
self.runtime_status = success
|
||||
|
||||
task = self.current_task
|
||||
if task.startswith("Deposit useless items into the chest at"):
|
||||
return
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
# @Date : 2023/9/23 12:45
|
||||
# @Author : stellahong (stellahong@fuzhi.ai)
|
||||
# @Desc :
|
||||
import copy
|
||||
|
||||
from metagpt.logs import logger
|
||||
from metagpt.roles.minecraft.minecraft_base import Minecraft as Base
|
||||
from metagpt.schema import Message, HumanMessage, SystemMessage
|
||||
|
|
@ -17,6 +19,8 @@ from metagpt.config import CONFIG
|
|||
from metagpt.actions.minecraft.control_primitives_context import (
|
||||
load_skills_code_context,
|
||||
)
|
||||
from metagpt.utils.minecraft import fix_and_parse_json
|
||||
from metagpt.roles.minecraft.critic_agent import CriticReviewer
|
||||
|
||||
|
||||
@agent_registry.register("action_developer")
|
||||
|
|
@ -25,28 +29,31 @@ class ActionDeveloper(Base):
|
|||
iterative prompting mechanism in paper.
|
||||
generate action code based on environment observation and plan, as well as skills retrieval results
|
||||
"""
|
||||
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str = "Bob",
|
||||
profile: str = "Generate code for specified tasks",
|
||||
goal: str = "Produce accurate and efficient code solutions in Python and JavaScript",
|
||||
constraints: str = "Adhere to coding best practices and style guidelines",
|
||||
self,
|
||||
name: str = "Bob",
|
||||
profile: str = "Generate code for specified tasks",
|
||||
goal: str = "Produce accurate and efficient code solutions in Python and JavaScript",
|
||||
constraints: str = "Adhere to coding best practices and style guidelines",
|
||||
) -> None:
|
||||
super().__init__(name, profile, goal, constraints)
|
||||
# Initialize actions specific to the Action role
|
||||
self._init_actions([GenerateActionCode])
|
||||
|
||||
|
||||
# Set events or actions the ActionAgent should watch or be aware of
|
||||
# 需要根据events进行自己chest_observation的更新
|
||||
self._watch([RetrieveSkills])
|
||||
|
||||
self.rollout_num_iter = 0
|
||||
self.task_max_retries = 4
|
||||
self.critic_reviewer = CriticReviewer()
|
||||
|
||||
def render_system_message(self, skills=[], *args, **kwargs):
|
||||
"""
|
||||
According to basic skills context files to genenarate js skill codes.
|
||||
Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py
|
||||
"""
|
||||
|
||||
|
||||
action_template = utils.load_prompt("action_template")
|
||||
base_skills = [
|
||||
"exploreUntil",
|
||||
|
|
@ -69,21 +76,21 @@ class ActionDeveloper(Base):
|
|||
system_action_message = SystemMessage(content=system_action_prompt)
|
||||
assert isinstance(system_action_message, SystemMessage)
|
||||
return system_action_message
|
||||
|
||||
|
||||
def render_human_message(
|
||||
self, events, code="", task="", context="", critique="", *args, **kwargs
|
||||
self, events, code="", task="", context="", critique="", *args, **kwargs
|
||||
):
|
||||
"""
|
||||
Integrate observation about the environment(especially events), add to HumanMessage.
|
||||
Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py
|
||||
"""
|
||||
|
||||
|
||||
# Deal with events info
|
||||
chat_messages = []
|
||||
error_messages = []
|
||||
# damage_messages = [] # TODO: try to add damage_messages into prompt later
|
||||
assert events[-1][0] == "observe", "Last event must be observe"
|
||||
|
||||
|
||||
for i, (event_type, event) in enumerate(events):
|
||||
if event_type == "onChat":
|
||||
chat_messages.append(event["onChat"])
|
||||
|
|
@ -101,30 +108,30 @@ class ActionDeveloper(Base):
|
|||
inventory_used = event["status"]["inventoryUsed"]
|
||||
inventory = event["inventory"]
|
||||
assert i == len(events) - 1, "observe must be the last event"
|
||||
|
||||
|
||||
# Collect all the environment information into a str: observation
|
||||
observation = ""
|
||||
|
||||
|
||||
observation = (
|
||||
f"Code from the last round:\n{code or 'No code in the first round'}\n\n"
|
||||
)
|
||||
|
||||
|
||||
if error_messages:
|
||||
error = "\n".join(error_messages)
|
||||
observation += f"Execution error:\n{error}\n\n"
|
||||
else:
|
||||
observation += f"Execution error: No error\n\n"
|
||||
|
||||
|
||||
if chat_messages:
|
||||
chat_log = "\n".join(chat_messages)
|
||||
observation += f"Chat log: {chat_log}\n\n"
|
||||
else:
|
||||
observation += f"Chat log: None\n\n"
|
||||
|
||||
|
||||
observation += f"Biome: {biome}\n\n"
|
||||
observation += f"Time: {time_of_day}\n\n"
|
||||
observation += f"Nearby blocks: {', '.join(voxels) if voxels else 'None'}\n\n"
|
||||
|
||||
|
||||
if entities:
|
||||
nearby_entities = [
|
||||
k for k, v in sorted(entities.items(), key=lambda x: x[1])
|
||||
|
|
@ -132,35 +139,35 @@ class ActionDeveloper(Base):
|
|||
observation += f"Nearby entities (nearest to farthest): {', '.join(nearby_entities)}\n\n"
|
||||
else:
|
||||
observation += f"Nearby entities (nearest to farthest): None\n\n"
|
||||
|
||||
|
||||
observation += f"Health: {health:.1f}/20\n\n"
|
||||
observation += f"Hunger: {hunger:.1f}/20\n\n"
|
||||
observation += f"Position: x={position['x']:.1f}, y={position['y']:.1f}, z={position['z']:.1f}\n\n"
|
||||
observation += f"Equipment: {equipment}\n\n"
|
||||
observation += f"Inventory ({inventory_used}/36): {'Empty' if not inventory else ', '.join(inventory)}\n\n"
|
||||
|
||||
|
||||
if not (
|
||||
task == "Place and deposit useless items into a chest"
|
||||
or task.startswith("Deposit useless items into the chest at")
|
||||
task == "Place and deposit useless items into a chest"
|
||||
or task.startswith("Deposit useless items into the chest at")
|
||||
):
|
||||
observation += self.game_memory.chest_observation
|
||||
|
||||
|
||||
observation += f"Task: {task}\n\n"
|
||||
observation += f"Context: {context or 'None'}\n\n"
|
||||
observation += f"Critique: {critique or 'None'}\n\n"
|
||||
|
||||
|
||||
return HumanMessage(content=observation)
|
||||
|
||||
|
||||
def encapsule_message(
|
||||
self,
|
||||
events,
|
||||
code="",
|
||||
task="",
|
||||
context="",
|
||||
critique="",
|
||||
skills=[],
|
||||
*args,
|
||||
**kwargs,
|
||||
self,
|
||||
events,
|
||||
code="",
|
||||
task="",
|
||||
context="",
|
||||
critique="",
|
||||
skills=[],
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
system_message = self.render_system_message(skills=skills)
|
||||
human_message = self.render_human_message(
|
||||
|
|
@ -170,7 +177,7 @@ class ActionDeveloper(Base):
|
|||
"system_msg": [system_message.content],
|
||||
"human_msg": human_message.content,
|
||||
}
|
||||
|
||||
|
||||
async def _observe(self) -> int:
|
||||
await super()._observe()
|
||||
for msg in self._rc.news:
|
||||
|
|
@ -180,7 +187,109 @@ class ActionDeveloper(Base):
|
|||
] # only relevant msgs count as observed news
|
||||
logger.info(len(self._rc.news))
|
||||
return len(self._rc.news)
|
||||
|
||||
async def run_step(self, human_msg, system_msg, *args, **kwargs):
    """Repeat action rounds until one of them reports completion.

    Every round delegates to ``runcode_and_evaluate`` with the same prompt
    messages and the same extra arguments.

    Args:
        human_msg: Human prompt forwarded to every round.
        system_msg: System prompt forwarded to every round.
        *args: Extra positional arguments forwarded to every round.
        **kwargs: Extra keyword arguments forwarded to every round.

    Returns:
        The ``(messages, reward, done, info)`` tuple produced by the round
        that set ``done``.
    """
    # NOTE(review): the prompts are not refreshed from the ``messages`` a
    # round returns, so each retry is issued with the original prompts --
    # confirm whether the returned messages should feed the next round.
    while True:
        # ``runcode_and_evaluate`` is a coroutine function; without ``await``
        # the unpacking below would be applied to the coroutine object
        # itself and raise ``TypeError`` before any round had run.
        messages, reward, done, info = await self.runcode_and_evaluate(
            human_msg, system_msg, *args, **kwargs
        )
        if done:
            return messages, reward, done, info
|
||||
|
||||
async def handle_add_new_skills(
    self, task, program_name, program_code, skills, *args, **kwargs
):
    """Run ``AddNewSkills`` for a program and store the result in game memory.

    Args:
        task: Task passed through to ``AddNewSkills``.
        program_name: Name of the program passed through to ``AddNewSkills``.
        program_code: Source of the program passed through to ``AddNewSkills``.
        skills: Skills passed through to ``AddNewSkills``.
        *args: Accepted but unused.
        **kwargs: Accepted but unused.
    """
    description = self.game_memory.skill_desp
    skill_action = AddNewSkills()
    added_skill_info = await skill_action.run(
        task, program_name, program_code, skills, description
    )
    # Hand the result to game memory through its ``append_skill`` callback.
    self.perform_game_info_callback(added_skill_info, self.game_memory.append_skill)
|
||||
|
||||
async def retrieve_skills(self, query, skills, *args, **kwargs):
    """Run ``RetrieveSkills`` for a query and store the result in game memory.

    Args:
        query: Query passed through to ``RetrieveSkills``.
        skills: Skills passed through to ``RetrieveSkills``.
        *args: Accepted but unused.
        **kwargs: Accepted but unused.
    """
    matched = await RetrieveSkills().run(query, skills)
    logger.info(f"Render Action Agent system message with {len(matched)} skills")
    # Nothing is returned: the result only reaches game memory through the
    # ``update_retrieve_skills`` callback (publishing it as a ``Message`` is
    # currently disabled).
    self.perform_game_info_callback(matched, self.game_memory.update_retrieve_skills)
|
||||
|
||||
async def runcode_and_evaluate(self, human_msg, system_msg, *args, **kwargs):
    """Run one generate-and-critique round; the counterpart of ``step()`` in Voyager.

    ``GenerateActionCode`` is asked for code.  When code comes back, fresh
    events are obtained, the critic reviewer acts, skills are retrieved again
    and a new prompt dict is assembled; when no code comes back, the incoming
    prompts are echoed into the prompt dict instead.  Afterwards the rollout
    counter is advanced and the code and program name are pushed to game
    memory.

    Args:
        human_msg: Human prompt handed to ``GenerateActionCode``.
        system_msg: System prompt handed to ``GenerateActionCode``.
        *args: Extra positional arguments handed to ``GenerateActionCode``.
        **kwargs: Extra keyword arguments handed to ``GenerateActionCode``.

    Returns:
        ``(message, 0, done, info)``: ``message`` is the prompt dict, the
        reward slot is always ``0``, ``done`` is truthy once the rollout
        counter reaches ``task_max_retries`` or game memory reports a
        successful runtime status, and ``info`` carries the task, the
        success flag and the conversations.
    """
    task = self.game_memory.current_task
    context = self.game_memory.context

    # Generate the new code together with its program name.
    generated = await GenerateActionCode().run(
        human_msg, system_msg, *args, **kwargs
    )
    code, program_name = generated

    if code is None:
        # NOTE(review): this branch reads ``.content`` from both prompts,
        # while the prompt dicts visible elsewhere in the file already hold
        # plain content -- confirm the types callers actually pass.
        message = {
            "system_msg": [system_msg.content],
            "human_msg": human_msg.content,
        }
        logger.info(f"\033[34m Trying again!\033[0m")
    else:
        # FIXME: use a dedicated Minecraft code-execution entry point once
        # one exists.
        events = await self._obtain_events()
        # Note: these events are the ones observed after the new action
        # function has run.
        # The critic updates the evaluation result and calls the latest
        # environment information back.
        # TODO: moving the event update out of the critic's act and into
        # this place may be more sensible.
        await self.critic_reviewer._act()

        critique = self.game_memory.critique
        event_summary = self.game_memory.event_summary
        skills = self.game_memory.skills

        if not self.game_memory.runtime_status:
            # TODO: call back into game memory to reset the block info.
            logger.info("Not success, reset block info !")
            # NOTE(review): the source indentation was lost; keeping this log
            # inside the failure branch is an assumption -- confirm.
            logger.info(
                f"\033[32m****Action Agent human message****\n{self.messages[-1].content}\033[0m"
            )

        # Adding the program as a new skill (``handle_add_new_skills``) is
        # currently disabled; only the skill retrieval is refreshed, whether
        # the run succeeded or not.
        await self.retrieve_skills(
            query=context + "\n\n" + event_summary, skills=skills
        )
        refreshed_skills = self.game_memory.retrieve_skills

        message = self.encapsule_message(
            events=events,
            code=code,
            task=task,
            context=context,
            critique=critique,
            skills=refreshed_skills,
        )

    self.rollout_num_iter += 1
    out_of_retries = self.rollout_num_iter >= self.task_max_retries
    done = out_of_retries or self.game_memory.runtime_status
    # NOTE(review): ``self.task`` and ``self.conversations`` are read from
    # the role itself, although the task was fetched from game memory above
    # -- confirm that both attributes exist on this role.
    info = dict(
        task=self.task,
        success=self.game_memory.runtime_status,
        conversations=self.conversations,
    )

    self.perform_game_info_callback(code, self.game_memory.update_code)
    self.perform_game_info_callback(
        program_name, self.game_memory.update_program_name
    )

    return message, 0, done, info
|
||||
|
||||
async def generate_action_code(self, human_msg, system_msg, *args, **kwargs):
|
||||
code, program_name = await GenerateActionCode().run(
|
||||
human_msg, system_msg, *args, **kwargs
|
||||
|
|
@ -198,11 +307,12 @@ class ActionDeveloper(Base):
|
|||
)
|
||||
# logger.info(msg)
|
||||
return msg
|
||||
|
||||
|
||||
async def _act(self) -> Message:
|
||||
todo = self._rc.todo
|
||||
logger.debug(f"Todo is {todo}")
|
||||
self.maintain_actions(todo)
|
||||
|
||||
# 获取最新的游戏周边信息
|
||||
events = await self._obtain_events()
|
||||
self.perform_game_info_callback(events, self.game_memory.update_event)
|
||||
|
|
@ -211,7 +321,8 @@ class ActionDeveloper(Base):
|
|||
code = self.game_memory.code
|
||||
critique = self.game_memory.critique
|
||||
retrieve_skills = self.game_memory.retrieve_skills
|
||||
|
||||
|
||||
# 对自己所需的环境信息进行处理
|
||||
message = self.encapsule_message(
|
||||
events=events,
|
||||
code=code,
|
||||
|
|
@ -222,11 +333,11 @@ class ActionDeveloper(Base):
|
|||
)
|
||||
logger.info(todo)
|
||||
handler_map = {
|
||||
GenerateActionCode: self.generate_action_code,
|
||||
GenerateActionCode: self.run_step#self.generate_action_code,
|
||||
}
|
||||
handler = handler_map.get(type(todo))
|
||||
logger.info(handler)
|
||||
|
||||
|
||||
if handler:
|
||||
msg = await handler(**message)
|
||||
msg.cause_by = type(todo)
|
||||
|
|
@ -234,5 +345,7 @@ class ActionDeveloper(Base):
|
|||
logger.info(msg.send_to)
|
||||
self._publish_message(msg)
|
||||
return msg
|
||||
|
||||
|
||||
raise ValueError(f"Unknown todo type: {type(todo)}")
|
||||
|
||||
# await self.run_step()
|
||||
|
|
@ -28,12 +28,28 @@ class CriticReviewer(Base):
|
|||
) -> None:
|
||||
super().__init__(name, profile, goal, constraints)
|
||||
# Initialize actions specific to the CriticReviewer role
|
||||
# self._init_actions([VerifyTask])
|
||||
self._init_actions([VerifyTask])
|
||||
|
||||
# Set events or actions the CriticReviewer should watch or be aware of
|
||||
# 需要获取最新的events来进行评估
|
||||
self._watch([GenerateActionCode, AddNewSkills])
|
||||
|
||||
async def run(self, message=None):
    """Take in a message, or observe, then mark ``VerifyTask`` as the todo.

    Args:
        message: Optional input.  A ``str`` is wrapped in a ``Message``, a
            ``Message`` is received as is, and a ``list`` is joined with
            newlines into a single ``Message``.  When it is falsy the role
            observes instead.

    Returns:
        ``None`` in every case; when there is neither a message nor any
        observed news the method returns before the todo is set.
    """
    if message:
        incoming = Message(message) if isinstance(message, str) else message
        if isinstance(incoming, Message):
            self.recv(incoming)
        if isinstance(incoming, list):
            self.recv(Message("\n".join(incoming)))
    elif not await self._observe():
        # Nothing new was observed: suspend and wait.
        logger.info(f"{self._setting}: no news. waiting.")
        return
    # NOTE(review): the class itself, not an instance, is stored as the
    # todo -- confirm that consumers of ``_rc.todo`` expect that.
    self._rc.todo = VerifyTask
|
||||
|
||||
def render_system_message(self):
    """Return a ``SystemMessage`` whose content is the loaded "critic" prompt."""
    return SystemMessage(content=load_prompt("critic"))
|
||||
|
|
@ -119,6 +135,7 @@ class CriticReviewer(Base):
|
|||
self.perform_game_info_callback(
|
||||
success, self.game_memory.update_exploration_progress
|
||||
)
|
||||
self.perform_game_info_callback(critique, self.game_memory.update_critique)
|
||||
return Message(
|
||||
content=f"{critique}",
|
||||
instruct_content="verify_task",
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ async def learn(task="Start", investment: float = 50.0, n_round: int = 3):
|
|||
[
|
||||
CurriculumDesigner(),
|
||||
ActionDeveloper(),
|
||||
CriticReviewer(),
|
||||
# CriticReviewer(),
|
||||
SkillManager(),
|
||||
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue