From dc0fd134fb3c5107a841b3c30a0e8f55930cb640 Mon Sep 17 00:00:00 2001 From: stellahsr Date: Fri, 6 Oct 2023 16:16:09 +0800 Subject: [PATCH] update locally --- metagpt/minecraft_team.py | 8 + metagpt/roles/minecraft/action_developer.py | 195 ++++++++++++++++---- metagpt/roles/minecraft/critic_agent.py | 17 ++ minecraft_run.py | 2 +- 4 files changed, 180 insertions(+), 42 deletions(-) diff --git a/metagpt/minecraft_team.py b/metagpt/minecraft_team.py index 5ead788ce..c7fe6f685 100644 --- a/metagpt/minecraft_team.py +++ b/metagpt/minecraft_team.py @@ -51,6 +51,7 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): chest_observation: str = Field(default="") # eg: "Chests: None\n\n" mf_instance: MineflayerEnv = Field(default_factory=MineflayerEnv) + runtime_status: bool = False # equal to action execution status: success or failed @property def progress(self): @@ -200,6 +201,10 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): chatlog.add(item) return "I also need " + ", ".join(chatlog) + "." if chatlog else "" + def reset_block_info(self): + # revert all the placing event in the last step + pass + def update_exploration_progress(self, success: bool): """ Split task into completed_tasks or failed_tasks @@ -209,6 +214,9 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): "conversations": self.conversations, } """ + # update runtime status in game memory + self.runtime_status = success + task = self.current_task if task.startswith("Deposit useless items into the chest at"): return diff --git a/metagpt/roles/minecraft/action_developer.py b/metagpt/roles/minecraft/action_developer.py index 9171e455b..9791d5b88 100644 --- a/metagpt/roles/minecraft/action_developer.py +++ b/metagpt/roles/minecraft/action_developer.py @@ -2,6 +2,8 @@ # @Date : 2023/9/23 12:45 # @Author : stellahong (stellahong@fuzhi.ai) # @Desc : +import copy + from metagpt.logs import logger from metagpt.roles.minecraft.minecraft_base import Minecraft as Base from metagpt.schema import Message, HumanMessage, SystemMessage @@ -17,6 +19,8 @@ from metagpt.config import CONFIG from metagpt.actions.minecraft.control_primitives_context import ( load_skills_code_context, ) +from metagpt.utils.minecraft import fix_and_parse_json +from metagpt.roles.minecraft.critic_agent import CriticReviewer @agent_registry.register("action_developer") @@ -25,28 +29,31 @@ class ActionDeveloper(Base): iterative prompting mechanism in paper. generate action code based on environment observation and plan, as well as skills retrieval results """ - + def __init__( - self, - name: str = "Bob", - profile: str = "Generate code for specified tasks", - goal: str = "Produce accurate and efficient code solutions in Python and JavaScript", - constraints: str = "Adhere to coding best practices and style guidelines", + self, + name: str = "Bob", + profile: str = "Generate code for specified tasks", + goal: str = "Produce accurate and efficient code solutions in Python and JavaScript", + constraints: str = "Adhere to coding best practices and style guidelines", ) -> None: super().__init__(name, profile, goal, constraints) # Initialize actions specific to the Action role self._init_actions([GenerateActionCode]) - + # Set events or actions the ActionAgent should watch or be aware of # 需要根据events进行自己chest_observation的更新 self._watch([RetrieveSkills]) - + self.rollout_num_iter = 0 + self.task_max_retries = 4 + self.critic_reviewer = CriticReviewer() + def render_system_message(self, skills=[], *args, **kwargs): """ According to basic skills context files to genenarate js skill codes. Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py """ - + action_template = utils.load_prompt("action_template") base_skills = [ "exploreUntil", @@ -69,21 +76,21 @@ class ActionDeveloper(Base): system_action_message = SystemMessage(content=system_action_prompt) assert isinstance(system_action_message, SystemMessage) return system_action_message - + def render_human_message( - self, events, code="", task="", context="", critique="", *args, **kwargs + self, events, code="", task="", context="", critique="", *args, **kwargs ): """ Integrate observation about the environment(especially events), add to HumanMessage. Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py """ - + # Deal with events info chat_messages = [] error_messages = [] # damage_messages = [] # TODO: try to add damage_messages into prompt later assert events[-1][0] == "observe", "Last event must be observe" - + for i, (event_type, event) in enumerate(events): if event_type == "onChat": chat_messages.append(event["onChat"]) @@ -101,30 +108,30 @@ class ActionDeveloper(Base): inventory_used = event["status"]["inventoryUsed"] inventory = event["inventory"] assert i == len(events) - 1, "observe must be the last event" - + # Collect all the environment information into a str: observation observation = "" - + observation = ( f"Code from the last round:\n{code or 'No code in the first round'}\n\n" ) - + if error_messages: error = "\n".join(error_messages) observation += f"Execution error:\n{error}\n\n" else: observation += f"Execution error: No error\n\n" - + if chat_messages: chat_log = "\n".join(chat_messages) observation += f"Chat log: {chat_log}\n\n" else: observation += f"Chat log: None\n\n" - + observation += f"Biome: {biome}\n\n" observation += f"Time: {time_of_day}\n\n" observation += f"Nearby blocks: {', '.join(voxels) if voxels else 'None'}\n\n" - + if entities: nearby_entities = [ k for k, v in sorted(entities.items(), key=lambda x: x[1]) @@ -132,35 +139,35 @@ class ActionDeveloper(Base): observation += f"Nearby entities (nearest to farthest): {', '.join(nearby_entities)}\n\n" else: observation += f"Nearby entities (nearest to farthest): None\n\n" - + observation += f"Health: {health:.1f}/20\n\n" observation += f"Hunger: {hunger:.1f}/20\n\n" observation += f"Position: x={position['x']:.1f}, y={position['y']:.1f}, z={position['z']:.1f}\n\n" observation += f"Equipment: {equipment}\n\n" observation += f"Inventory ({inventory_used}/36): {'Empty' if not inventory else ', '.join(inventory)}\n\n" - + if not ( - task == "Place and deposit useless items into a chest" - or task.startswith("Deposit useless items into the chest at") + task == "Place and deposit useless items into a chest" + or task.startswith("Deposit useless items into the chest at") ): observation += self.game_memory.chest_observation - + observation += f"Task: {task}\n\n" observation += f"Context: {context or 'None'}\n\n" observation += f"Critique: {critique or 'None'}\n\n" - + return HumanMessage(content=observation) - + def encapsule_message( - self, - events, - code="", - task="", - context="", - critique="", - skills=[], - *args, - **kwargs, + self, + events, + code="", + task="", + context="", + critique="", + skills=[], + *args, + **kwargs, ): system_message = self.render_system_message(skills=skills) human_message = self.render_human_message( @@ -170,7 +177,7 @@ class ActionDeveloper(Base): "system_msg": [system_message.content], "human_msg": human_message.content, } - + async def _observe(self) -> int: await super()._observe() for msg in self._rc.news: @@ -180,7 +187,109 @@ class ActionDeveloper(Base): ] # only relevant msgs count as observed news logger.info(len(self._rc.news)) return len(self._rc.news) + + async def run_step(self, human_msg, system_msg, *args, **kwargs): + while True: + messages, reward, done, info = self.runcode_and_evaluate(human_msg, system_msg, *args, **kwargs) + if done: + break + return messages, reward, done, info + + async def handle_add_new_skills( + self, task, program_name, program_code, skills, *args, **kwargs + ): + skill_desp = self.game_memory.skill_desp + new_skills_info = await AddNewSkills().run( + task, program_name, program_code, skills, skill_desp + ) + # update skills in game memory + self.perform_game_info_callback(new_skills_info, self.game_memory.append_skill) + + async def retrieve_skills(self, query, skills, *args, **kwargs): + retrieve_skills = await RetrieveSkills().run(query, skills) + logger.info(f"Render Action Agent system message with {len(retrieve_skills)} skills") + self.perform_game_info_callback(retrieve_skills, self.game_memory.update_retrieve_skills) + # return Message(content=f"{retrieve_skills}", instruct_content="retrieve_skills", + # role=self.profile, send_to=agent_registry.entries["action_developer"]()._setting.name) + + async def runcode_and_evaluate(self, human_msg, system_msg, *args, **kwargs): + """ + equal to step() in voyager + """ + task = self.game_memory.current_task + context = self.game_memory.context + + # 更新生成的代码和对应程序名称 + code, program_name = await GenerateActionCode().run( + human_msg, system_msg, *args, **kwargs + ) + # logger.warning(type(code)) + # logger.info(f"Code is Here:{code}") + + if code is not None: + # fixme:若有独立的mc code执行入口函数,使用独立的函数 + events = await self._obtain_events() + # 注意:这里的events对应是执行了新的action函数之后的events信息 + # 更新了评估结果, 回调了最新的环境信息到ga + await self.critic_reviewer._act() # todo: critic act内的update event放在这里似乎更合理? + + critique = self.game_memory.critique + event_summary = self.game_memory.event_summary + skills = self.game_memory.skills + + if not self.game_memory.runtime_status: + # todo: callback game memory reset block info + logger.info("Not success, reset block info !") + logger.info( + f"\033[32m****Action Agent human message****\n{self.messages[-1].content}\033[0m" + ) + + # add new skills no matter success or not + # add_new_skills_message = { + # "task": task, + # "program_name": program_name, + # "program_code": code, + # "skills": self.game_memory.skills, + # } + new_skill_info = {"query": context + "\n\n" + event_summary, "skills": skills} + + # await self.handle_add_new_skills(**add_new_skills_message) + await self.retrieve_skills(**new_skill_info) + retrieve_skills = self.game_memory.retrieve_skills + + message = self.encapsule_message( + events=events, + code=code, + task=task, + context=context, + critique=critique, + skills=retrieve_skills, + ) + + + else: + message = { + "system_msg": [system_msg.content], + "human_msg": human_msg.content, + } + logger.info(f"\033[34m Trying again!\033[0m") + + self.rollout_num_iter += 1 + done = (self.rollout_num_iter >= self.task_max_retries or self.game_memory.runtime_status) + info = { + "task": self.task, + "success": self.game_memory.runtime_status, + "conversations": self.conversations, + } + + self.perform_game_info_callback(code, self.game_memory.update_code) + self.perform_game_info_callback( + program_name, self.game_memory.update_program_name + ) + + return message, 0, done, info + async def generate_action_code(self, human_msg, system_msg, *args, **kwargs): code, program_name = await GenerateActionCode().run( human_msg, system_msg, *args, **kwargs @@ -198,11 +307,12 @@ class ActionDeveloper(Base): ) # logger.info(msg) return msg - + async def _act(self) -> Message: todo = self._rc.todo logger.debug(f"Todo is {todo}") self.maintain_actions(todo) + # 获取最新的游戏周边信息 events = await self._obtain_events() self.perform_game_info_callback(events, self.game_memory.update_event) @@ -211,7 +321,8 @@ class ActionDeveloper(Base): code = self.game_memory.code critique = self.game_memory.critique retrieve_skills = self.game_memory.retrieve_skills - + + # 对自己所需的环境信息进行处理 message = self.encapsule_message( events=events, code=code, @@ -222,11 +333,11 @@ class ActionDeveloper(Base): ) logger.info(todo) handler_map = { - GenerateActionCode: self.generate_action_code, + GenerateActionCode: self.run_step#self.generate_action_code, } handler = handler_map.get(type(todo)) logger.info(handler) - + if handler: msg = await handler(**message) msg.cause_by = type(todo) @@ -234,5 +345,7 @@ class ActionDeveloper(Base): logger.info(msg.send_to) self._publish_message(msg) return msg - + raise ValueError(f"Unknown todo type: {type(todo)}") + + # await self.run_step() \ No newline at end of file diff --git a/metagpt/roles/minecraft/critic_agent.py b/metagpt/roles/minecraft/critic_agent.py index 7bb90767a..d9c9db699 100644 --- a/metagpt/roles/minecraft/critic_agent.py +++ b/metagpt/roles/minecraft/critic_agent.py @@ -28,12 +28,28 @@ class CriticReviewer(Base): ) -> None: super().__init__(name, profile, goal, constraints) # Initialize actions specific to the CriticReviewer role + # self._init_actions([VerifyTask]) self._init_actions([VerifyTask]) # Set events or actions the CriticReviewer should watch or be aware of # 需要获取最新的events来进行评估 self._watch([GenerateActionCode, AddNewSkills]) + async def run(self, message=None): + """Observe, only get the observation""" + if message: + if isinstance(message, str): + message = Message(message) + if isinstance(message, Message): + self.recv(message) + if isinstance(message, list): + self.recv(Message("\n".join(message))) + elif not await self._observe(): + # If there is no new information, suspend and wait + logger.info(f"{self._setting}: no news. waiting.") + return + self._rc.todo = VerifyTask + def render_system_message(self): system_message = SystemMessage(content=load_prompt("critic")) return system_message @@ -119,6 +135,7 @@ class CriticReviewer(Base): self.perform_game_info_callback( success, self.game_memory.update_exploration_progress ) + self.perform_game_info_callback(critique, self.game_memory.update_critique) return Message( content=f"{critique}", instruct_content="verify_task", diff --git a/minecraft_run.py b/minecraft_run.py index d7d2cf7c2..d0bcb5fd9 100644 --- a/minecraft_run.py +++ b/minecraft_run.py @@ -19,7 +19,7 @@ async def learn(task="Start", investment: float = 50.0, n_round: int = 3): [ CurriculumDesigner(), ActionDeveloper(), - CriticReviewer(), + # CriticReviewer(), SkillManager(), ]