diff --git a/metagpt/actions/minecraft/design_curriculumn.py b/metagpt/actions/minecraft/design_curriculumn.py index b802216fa..9d0daa72e 100644 --- a/metagpt/actions/minecraft/design_curriculumn.py +++ b/metagpt/actions/minecraft/design_curriculumn.py @@ -208,7 +208,7 @@ class DesignCurriculum(Action): return context except Exception as e: logger.info(f"Error parsing curriculum response: {e}. Trying again!") - return self.generate_context( + return await self.generate_context( task=task, max_retries=max_retries - 1, ) diff --git a/metagpt/minecraft_team.py b/metagpt/minecraft_team.py index 36c4a7dc3..68e20ea89 100644 --- a/metagpt/minecraft_team.py +++ b/metagpt/minecraft_team.py @@ -25,7 +25,7 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): """ 游戏环境的记忆,用于多个agent进行信息的共享和缓存,而不需要重复在自己的角色内维护缓存 """ - + event: dict[str, Any] = Field(default_factory=dict) current_task: str = Field(default="Mine 1 wood log") task_execution_time: float = Field(default=float) @@ -35,24 +35,24 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): code: str = Field(default="") program_name: str = Field(default="") critique: str = Field(default="") - skills: dict = Field(default_factory=dict) # for skills.json - retrieve_skills: list[str] = Field(default_factory=list) + skills: dict = Field(default_factory=dict) # for skills.json + retrieve_skills: list[str] = Field(default_factory=list) event_summary: str = Field(default="") - + qa_cache: dict[str, str] = Field(default_factory=dict) completed_tasks: list[str] = Field(default_factory=list) # Critique things failed_tasks: list[str] = Field(default_factory=list) - + skill_desp: str = Field(default="") - + chest_memory: dict[str, Any] = Field( default_factory=dict ) # eg: {'(1344, 64, 1381)': 'Unknown'} chest_observation: str = Field(default="") # eg: "Chests: None\n\n" - + mf_instance: MineflayerEnv = Field(default_factory=MineflayerEnv) runtime_status: bool = False # equal to action execution status: success or failed - + @property def progress(self): # return len(self.completed_tasks) + 10 # Test only @@ -62,30 +62,30 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): def programs(self): programs = "" if self.code == "": - return programs # TODO: maybe fix 10054 now, a better way is isolating env.step() like voyager + return programs # TODO: maybe fix 10054 now, a better way is isolating env.step() like voyager for skill_name, entry in self.skills.items(): programs += f"{entry['code']}\n\n" for primitives in load_skills_code(): programs += f"{primitives}\n\n" - return programs - + return programs + @property def warm_up(self): return self.mf_instance.warm_up - + @property def core_inv_items_regex(self): return self.mf_instance.core_inv_items_regex - + def set_mc_port(self, mc_port): self.mf_instance.set_mc_port(mc_port) - + def set_mc_resume(self, resume: bool = False): # TODO: mv to config if resume: logger.info(f"Loading Action Developer from {CKPT_DIR}/action") with open(f"{CKPT_DIR}/action/chest_memory.json", "r") as f: self.chest_memory = json.load(f) - + logger.info(f"Loading Curriculum Agent from {CKPT_DIR}/curriculum") with open(f"{CKPT_DIR}/curriculum/completed_tasks.json", "r") as f: self.completed_tasks = json.load(f) @@ -93,46 +93,46 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): self.failed_tasks = json.load(f) with open(f"{CKPT_DIR}/curriculum/qa_cache.json", "r") as f: self.qa_cache = json.load(f) - + logger.info(f"Loading Skill Manager from {CKPT_DIR}/skill\033[0m") with open(f"{CKPT_DIR}/skill/skills.json", "r") as f: self.skills = json.load(f) - + def register_roles(self, roles: Iterable[Minecraft]): for role in roles: role.set_memory(self) - + def update_event(self, event: Dict): if self.event == event: return self.event = event - self.update_chest_memory(event) - self.event_summary = self.summarize_chatlog(event) - + # self.update_chest_memory(event) + # self.event_summary = self.summarize_chatlog(event) + def update_task(self, task: str): self.current_task = task - + def update_context(self, context: str): self.context = context - + def update_code(self, code: str): self.code = code # action_developer.gen_action_code to HERE - + def update_program_name(self, program_name: str): self.program_name = program_name - + def update_critique(self, critique: str): self.critique = critique # critic_agent.check_task_success to HERE - + def append_skill(self, skill: dict): self.skills[self.program_name] = skill # skill_manager.retrieve_skills to HERE - + def update_retrieve_skills(self, retrieve_skills: list): self.retrieve_skills = retrieve_skills - + def update_skill_desp(self, skill_desp: str): self.skill_desp = skill_desp - + def update_chest_memory(self, events: Dict): """ Input: events: Dict @@ -152,13 +152,13 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): self.chest_memory[position] = chest with open(f"{CKPT_DIR}/action/chest_memory.json", "w") as f: json.dump(self.chest_memory, f) - + def update_chest_observation(self): """ update chest_memory to chest_observation. Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py """ - + chests = [] for chest_position, chest in self.chest_memory.items(): if isinstance(chest, dict) and len(chest) > 0: @@ -176,7 +176,7 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): self.chest_observation = f"Chests:\n{chests}\n\n" else: self.chest_observation = f"Chests: None\n\n" - + def summarize_chatlog(self, events): def filter_item(message: str): craft_pattern = r"I cannot make \w+ because I need: (.*)" @@ -185,14 +185,13 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): ) mine_pattern = r"I need at least a (.*) to mine \w+!" if re.match(craft_pattern, message): - self.event_summary = re.match(craft_pattern, message).groups()[0] + self.event_summary = re.match(craft_pattern, message).groups()[0] elif re.match(craft_pattern2, message): self.event_summary = "a nearby crafting table" elif re.match(mine_pattern, message): self.event_summary = re.match(mine_pattern, message).groups()[0] else: self.event_summary = "" - chatlog = set() for event_type, event in events: if event_type == "onChat": @@ -200,10 +199,11 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): if item: chatlog.add(item) self.event_summary = "I also need " + ", ".join(chatlog) + "." if chatlog else "" + def reset_block_info(self): # revert all the placing event in the last step pass - + def update_exploration_progress(self, success: bool): """ Split task into completed_tasks or failed_tasks @@ -213,6 +213,7 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): "conversations": self.conversations, } """ + self.runtime_status = success task = self.current_task if task.startswith("Deposit useless items into the chest at"): return @@ -238,9 +239,9 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): ) self.event[-1][1]["inventory"] = new_events[-1][1]["inventory"] self.event[-1][1]["voxels"] = new_events[-1][1]["voxels"] - + self.save_sorted_tasks() - + def save_sorted_tasks(self): updated_completed_tasks = [] # record repeated failed tasks @@ -249,21 +250,21 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): for task in self.completed_tasks: if task not in updated_completed_tasks: updated_completed_tasks.append(task) - + # remove completed tasks from failed tasks for task in updated_completed_tasks: while task in updated_failed_tasks: updated_failed_tasks.remove(task) - + self.completed_tasks = updated_completed_tasks self.failed_tasks = updated_failed_tasks - + # dump to json with open(f"{CKPT_DIR}/curriculum/completed_tasks.json", "w") as f: json.dump(self.completed_tasks, f) with open(f"{CKPT_DIR}/curriculum/failed_tasks.json", "w") as f: json.dump(self.failed_tasks, f) - + async def on_event_retrieve(self, *args): """ Retrieve Minecraft events. @@ -275,25 +276,25 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): Exception: If there is an issue retrieving events. """ try: - self.mf_instance.reset( - options={ - "mode": "soft", - "wait_ticks": 20, - } - ) + self.mf_instance.reset( + options={ + "mode": "soft", + "wait_ticks": 20, + } + ) difficulty = ( - "easy" if len(self.completed_tasks) > 15 else "peaceful" + "easy" if len(self.completed_tasks) > 15 else "peaceful" ) events = self.mf_instance.step( - "bot.chat(`/time set ${getNextTime()}`);\n" - + f"bot.chat('/difficulty {difficulty}');" + "bot.chat(`/time set ${getNextTime()}`);\n" + + f"bot.chat('/difficulty {difficulty}');" ) self.update_event(events) return events except Exception as e: logger.error(f"Failed to retrieve Minecraft events: {str(e)}") raise {} - + async def on_event_execute(self, *args): """ Execute Minecraft events. @@ -318,21 +319,22 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True): logger.error(f"Failed to execute Minecraft events: {str(e)}") raise {} + class MinecraftPlayer(SoftwareCompany): """ Software Company: Possesses a team, SOP (Standard Operating Procedures), and a platform for instant messaging, dedicated to writing executable code. """ - + environment: Environment = Field(default_factory=Environment) game_memory: GameEnvironment = Field(default_factory=GameEnvironment) investment: float = Field(default=50.0) task: str = Field(default="") game_info: dict = Field(default={}) - + def set_port(self, mc_port): self.game_memory.set_mc_port(mc_port) - + def set_resume(self, resume: bool = False): self.game_memory.set_mc_resume(resume=resume) @@ -341,9 +343,9 @@ class MinecraftPlayer(SoftwareCompany): for role in self.environment.roles.values(): status = role.finish_step complete_round.append(status) - #if not status: + # if not status: # return complete_round - #complete_round = True + # complete_round = True complete_round_tag = all(complete_round) logger.info(f"complete_round {complete_round}") return complete_round_tag @@ -351,14 +353,14 @@ class MinecraftPlayer(SoftwareCompany): def update_round(self): for role in self.environment.roles.values(): role.finish_step = False - role.round_id+=1 + role.round_id += 1 role._rc.todo = None logger.info(f"round_id:{role.round_id}") - + def hire(self, roles: list[Role]): self.environment.add_roles(roles) self.game_memory.register_roles(roles) - + def start(self, task, round=0): """Start a project from publishing boss requirement.""" self.task = task @@ -366,30 +368,42 @@ class MinecraftPlayer(SoftwareCompany): Message(role="Player", content=task, cause_by=PlayerActions, round_id=round) ) logger.info(self.game_info) - + def _save(self): logger.info(self.json()) def _reset(self): for role_profile, role in self.environment.roles.items(): role.reset_state() - + async def run(self, n_round=3): """Run company until target round or no money""" - round_id=0 + round_id = 0 + self.game_memory.mf_instance.reset( + options={ + "mode": "soft", + "wait_ticks": 20, + } + ) + events = self.game_memory.mf_instance.step( + code="", + programs="", + ) + self.game_memory.update_event(events) + while n_round > 0: # self._save() if self.check_complete_round(): n_round -= 1 self.update_round() - round_id+=1 + round_id += 1 # add new task into env and continue - #fixme: update self.task + # fixme: update self.task self.start(task=self.task, round=round_id) - + logger.info(f"{n_round=}") self._check_balance() await self.environment.run() - #self.environment.memory.clear() - #self._reset() + # self.environment.memory.clear() + # self._reset() return self.environment.history diff --git a/metagpt/roles/minecraft/action_developer.py b/metagpt/roles/minecraft/action_developer.py index d5d7b7b4a..4f585ea26 100644 --- a/metagpt/roles/minecraft/action_developer.py +++ b/metagpt/roles/minecraft/action_developer.py @@ -47,7 +47,7 @@ class ActionDeveloper(Base): self._watch([RetrieveSkills]) self.rollout_num_iter = 0 self.task_max_retries = 4 - self.critic_reviewer = None #self._rc.env.roles["Task Reviewer"] + self.critic_reviewer = None # self._rc.env.roles["Task Reviewer"] logger.info(self.critic_reviewer) def render_system_message(self, skills=[], *args, **kwargs): @@ -193,10 +193,11 @@ class ActionDeveloper(Base): async def run_step(self, human_msg, system_msg, *args, **kwargs): while True: logger.info(f"self.rollout_num_iter {self.rollout_num_iter}") - system_msg, human_msg, reward, done, info = await self.runcode_and_evaluate(human_msg, system_msg, *args, **kwargs) + system_msg, human_msg, reward, done, info = await self.runcode_and_evaluate(human_msg, system_msg, *args, + **kwargs) if done: break - #return [system_msg, human_msg], reward, done, info + # return [system_msg, human_msg], reward, done, info return Message( content=f"{info}", instruct_content="generate_action_code", @@ -237,13 +238,15 @@ class ActionDeveloper(Base): if code is not None: # fixme:若有独立的mc code执行入口函数,使用独立的函数 - events = await self._obtain_events() + events = await self._execute_events() # 注意:这里的events对应是执行了新的action函数之后的events信息 # 更新了评估结果, 回调了最新的环境信息到ga self.critic_reviewer = self._rc.env.roles["Task Reviewer"] await self.critic_reviewer._act() # todo: critic act内的update event放在这里似乎更合理? critique = self.game_memory.critique + self.perform_game_info_callback(self.game_memory.event, self.game_memory.summarize_chatlog) + event_summary = self.game_memory.event_summary skills = self.game_memory.skills @@ -275,7 +278,7 @@ class ActionDeveloper(Base): critique=critique, skills=retrieve_skills, ) - + system_msg = message["system_msg"] human_msg = message["human_msg"] else: @@ -289,7 +292,7 @@ class ActionDeveloper(Base): "task": self.game_memory.current_task, "success": self.game_memory.runtime_status, } - + logger.info(f"info is {info}") self.perform_game_info_callback(code, self.game_memory.update_code) self.perform_game_info_callback( program_name, self.game_memory.update_program_name @@ -312,7 +315,7 @@ class ActionDeveloper(Base): instruct_content="generate_action_code", role=self.profile, ) - + return msg async def _act(self) -> Message: @@ -321,8 +324,11 @@ class ActionDeveloper(Base): self.maintain_actions(todo) # 获取最新的游戏周边信息 - events = await self._obtain_events() - self.perform_game_info_callback(events, self.game_memory.update_event) + # events = await self._obtain_events() + events = self.game_memory.event + logger.info(events) + # self.perform_game_info_callback(events, self.game_memory.update_event) + logger.info(self.game_memory.event_summary) context = self.game_memory.context task = self.game_memory.current_task code = self.game_memory.code @@ -340,7 +346,7 @@ class ActionDeveloper(Base): ) logger.info(todo) handler_map = { - GenerateActionCode: self.run_step#self.generate_action_code, + GenerateActionCode: self.run_step # self.generate_action_code, } handler = handler_map.get(type(todo)) logger.info(handler) @@ -355,4 +361,3 @@ class ActionDeveloper(Base): return msg raise ValueError(f"Unknown todo type: {type(todo)}") - diff --git a/metagpt/roles/minecraft/skill_manager.py b/metagpt/roles/minecraft/skill_manager.py index a1e080184..d55b5f968 100644 --- a/metagpt/roles/minecraft/skill_manager.py +++ b/metagpt/roles/minecraft/skill_manager.py @@ -28,7 +28,7 @@ class SkillManager(Base): super().__init__(name, profile, goal, constraints) # Initialize actions specific to the SkillManager role - self._init_actions([RetrieveSkills, GenerateSkillDescription]) #AddNewSkills])#先去掉add + self._init_actions([RetrieveSkills, GenerateSkillDescription, AddNewSkills]) #AddNewSkills])#先去掉add # Set events or actions the SkillManager should watch or be aware of self._watch( @@ -65,6 +65,13 @@ class SkillManager(Base): async def handle_add_new_skills( self, task, program_name, program_code, skills, *args, **kwargs ): + if not self.game_memory.runtime_status: + return Message( + content="", + instruct_content="handle_add_new_skills", + role=self.profile, + ) + skill_desp = self.game_memory.skill_desp new_skills_info = await AddNewSkills().run( task, program_name, program_code, skills, skill_desp @@ -83,9 +90,10 @@ class SkillManager(Base): # 获取最新的游戏周边信息 context = self.game_memory.context task = self.game_memory.current_task - event_summary = self.game_memory.event_summary + code = self.game_memory.code self.perform_game_info_callback(self.game_memory.event, self.game_memory.summarize_chatlog) + event_summary = self.game_memory.event_summary try: program_code = code["program_code"] # TODO: Handle code is None, cuz first round DesignCurriculum(code is None) trigger this except (KeyError, TypeError): @@ -97,7 +105,9 @@ class SkillManager(Base): # msg = self._rc.memory.get(k=1)[0] retrieve_skills_message_step1 = {"query": context, "skills": skills} - + logger.info(f"check query {context}") + logger.info(f"check event summary {event_summary}") + retrieve_skills_message_step2 = {"query": context + "\n\n" + event_summary, "skills": skills} generate_skill_message = self.encapsule_message(program_code, program_name) diff --git a/minecraft_run.py b/minecraft_run.py index 8592fb5ef..ed9276265 100644 --- a/minecraft_run.py +++ b/minecraft_run.py @@ -24,8 +24,7 @@ async def learn(task="Start", investment: float = 50.0, n_round: int = 3): ] ) - print(mc_player.environment.roles) - print(mc_player.environment.roles["Generate code for specified tasks"]._rc) + mc_player.invest(investment) mc_player.start(task) await mc_player.run(n_round=n_round)