update locally

2026-06-17 15:35:21 +02:00 · 2023-10-06 16:16:09 +08:00 · 2023-10-06 16:16:09 +08:00 · dc0fd134fb
commit dc0fd134fb
parent eb9ea304a5
4 changed files with 180 additions and 42 deletions
--- a/metagpt/minecraft_team.py
+++ b/metagpt/minecraft_team.py
@ -51,6 +51,7 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
    chest_observation: str = Field(default="")  # eg: "Chests: None\n\n"

    mf_instance: MineflayerEnv = Field(default_factory=MineflayerEnv)
+    runtime_status: bool = False  # equal to action execution status: success or failed

    @property
    def progress(self):
@ -200,6 +201,10 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
                    chatlog.add(item)
        return "I also need " + ", ".join(chatlog) + "." if chatlog else ""

+    def reset_block_info(self):
+        # Placeholder: meant to revert all the placing events of the last step.
+        # Not implemented yet; calling it currently does nothing.
+        pass
+
    def update_exploration_progress(self, success: bool):
        """
        Split task into completed_tasks or failed_tasks
@ -209,6 +214,9 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
            "conversations": self.conversations,
        }
        """
+        # update runtime status in game memory
+        self.runtime_status = success
+
        task = self.current_task
        if task.startswith("Deposit useless items into the chest at"):
            return
--- a/metagpt/roles/minecraft/action_developer.py
+++ b/metagpt/roles/minecraft/action_developer.py
@ -2,6 +2,8 @@
 # @Date    : 2023/9/23 12:45
 # @Author  : stellahong (stellahong@fuzhi.ai)
 # @Desc    :
+import copy
+
 from metagpt.logs import logger
 from metagpt.roles.minecraft.minecraft_base import Minecraft as Base
 from metagpt.schema import Message, HumanMessage, SystemMessage
@ -17,6 +19,8 @@ from metagpt.config import CONFIG
 from metagpt.actions.minecraft.control_primitives_context import (
    load_skills_code_context,
 )
+from metagpt.utils.minecraft import fix_and_parse_json
+from metagpt.roles.minecraft.critic_agent import CriticReviewer


@agent_registry.register("action_developer")
@ -25,28 +29,31 @@ class ActionDeveloper(Base):
    iterative prompting mechanism in paper.
    generate action code based on environment observation and plan, as well as skills retrieval results
    """
-
+    
    def __init__(
-        self,
-        name: str = "Bob",
-        profile: str = "Generate code for specified tasks",
-        goal: str = "Produce accurate and efficient code solutions in Python and JavaScript",
-        constraints: str = "Adhere to coding best practices and style guidelines",
+            self,
+            name: str = "Bob",
+            profile: str = "Generate code for specified tasks",
+            goal: str = "Produce accurate and efficient code solutions in Python and JavaScript",
+            constraints: str = "Adhere to coding best practices and style guidelines",
    ) -> None:
        super().__init__(name, profile, goal, constraints)
        # Initialize actions specific to the Action role
        self._init_actions([GenerateActionCode])
-
+        
        # Set events or actions the ActionAgent should watch or be aware of
        # Needs to update its own chest_observation according to the events
        self._watch([RetrieveSkills])
-
+        # Number of runcode_and_evaluate() rounds performed so far
+        self.rollout_num_iter = 0
+        # Retry budget: runcode_and_evaluate() reports done once rollout_num_iter reaches it
+        self.task_max_retries = 4
+        # Critic role whose _act() is awaited inside runcode_and_evaluate()
+        self.critic_reviewer = CriticReviewer()
+    
    def render_system_message(self, skills=[], *args, **kwargs):
        """
        According to basic skills context files to genenarate js skill codes.
        Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py
        """
-
+        
        action_template = utils.load_prompt("action_template")
        base_skills = [
            "exploreUntil",
@ -69,21 +76,21 @@ class ActionDeveloper(Base):
        system_action_message = SystemMessage(content=system_action_prompt)
        assert isinstance(system_action_message, SystemMessage)
        return system_action_message
-
+    
    def render_human_message(
-        self, events, code="", task="", context="", critique="", *args, **kwargs
+            self, events, code="", task="", context="", critique="", *args, **kwargs
    ):
        """
        Integrate observation about the environment(especially events), add to HumanMessage.
        Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py
        """
-
+        
        # Deal with events info
        chat_messages = []
        error_messages = []
        # damage_messages = [] # TODO: try to add damage_messages into prompt later
        assert events[-1][0] == "observe", "Last event must be observe"
-
+        
        for i, (event_type, event) in enumerate(events):
            if event_type == "onChat":
                chat_messages.append(event["onChat"])
@ -101,30 +108,30 @@ class ActionDeveloper(Base):
                inventory_used = event["status"]["inventoryUsed"]
                inventory = event["inventory"]
                assert i == len(events) - 1, "observe must be the last event"
-
+        
        # Collect all the environment information into a str: observation
        observation = ""
-
+        
        observation = (
            f"Code from the last round:\n{code or 'No code in the first round'}\n\n"
        )
-
+        
        if error_messages:
            error = "\n".join(error_messages)
            observation += f"Execution error:\n{error}\n\n"
        else:
            observation += f"Execution error: No error\n\n"
-
+        
        if chat_messages:
            chat_log = "\n".join(chat_messages)
            observation += f"Chat log: {chat_log}\n\n"
        else:
            observation += f"Chat log: None\n\n"
-
+        
        observation += f"Biome: {biome}\n\n"
        observation += f"Time: {time_of_day}\n\n"
        observation += f"Nearby blocks: {', '.join(voxels) if voxels else 'None'}\n\n"
-
+        
        if entities:
            nearby_entities = [
                k for k, v in sorted(entities.items(), key=lambda x: x[1])
@ -132,35 +139,35 @@ class ActionDeveloper(Base):
            observation += f"Nearby entities (nearest to farthest): {', '.join(nearby_entities)}\n\n"
        else:
            observation += f"Nearby entities (nearest to farthest): None\n\n"
-
+        
        observation += f"Health: {health:.1f}/20\n\n"
        observation += f"Hunger: {hunger:.1f}/20\n\n"
        observation += f"Position: x={position['x']:.1f}, y={position['y']:.1f}, z={position['z']:.1f}\n\n"
        observation += f"Equipment: {equipment}\n\n"
        observation += f"Inventory ({inventory_used}/36): {'Empty' if not inventory else ', '.join(inventory)}\n\n"
-
+        
        if not (
-            task == "Place and deposit useless items into a chest"
-            or task.startswith("Deposit useless items into the chest at")
+                task == "Place and deposit useless items into a chest"
+                or task.startswith("Deposit useless items into the chest at")
        ):
            observation += self.game_memory.chest_observation
-
+        
        observation += f"Task: {task}\n\n"
        observation += f"Context: {context or 'None'}\n\n"
        observation += f"Critique: {critique or 'None'}\n\n"
-
+        
        return HumanMessage(content=observation)
-
+    
    def encapsule_message(
-        self,
-        events,
-        code="",
-        task="",
-        context="",
-        critique="",
-        skills=[],
-        *args,
-        **kwargs,
+            self,
+            events,
+            code="",
+            task="",
+            context="",
+            critique="",
+            skills=[],
+            *args,
+            **kwargs,
    ):
        system_message = self.render_system_message(skills=skills)
        human_message = self.render_human_message(
@ -170,7 +177,7 @@ class ActionDeveloper(Base):
            "system_msg": [system_message.content],
            "human_msg": human_message.content,
        }
-
+    
    async def _observe(self) -> int:
        await super()._observe()
        for msg in self._rc.news:
@ -180,7 +187,109 @@ class ActionDeveloper(Base):
        ]  # only relevant msgs count as observed news
        logger.info(len(self._rc.news))
        return len(self._rc.news)
+    
+    async def run_step(self, human_msg, system_msg, *args, **kwargs):
+        """
+        Repeat runcode_and_evaluate() until it reports the round as done.
+
+        All arguments are forwarded unchanged to runcode_and_evaluate().
+        Returns the (messages, reward, done, info) tuple of the final round.
+        """
+        while True:
+            # runcode_and_evaluate() is a coroutine function, so it has to be awaited;
+            # unpacking the bare coroutine object would raise TypeError.
+            messages, reward, done, info = await self.runcode_and_evaluate(
+                human_msg, system_msg, *args, **kwargs
+            )
+            if done:
+                break
+        # NOTE(review): every round is called with the same human_msg/system_msg; the
+        # refreshed prompt held in `messages` is not fed back - confirm this is intended.
+        # NOTE(review): _act() sets `.cause_by` on the handler result, which a tuple
+        # does not support - confirm how this return value is meant to be consumed.
+        return messages, reward, done, info
+    
+    async def handle_add_new_skills(
+            self, task, program_name, program_code, skills, *args, **kwargs
+    ):
+        """
+        Run the AddNewSkills action and forward its result to game memory.
+
+        The action receives task, program_name, program_code, skills and the
+        skill_desp read from game memory, in that positional order. Its result is
+        handed to game_memory.append_skill through perform_game_info_callback.
+        Extra *args / **kwargs are accepted but not used.
+        """
+        # Positional arguments for AddNewSkills().run(); skill_desp comes from game memory
+        run_args = (task, program_name, program_code, skills, self.game_memory.skill_desp)
+        added_skill_info = await AddNewSkills().run(*run_args)
+        # update skills in game memory
+        append_cb = self.game_memory.append_skill
+        self.perform_game_info_callback(added_skill_info, append_cb)
+    
+    async def retrieve_skills(self, query, skills, *args, **kwargs):
+        """
+        Run the RetrieveSkills action for `query` and forward its result to game memory.
+
+        The result is logged by size and handed to game_memory.update_retrieve_skills
+        through perform_game_info_callback. Nothing is returned; extra *args / **kwargs
+        are accepted but not used.
+        """
+        skill_action = RetrieveSkills()
+        retrieve_skills = await skill_action.run(query, skills)
+        logger.info(f"Render Action Agent system message with {len(retrieve_skills)} skills")
+        # The result is only published through the game-memory callback (no Message is built here)
+        update_cb = self.game_memory.update_retrieve_skills
+        self.perform_game_info_callback(retrieve_skills, update_cb)
+    
+    async def runcode_and_evaluate(self, human_msg, system_msg, *args, **kwargs):
+        """
+        Run one generate-code / review round; equal to step() in voyager.

+        Returns (message, 0, done, info). `message` is a dict with "system_msg" and
+        "human_msg" keys, the reward slot is always 0, and `done` is truthy once
+        rollout_num_iter reaches task_max_retries or game_memory.runtime_status is set.
+        """
+        task = self.game_memory.current_task
+        context = self.game_memory.context
+        
+        # Generate the new code and its corresponding program name
+        code, program_name = await GenerateActionCode().run(
+            human_msg, system_msg, *args, **kwargs
+        )
+        # logger.warning(type(code))
+        # logger.info(f"Code is Here:{code}")
+        
+        if code is not None:
+            # FIXME: if a dedicated entry function for executing mc code exists, use that function instead
+            events = await self._obtain_events()
+            # NOTE: these events are the ones observed after the new action function was executed
+            # The critic call below updates the evaluation result and calls the latest
+            # environment info back to "ga" (abbreviation kept from the original comment)
+            await self.critic_reviewer._act()  # todo: moving the update-event step of critic act here seems more reasonable?
+            
+            # Read the values back from game memory after the critic has run
+            critique = self.game_memory.critique
+            event_summary = self.game_memory.event_summary
+            skills = self.game_memory.skills
+            
+            if not self.game_memory.runtime_status:
+                # todo: callback game memory reset block info
+                logger.info("Not success, reset block info !")
+                logger.info(
+                    f"\033[32m****Action Agent human message****\n{self.messages[-1].content}\033[0m"
+                )
+            
+            # add new skills no matter success or not
+            # add_new_skills_message = {
+            #     "task": task,
+            #     "program_name": program_name,
+            #     "program_code": code,
+            #     "skills": self.game_memory.skills,
+            # }
+            new_skill_info = {"query": context + "\n\n" + event_summary, "skills": skills}
+            
+            # await self.handle_add_new_skills(**add_new_skills_message)
+            await self.retrieve_skills(**new_skill_info)
+            retrieve_skills = self.game_memory.retrieve_skills
+            
+            # Build the prompt dict from the fresh events, critique and retrieved skills
+            message = self.encapsule_message(
+                events=events,
+                code=code,
+                task=task,
+                context=context,
+                critique=critique,
+                skills=retrieve_skills,
+            )
+        
+        
+        else:
+            # No code was produced: reuse the incoming prompt unchanged.
+            # NOTE(review): this assumes human_msg/system_msg expose `.content`, while
+            # encapsule_message() yields a list and a plain value for these keys - verify against caller.
+            message = {
+                "system_msg": [system_msg.content],
+                "human_msg": human_msg.content,
+            }
+            logger.info(f"\033[34m Trying again!\033[0m")
+        
+        self.rollout_num_iter += 1
+        done = (self.rollout_num_iter >= self.task_max_retries or self.game_memory.runtime_status)
+        # NOTE(review): reads self.task / self.conversations rather than the local `task`;
+        # neither attribute is assigned in the visible part of this class - confirm they exist.
+        info = {
+            "task": self.task,
+            "success": self.game_memory.runtime_status,
+            "conversations": self.conversations,
+        }
+        
+        # Publish code and program name to game memory (code may be None on the retry path)
+        self.perform_game_info_callback(code, self.game_memory.update_code)
+        self.perform_game_info_callback(
+            program_name, self.game_memory.update_program_name
+        )
+        
+        return message, 0, done, info
+    
    async def generate_action_code(self, human_msg, system_msg, *args, **kwargs):
        code, program_name = await GenerateActionCode().run(
            human_msg, system_msg, *args, **kwargs
@ -198,11 +307,12 @@ class ActionDeveloper(Base):
        )
        # logger.info(msg)
        return msg
-
+    
    async def _act(self) -> Message:
        todo = self._rc.todo
        logger.debug(f"Todo is {todo}")
        self.maintain_actions(todo)
+        
        # 获取最新的游戏周边信息
        events = await self._obtain_events()
        self.perform_game_info_callback(events, self.game_memory.update_event)
@ -211,7 +321,8 @@ class ActionDeveloper(Base):
        code = self.game_memory.code
        critique = self.game_memory.critique
        retrieve_skills = self.game_memory.retrieve_skills
-
+        
+        # 对自己所需的环境信息进行处理
        message = self.encapsule_message(
            events=events,
            code=code,
@ -222,11 +333,11 @@ class ActionDeveloper(Base):
        )
        logger.info(todo)
        handler_map = {
-            GenerateActionCode: self.generate_action_code,
+            GenerateActionCode: self.run_step#self.generate_action_code,
        }
        handler = handler_map.get(type(todo))
        logger.info(handler)
-
+        
        if handler:
            msg = await handler(**message)
            msg.cause_by = type(todo)
@ -234,5 +345,7 @@ class ActionDeveloper(Base):
            logger.info(msg.send_to)
            self._publish_message(msg)
            return msg
-
+        
        raise ValueError(f"Unknown todo type: {type(todo)}")
+        
+        # await self.run_step()
--- a/metagpt/roles/minecraft/critic_agent.py
+++ b/metagpt/roles/minecraft/critic_agent.py
@ -28,12 +28,28 @@ class CriticReviewer(Base):
    ) -> None:
        super().__init__(name, profile, goal, constraints)
        # Initialize actions specific to the CriticReviewer role
+        # self._init_actions([VerifyTask])
        self._init_actions([VerifyTask])

        # Set events or actions the CriticReviewer should watch or be aware of
        # 需要获取最新的events来进行评估
        self._watch([GenerateActionCode, AddNewSkills])

+    async def run(self, message=None):
+        """
+        Observe only: take in incoming messages and set VerifyTask as the todo.
+
+        When `message` is truthy it is passed to recv(): a str is first wrapped in a
+        Message, a Message is received as is, and a list is joined with newlines into
+        one Message. Otherwise _observe() is awaited and, if it reports nothing new,
+        the method logs and returns without setting the todo. No action is run here.
+        """
+        if message:
+            if isinstance(message, str):
+                message = Message(message)
+            # Not an elif: a str converted just above is received by this branch
+            if isinstance(message, Message):
+                self.recv(message)
+            if isinstance(message, list):
+                self.recv(Message("\n".join(message)))
+        elif not await self._observe():
+            # If there is no new information, suspend and wait
+            logger.info(f"{self._setting}: no news. waiting.")
+            return
+        # NOTE(review): assigns the VerifyTask class itself, not an instance - confirm
+        # that the consumer of _rc.todo expects a class here.
+        self._rc.todo = VerifyTask
+
    def render_system_message(self):
        system_message = SystemMessage(content=load_prompt("critic"))
        return system_message
@ -119,6 +135,7 @@ class CriticReviewer(Base):
        self.perform_game_info_callback(
            success, self.game_memory.update_exploration_progress
        )
+        self.perform_game_info_callback(critique, self.game_memory.update_critique)
        return Message(
            content=f"{critique}",
            instruct_content="verify_task",
--- a/minecraft_run.py
+++ b/minecraft_run.py
@ -19,7 +19,7 @@ async def learn(task="Start", investment: float = 50.0, n_round: int = 3):
        [
            CurriculumDesigner(),
            ActionDeveloper(),
-            CriticReviewer(),
+            # CriticReviewer(),
            SkillManager(),
        
        ]