From dc0fd134fb3c5107a841b3c30a0e8f55930cb640 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Fri, 6 Oct 2023 16:16:09 +0800
Subject: [PATCH] Run critic review and retry loop inside ActionDeveloper

---
 metagpt/minecraft_team.py                   |   8 +
 metagpt/roles/minecraft/action_developer.py | 195 ++++++++++++++++----
 metagpt/roles/minecraft/critic_agent.py     |  17 ++
 minecraft_run.py                            |   2 +-
 4 files changed, 180 insertions(+), 42 deletions(-)

diff --git a/metagpt/minecraft_team.py b/metagpt/minecraft_team.py
index 5ead788ce..c7fe6f685 100644
--- a/metagpt/minecraft_team.py
+++ b/metagpt/minecraft_team.py
@@ -51,6 +51,7 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
     chest_observation: str = Field(default="")  # eg: "Chests: None\n\n"
 
     mf_instance: MineflayerEnv = Field(default_factory=MineflayerEnv)
+    runtime_status: bool = False  # equal to action execution status: success or failed
 
     @property
     def progress(self):
@@ -200,6 +201,10 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
                     chatlog.add(item)
         return "I also need " + ", ".join(chatlog) + "." if chatlog else ""
 
+    def reset_block_info(self):
+        # TODO: revert all the placing events of the last step (currently a no-op)
+        pass
+
     def update_exploration_progress(self, success: bool):
         """
         Split task into completed_tasks or failed_tasks
@@ -209,6 +214,9 @@ class GameEnvironment(BaseModel, arbitrary_types_allowed=True):
             "conversations": self.conversations,
         }
         """
+        # update runtime status in game memory
+        self.runtime_status = success
+
         task = self.current_task
         if task.startswith("Deposit useless items into the chest at"):
             return
diff --git a/metagpt/roles/minecraft/action_developer.py b/metagpt/roles/minecraft/action_developer.py
index 9171e455b..9791d5b88 100644
--- a/metagpt/roles/minecraft/action_developer.py
+++ b/metagpt/roles/minecraft/action_developer.py
@@ -2,6 +2,8 @@
 # @Date    : 2023/9/23 12:45
 # @Author  : stellahong (stellahong@fuzhi.ai)
 # @Desc    :
+import copy
+
 from metagpt.logs import logger
 from metagpt.roles.minecraft.minecraft_base import Minecraft as Base
 from metagpt.schema import Message, HumanMessage, SystemMessage
@@ -17,6 +19,8 @@ from metagpt.config import CONFIG
 from metagpt.actions.minecraft.control_primitives_context import (
     load_skills_code_context,
 )
+from metagpt.utils.minecraft import fix_and_parse_json
+from metagpt.roles.minecraft.critic_agent import CriticReviewer
 
 
 @agent_registry.register("action_developer")
@@ -25,28 +29,31 @@ class ActionDeveloper(Base):
     iterative prompting mechanism in paper.
     generate action code based on environment observation and plan, as well as skills retrieval results
     """
-
+    
     def __init__(
-        self,
-        name: str = "Bob",
-        profile: str = "Generate code for specified tasks",
-        goal: str = "Produce accurate and efficient code solutions in Python and JavaScript",
-        constraints: str = "Adhere to coding best practices and style guidelines",
+            self,
+            name: str = "Bob",
+            profile: str = "Generate code for specified tasks",
+            goal: str = "Produce accurate and efficient code solutions in Python and JavaScript",
+            constraints: str = "Adhere to coding best practices and style guidelines",
     ) -> None:
         super().__init__(name, profile, goal, constraints)
         # Initialize actions specific to the Action role
         self._init_actions([GenerateActionCode])
-
+        
         # Set events or actions the ActionAgent should watch or be aware of
         # 需要根据events进行自己chest_observation的更新
         self._watch([RetrieveSkills])
-
+        self.rollout_num_iter = 0
+        self.task_max_retries = 4
+        self.critic_reviewer = CriticReviewer()
+    
     def render_system_message(self, skills=[], *args, **kwargs):
         """
         According to basic skills context files to genenarate js skill codes.
         Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py
         """
-
+        
         action_template = utils.load_prompt("action_template")
         base_skills = [
             "exploreUntil",
@@ -69,21 +76,21 @@ class ActionDeveloper(Base):
         system_action_message = SystemMessage(content=system_action_prompt)
         assert isinstance(system_action_message, SystemMessage)
         return system_action_message
-
+    
     def render_human_message(
-        self, events, code="", task="", context="", critique="", *args, **kwargs
+            self, events, code="", task="", context="", critique="", *args, **kwargs
     ):
         """
         Integrate observation about the environment(especially events), add to HumanMessage.
         Refer to @ https://github.com/MineDojo/Voyager/blob/main/voyager/agents/action.py
         """
-
+        
         # Deal with events info
         chat_messages = []
         error_messages = []
         # damage_messages = [] # TODO: try to add damage_messages into prompt later
         assert events[-1][0] == "observe", "Last event must be observe"
-
+        
         for i, (event_type, event) in enumerate(events):
             if event_type == "onChat":
                 chat_messages.append(event["onChat"])
@@ -101,30 +108,30 @@ class ActionDeveloper(Base):
                 inventory_used = event["status"]["inventoryUsed"]
                 inventory = event["inventory"]
                 assert i == len(events) - 1, "observe must be the last event"
-
+        
         # Collect all the environment information into a str: observation
         observation = ""
-
+        
         observation = (
             f"Code from the last round:\n{code or 'No code in the first round'}\n\n"
         )
-
+        
         if error_messages:
             error = "\n".join(error_messages)
             observation += f"Execution error:\n{error}\n\n"
         else:
             observation += f"Execution error: No error\n\n"
-
+        
         if chat_messages:
             chat_log = "\n".join(chat_messages)
             observation += f"Chat log: {chat_log}\n\n"
         else:
             observation += f"Chat log: None\n\n"
-
+        
         observation += f"Biome: {biome}\n\n"
         observation += f"Time: {time_of_day}\n\n"
         observation += f"Nearby blocks: {', '.join(voxels) if voxels else 'None'}\n\n"
-
+        
         if entities:
             nearby_entities = [
                 k for k, v in sorted(entities.items(), key=lambda x: x[1])
@@ -132,35 +139,35 @@ class ActionDeveloper(Base):
             observation += f"Nearby entities (nearest to farthest): {', '.join(nearby_entities)}\n\n"
         else:
             observation += f"Nearby entities (nearest to farthest): None\n\n"
-
+        
         observation += f"Health: {health:.1f}/20\n\n"
         observation += f"Hunger: {hunger:.1f}/20\n\n"
         observation += f"Position: x={position['x']:.1f}, y={position['y']:.1f}, z={position['z']:.1f}\n\n"
         observation += f"Equipment: {equipment}\n\n"
         observation += f"Inventory ({inventory_used}/36): {'Empty' if not inventory else ', '.join(inventory)}\n\n"
-
+        
         if not (
-            task == "Place and deposit useless items into a chest"
-            or task.startswith("Deposit useless items into the chest at")
+                task == "Place and deposit useless items into a chest"
+                or task.startswith("Deposit useless items into the chest at")
         ):
             observation += self.game_memory.chest_observation
-
+        
         observation += f"Task: {task}\n\n"
         observation += f"Context: {context or 'None'}\n\n"
         observation += f"Critique: {critique or 'None'}\n\n"
-
+        
         return HumanMessage(content=observation)
-
+    
     def encapsule_message(
-        self,
-        events,
-        code="",
-        task="",
-        context="",
-        critique="",
-        skills=[],
-        *args,
-        **kwargs,
+            self,
+            events,
+            code="",
+            task="",
+            context="",
+            critique="",
+            skills=[],
+            *args,
+            **kwargs,
     ):
         system_message = self.render_system_message(skills=skills)
         human_message = self.render_human_message(
@@ -170,7 +177,7 @@ class ActionDeveloper(Base):
             "system_msg": [system_message.content],
             "human_msg": human_message.content,
         }
-
+    
     async def _observe(self) -> int:
         await super()._observe()
         for msg in self._rc.news:
@@ -180,7 +187,109 @@ class ActionDeveloper(Base):
         ]  # only relevant msgs count as observed news
         logger.info(len(self._rc.news))
         return len(self._rc.news)
+    
+    async def run_step(self, human_msg, system_msg, *args, **kwargs):
+        """Repeat runcode_and_evaluate until it reports done; return its last (messages, reward, done, info)."""
+        self.rollout_num_iter, done = 0, False  # fresh retry budget for every rollout
+        while not done:
+            messages, reward, done, info = await self.runcode_and_evaluate(human_msg, system_msg, *args, **kwargs)
+        return messages, reward, done, info
+    
+    async def handle_add_new_skills(
+            self, task, program_name, program_code, skills, *args, **kwargs
+    ):
+        """Run AddNewSkills for the given program and pass its result to game_memory.append_skill."""
+        new_skills_info = await AddNewSkills().run(
+            task, program_name, program_code, skills, self.game_memory.skill_desp
+        )
+        # update skills in game memory
+        self.perform_game_info_callback(new_skills_info, self.game_memory.append_skill)
+    
+    async def retrieve_skills(self, query, skills, *args, **kwargs):
+        """Run RetrieveSkills on (query, skills) and pass the result to game_memory.update_retrieve_skills."""
+        found = await RetrieveSkills().run(query, skills)
+        logger.info(f"Render Action Agent system message with {len(found)} skills")
+        # handed over through the game-info callback; no Message is returned from here
+        self.perform_game_info_callback(found, self.game_memory.update_retrieve_skills)
+    
+    async def runcode_and_evaluate(self, human_msg, system_msg, *args, **kwargs):
+        """
+        Run one generate -> review -> re-prompt round; equal to step() in voyager.
 
+        Returns (message, 0, done, info): the next prompt dict, a constant reward, the stop flag and a summary.
+        """
+        task = self.game_memory.current_task
+        context = self.game_memory.context
+
+        # Generate the new code and its program name
+        code, program_name = await GenerateActionCode().run(
+            human_msg, system_msg, *args, **kwargs
+        )
+        # logger.warning(type(code))
+        # logger.info(f"Code is Here:{code}")
+
+        if code is not None:
+            # FIXME: if a standalone mc code execution entry function exists, use it instead
+            events = await self._obtain_events()
+            # Note: these events are the ones observed after the new action function was executed
+            # The critic updates the evaluation result and calls back the latest environment info
+            await self.critic_reviewer._act()  # todo: would the update event inside critic act fit better here?
+
+            critique = self.game_memory.critique
+            event_summary = self.game_memory.event_summary
+            skills = self.game_memory.skills
+
+            if not self.game_memory.runtime_status:
+                # todo: callback game memory reset block info
+                logger.info("Not success, reset block info !")
+                logger.info(
+                    f"\033[32m****Action Agent human message****\n{self.messages[-1].content}\033[0m"
+                )
+
+            # add new skills no matter success or not
+            # add_new_skills_message = {
+            #     "task": task,
+            #     "program_name": program_name,
+            #     "program_code": code,
+            #     "skills": self.game_memory.skills,
+            # }
+            new_skill_info = {"query": context + "\n\n" + event_summary, "skills": skills}
+
+            # await self.handle_add_new_skills(**add_new_skills_message)
+            await self.retrieve_skills(**new_skill_info)
+            retrieve_skills = self.game_memory.retrieve_skills
+
+            message = self.encapsule_message(
+                events=events,
+                code=code,
+                task=task,
+                context=context,
+                critique=critique,
+                skills=retrieve_skills,
+            )
+        else:
+            # _act passes already-rendered prompts (str / list of str), which have no .content
+            message = {
+                "system_msg": system_msg if isinstance(system_msg, list) else [system_msg.content],
+                "human_msg": getattr(human_msg, "content", human_msg),
+            }
+            logger.info(f"\033[34m Trying again!\033[0m")
+
+        self.rollout_num_iter += 1
+        done = (self.rollout_num_iter >= self.task_max_retries or self.game_memory.runtime_status)
+        info = {
+            "task": task,
+            "success": self.game_memory.runtime_status,
+            "conversations": self.conversations,  # NOTE(review): confirm the role defines this attribute
+        }
+
+        self.perform_game_info_callback(code, self.game_memory.update_code)
+        self.perform_game_info_callback(
+            program_name, self.game_memory.update_program_name
+        )
+
+        return message, 0, done, info
+    
     async def generate_action_code(self, human_msg, system_msg, *args, **kwargs):
         code, program_name = await GenerateActionCode().run(
             human_msg, system_msg, *args, **kwargs
@@ -198,11 +307,12 @@ class ActionDeveloper(Base):
         )
         # logger.info(msg)
         return msg
-
+    
     async def _act(self) -> Message:
         todo = self._rc.todo
         logger.debug(f"Todo is {todo}")
         self.maintain_actions(todo)
+        
         # 获取最新的游戏周边信息
         events = await self._obtain_events()
         self.perform_game_info_callback(events, self.game_memory.update_event)
@@ -211,7 +321,8 @@ class ActionDeveloper(Base):
         code = self.game_memory.code
         critique = self.game_memory.critique
         retrieve_skills = self.game_memory.retrieve_skills
-
+        
+        # 对自己所需的环境信息进行处理
         message = self.encapsule_message(
             events=events,
             code=code,
@@ -222,11 +333,11 @@ class ActionDeveloper(Base):
         )
         logger.info(todo)
         handler_map = {
-            GenerateActionCode: self.generate_action_code,
+            GenerateActionCode: self.run_step#self.generate_action_code,
         }
         handler = handler_map.get(type(todo))
         logger.info(handler)
-
+        
         if handler:
             msg = await handler(**message)
             msg.cause_by = type(todo)
@@ -234,5 +345,7 @@ class ActionDeveloper(Base):
             logger.info(msg.send_to)
             self._publish_message(msg)
             return msg
-
+        
         raise ValueError(f"Unknown todo type: {type(todo)}")
+        
+        # await self.run_step()
\ No newline at end of file
diff --git a/metagpt/roles/minecraft/critic_agent.py b/metagpt/roles/minecraft/critic_agent.py
index 7bb90767a..d9c9db699 100644
--- a/metagpt/roles/minecraft/critic_agent.py
+++ b/metagpt/roles/minecraft/critic_agent.py
@@ -28,12 +28,28 @@ class CriticReviewer(Base):
     ) -> None:
         super().__init__(name, profile, goal, constraints)
         # Initialize actions specific to the CriticReviewer role
+        # self._init_actions([VerifyTask])
         self._init_actions([VerifyTask])
 
         # Set events or actions the CriticReviewer should watch or be aware of
         # 需要获取最新的events来进行评估
         self._watch([GenerateActionCode, AddNewSkills])
 
+    async def run(self, message=None):
+        """Receive ``message`` (or observe when it is falsy), then set the todo to VerifyTask."""
+        if not message:
+            if not await self._observe():
+                # If there is no new information, suspend and wait
+                logger.info(f"{self._setting}: no news. waiting.")
+                return
+        elif isinstance(message, list):
+            self.recv(Message("\n".join(message)))
+        elif isinstance(message, (str, Message)):
+            # a plain string is wrapped in a Message before it is received
+            self.recv(Message(message) if isinstance(message, str) else message)
+        # NOTE(review): this stores the VerifyTask class itself, not an instance - confirm intended
+        self._rc.todo = VerifyTask
+
     def render_system_message(self):
         system_message = SystemMessage(content=load_prompt("critic"))
         return system_message
@@ -119,6 +135,7 @@ class CriticReviewer(Base):
         self.perform_game_info_callback(
             success, self.game_memory.update_exploration_progress
         )
+        self.perform_game_info_callback(critique, self.game_memory.update_critique)
         return Message(
             content=f"{critique}",
             instruct_content="verify_task",
diff --git a/minecraft_run.py b/minecraft_run.py
index d7d2cf7c2..d0bcb5fd9 100644
--- a/minecraft_run.py
+++ b/minecraft_run.py
@@ -19,7 +19,7 @@ async def learn(task="Start", investment: float = 50.0, n_round: int = 3):
         [
             CurriculumDesigner(),
             ActionDeveloper(),
-            CriticReviewer(),
+            # CriticReviewer(),
             SkillManager(),
         
         ]