From 168bd3b57b2442a4fc8fe1c81e54f1102afa99e5 Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Sat, 7 Oct 2023 20:01:15 +0800
Subject: [PATCH 1/2] fix bug: update finish state in role init; fix bug:
 update finish state when a new round starts; fix bug: update CriticReviewer
 status in run_step(); fix bug: update ga.runtime_status on failure; update:
 add extra YAML load utils

---
 metagpt/actions/minecraft/generate_actions.py |  1 +
 metagpt/actions/minecraft/manage_skills.py    |  3 +++
 metagpt/minecraft_team.py                     |  2 ++
 metagpt/roles/minecraft/action_developer.py   | 15 ++++++++++-----
 metagpt/roles/minecraft/critic_agent.py       |  5 +++--
 metagpt/roles/minecraft/curriculum_agent.py   |  2 ++
 metagpt/roles/minecraft/minecraft_base.py     | 10 ++++++----
 metagpt/roles/minecraft/skill_manager.py      |  8 ++++++--
 metagpt/roles/role.py                         |  2 ++
 metagpt/utils/minecraft/yaml_utils.py         | 15 +++++++++++++++
 10 files changed, 50 insertions(+), 13 deletions(-)
 create mode 100644 metagpt/utils/minecraft/yaml_utils.py

diff --git a/metagpt/actions/minecraft/generate_actions.py b/metagpt/actions/minecraft/generate_actions.py
index 65433f326..a669b5374 100644
--- a/metagpt/actions/minecraft/generate_actions.py
+++ b/metagpt/actions/minecraft/generate_actions.py
@@ -15,6 +15,7 @@ class GenerateActionCode(Action):
 
     def __init__(self, name="", context=None, llm=None):
         super().__init__(name, context, llm)
+        self.llm.model = "gpt-4"
 
     async def generate_code(self, human_msg, system_msg=[]):
         """
diff --git a/metagpt/actions/minecraft/manage_skills.py b/metagpt/actions/minecraft/manage_skills.py
index caec6c560..9b205dd19 100644
--- a/metagpt/actions/minecraft/manage_skills.py
+++ b/metagpt/actions/minecraft/manage_skills.py
@@ -50,12 +50,15 @@ class AddNewSkills(Action):
     ):
         # Implement the logic for adding new skills here.
         # TODO: Fix this
+        logger.info(f"check task {task}")
         if task.startswith("Deposit useless items into the chest at"):
             # No need to reuse the deposit skill
             return {}
         logger.info(
             f"Skill Manager generated description for {program_name}:\n{skill_desp}\033[0m"
         )
+        logger.info(f"check skills {skills}")
+        
         if program_name in skills:
             logger.info(f"Skill {program_name} already exists. Rewriting!")
             self.vectordb._collection.delete(ids=[program_name])
diff --git a/metagpt/minecraft_team.py b/metagpt/minecraft_team.py
index 68e20ea89..5d9243468 100644
--- a/metagpt/minecraft_team.py
+++ b/metagpt/minecraft_team.py
@@ -355,6 +355,7 @@ class MinecraftPlayer(SoftwareCompany):
             role.finish_step = False
             role.round_id += 1
             role._rc.todo = None
+            role.finish_state = len(role._actions)
             logger.info(f"round_id:{role.round_id}")
     
     def hire(self, roles: list[Role]):
@@ -394,6 +395,7 @@ class MinecraftPlayer(SoftwareCompany):
         while n_round > 0:
             # self._save()
             if self.check_complete_round():
+                
                 n_round -= 1
                 self.update_round()
                 round_id += 1
diff --git a/metagpt/roles/minecraft/action_developer.py b/metagpt/roles/minecraft/action_developer.py
index 4f585ea26..5b088c86a 100644
--- a/metagpt/roles/minecraft/action_developer.py
+++ b/metagpt/roles/minecraft/action_developer.py
@@ -20,8 +20,6 @@ from metagpt.config import CONFIG
 from metagpt.actions.minecraft.control_primitives_context import (
     load_skills_code_context,
 )
-from metagpt.utils.minecraft import fix_and_parse_json
-from metagpt.roles.minecraft.critic_agent import CriticReviewer
 
 
 @agent_registry.register("action_developer")
@@ -42,13 +40,14 @@ class ActionDeveloper(Base):
         # Initialize actions specific to the Action role
         self._init_actions([GenerateActionCode])
         
+        
         # Set events or actions the ActionAgent should watch or be aware of
         # éœ€è¦æ ¹æ®eventsè¿›è¡Œè‡ªå·±chest_observationçš„æ›´æ–°
         self._watch([RetrieveSkills])
         self.rollout_num_iter = 0
         self.task_max_retries = 4
+        self.finish_state = len(self._actions)
         self.critic_reviewer = None  # self._rc.env.roles["Task Reviewer"]
-        logger.info(self.critic_reviewer)
     
     def render_system_message(self, skills=[], *args, **kwargs):
         """
@@ -198,6 +197,8 @@ class ActionDeveloper(Base):
             if done:
                 break
         # return [system_msg, human_msg], reward, done, info
+        # Before returning, mark critic_reviewer's step as finished so the next round can begin
+        self.critic_reviewer.finish_step = True
         return Message(
             content=f"{info}",
             instruct_content="generate_action_code",
@@ -282,8 +283,12 @@ class ActionDeveloper(Base):
             system_msg = message["system_msg"]
             human_msg = message["human_msg"]
         else:
+            self.perform_game_info_callback(
+                False, self.game_memory.update_exploration_progress
+            )
+            logger.info(f"Code is None. Update runtime_status failed!")
             self.critic_reviewer.maintain_actions(VerifyTask())
-            logger.info(f"system msg is {system_msg}, \n human_msg is {human_msg}")
+            # logger.info(f"system msg is {system_msg}, \n human_msg is {human_msg}")
             logger.info(f"\033[34m Trying again!\033[0m")
         
         self.rollout_num_iter += 1
@@ -326,7 +331,7 @@ class ActionDeveloper(Base):
         # èŽ·å–æœ€æ–°çš„æ¸¸æˆå‘¨è¾¹ä¿¡æ¯
         # events = await self._obtain_events()
         events = self.game_memory.event
-        logger.info(events)
+        # logger.info(events)
         # self.perform_game_info_callback(events, self.game_memory.update_event)
         logger.info(self.game_memory.event_summary)
         context = self.game_memory.context
diff --git a/metagpt/roles/minecraft/critic_agent.py b/metagpt/roles/minecraft/critic_agent.py
index 3bf632909..ba37689c7 100644
--- a/metagpt/roles/minecraft/critic_agent.py
+++ b/metagpt/roles/minecraft/critic_agent.py
@@ -34,6 +34,7 @@ class CriticReviewer(Base):
         # Set events or actions the CriticReviewer should watch or be aware of
         # éœ€è¦èŽ·å–æœ€æ–°çš„eventsæ¥è¿›è¡Œè¯„ä¼°
         self._watch([])
+        self.finish_state = len(self._actions)
 
     async def run(self, message=None):
         """Observe, only get the observation"""
@@ -157,7 +158,7 @@ class CriticReviewer(Base):
         # èŽ·å–æœ€æ–°çš„æ¸¸æˆå‘¨è¾¹ä¿¡æ¯
         events = await self._execute_events()
         self.perform_game_info_callback(events, self.game_memory.update_chest_memory)
-        logger.info(f"Execute return event is {self.game_memory.event}")
+        # logger.info(f"Execute return event is {self.game_memory.event}")
         context = self.game_memory.context
         task = self.game_memory.current_task
         chest_observation = self.game_memory.chest_observation
@@ -173,7 +174,7 @@ class CriticReviewer(Base):
             VerifyTask: self.verify_task,
         }
         handler = handler_map.get(type(todo))
-        logger.info(handler)
+        # logger.info(handler)
         if handler:
             msg = await handler(**message)
             msg.cause_by = type(todo)
diff --git a/metagpt/roles/minecraft/curriculum_agent.py b/metagpt/roles/minecraft/curriculum_agent.py
index 68e394786..1b2b12d2f 100644
--- a/metagpt/roles/minecraft/curriculum_agent.py
+++ b/metagpt/roles/minecraft/curriculum_agent.py
@@ -32,6 +32,8 @@ class CurriculumDesigner(Base):
 
         # Set events or actions the ActionAgent should watch or be aware of
         self._watch([PlayerActions, DesignTask])
+        logger.info(self._actions)
+        self.finish_state = len(self._actions)
 
     def render_curriculum_observation(self, *, events, chest_observation):
         """
diff --git a/metagpt/roles/minecraft/minecraft_base.py b/metagpt/roles/minecraft/minecraft_base.py
index dbc3c10a9..c59acc3d7 100644
--- a/metagpt/roles/minecraft/minecraft_base.py
+++ b/metagpt/roles/minecraft/minecraft_base.py
@@ -52,26 +52,28 @@ class Minecraft(Role):
         self.finish_step = False
 
     def maintain_actions(self, todo):
+        logger.info(f"{self._setting.name}:{self.finish_state}")
         if todo in self._actions:
             self.finish_state-=1
         if self.finish_state<=0:
             self.finish_step = True
+        logger.info(f"{self._setting.name}:{self.finish_state}")
 
 
     async def _observe(self) -> int:
         await super()._observe()
         for msg in self._rc.news:
             logger.info(f"check msg round :{msg.round_id}")
-            logger.info(msg.round_id == self.round_id)
+            # logger.info(msg.round_id == self.round_id)
         self._rc.news = [
             msg for msg in self._rc.news if msg.round_id == self.round_id
         ]  # only relevant msgs count as observed news
-        logger.info(len(self._rc.news))
+        # logger.info(len(self._rc.news))
         return len(self._rc.news)
     
     async def _think(self) -> None:
         logger.info(self._actions)
-        logger.info(self._rc.state)
+        # logger.info(self._rc.state)
         if len(self._actions) == 1:
             # If there is only one action, then only this one can be performed
             self._set_state(0)
@@ -133,5 +135,5 @@ agent_registry = Registry(name="Minecraft")
 if __name__ == "__main__":
     mc = Minecraft()
     result = "Async operation result"
-    # µ÷ÓÃ»Øµ÷º¯Êý£¬²¢´«µÝ½á¹û
+    
     # mc.perform_memory_callback(mc.my_callback)
diff --git a/metagpt/roles/minecraft/skill_manager.py b/metagpt/roles/minecraft/skill_manager.py
index 161ec08ae..c86d0d2be 100644
--- a/metagpt/roles/minecraft/skill_manager.py
+++ b/metagpt/roles/minecraft/skill_manager.py
@@ -11,7 +11,7 @@ from metagpt.actions.minecraft.manage_skills import (
     RetrieveSkills,
     AddNewSkills,
 )
-from metagpt.actions.minecraft.review_task import VerifyTask
+from metagpt.actions.minecraft import GenerateActionCode
 from metagpt.actions.minecraft.design_curriculumn import DesignCurriculum
 from metagpt.utils.minecraft import load_prompt
 
@@ -32,8 +32,10 @@ class SkillManager(Base):
         
         # Set events or actions the SkillManager should watch or be aware of
         self._watch(
-            [DesignCurriculum, VerifyTask, RetrieveSkills, GenerateSkillDescription]
+            [DesignCurriculum, GenerateActionCode, RetrieveSkills, GenerateSkillDescription]
         )
+        
+        self.finish_state = len(self._actions)
 
     def encapsule_message(self, program_code, program_name, *args, **kwargs):
         system_msg = self.render_system_message(load_prompt("skill"))
@@ -128,8 +130,10 @@ class SkillManager(Base):
         handler = handler_map.get(type(todo))
         if handler:
             if type(todo) == DesignCurriculum:
+                logger.info(retrieve_skills_message_step1)
                 msg = await handler(**retrieve_skills_message_step1)
             elif type(todo) == RetrieveSkills:
+                logger.info(retrieve_skills_message_step2)
                 msg = await handler(**retrieve_skills_message_step2)
             elif type(todo) == GenerateSkillDescription:
                 msg = await handler(**generate_skill_message)
diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py
index fc21ef76b..de66c8922 100644
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@@ -108,10 +108,12 @@ class Role:
     def _init_actions(self, actions):
         self._reset()
         for idx, action in enumerate(actions):
+           
             if not isinstance(action, Action):
                 i = action("")
             else:
                 i = action
+            
             i.set_prefix(self._get_prefix(), self.profile)
             self._actions.append(i)
             self._states.append(f"{idx}. {action}")
diff --git a/metagpt/utils/minecraft/yaml_utils.py b/metagpt/utils/minecraft/yaml_utils.py
new file mode 100644
index 000000000..35b87c211
--- /dev/null
+++ b/metagpt/utils/minecraft/yaml_utils.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+# @Date    : 2023/10/7 16:32
+# @Author  : stellahong (stellahong@fuzhi.ai)
+# @Desc    : Helper for loading an extra YAML configuration file
+
+import yaml
+
+from metagpt.const import PROJECT_ROOT
+
+
+def load_extra_conf(yaml_file=PROJECT_ROOT / "config/add_config.yaml"):
+    """Open ``yaml_file`` as UTF-8 text and return what ``yaml.safe_load`` parses from it."""
+    with open(yaml_file, mode="r", encoding="utf-8") as stream:
+        parsed = yaml.safe_load(stream)
+    return parsed

From f70ba27d35333da1ac7d7c58e91d96c20461ce8c Mon Sep 17 00:00:00 2001
From: stellahsr <stellahsr@126.com>
Date: Sat, 7 Oct 2023 20:24:55 +0800
Subject: [PATCH 2/2] add different llms for different agents

---
 metagpt/actions/minecraft/design_curriculumn.py | 1 +
 metagpt/actions/minecraft/manage_skills.py      | 3 +++
 metagpt/actions/minecraft/review_task.py        | 1 +
 metagpt/provider/openai_api.py                  | 6 +++---
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/metagpt/actions/minecraft/design_curriculumn.py b/metagpt/actions/minecraft/design_curriculumn.py
index 9d0daa72e..a5e321326 100644
--- a/metagpt/actions/minecraft/design_curriculumn.py
+++ b/metagpt/actions/minecraft/design_curriculumn.py
@@ -24,6 +24,7 @@ class DesignTask(Action):
 
     def __init__(self, name="", context=None, llm=None):
         super().__init__(name, context, llm)
+        self.llm.model = "gpt-3.5-turbo"
 
     async def decompose_task(self, query, events):
         system_msgs = SystemMessage(
diff --git a/metagpt/actions/minecraft/manage_skills.py b/metagpt/actions/minecraft/manage_skills.py
index 9b205dd19..35d35e27b 100644
--- a/metagpt/actions/minecraft/manage_skills.py
+++ b/metagpt/actions/minecraft/manage_skills.py
@@ -18,6 +18,7 @@ class RetrieveSkills(Action):
 
     def __init__(self, name="", context=None, llm=None):
         super().__init__(name, context, llm)
+        self.llm.model = "gpt-3.5-turbo"
 
     async def run(self, query, skills, *args, **kwargs):
         # Implement the logic for retrieving skills here.
@@ -44,6 +45,7 @@ class AddNewSkills(Action):
 
     def __init__(self, name="", context=None, llm=None):
         super().__init__(name, context, llm)
+        self.llm.model = "gpt-3.5-turbo"
 
     async def run(
         self, task, program_name, program_code, skills, skill_desp, *args, **kwargs
@@ -100,6 +102,7 @@ class GenerateSkillDescription(Action):
 
     def __init__(self, name="", context=None, llm=None):
         super().__init__(name, context, llm)
+        self.llm.model = "gpt-3.5-turbo"
 
     async def run(self, program_name, human_message, system_message, *args, **kwargs):
         # Implement the logic for generating skill descriptions here.
diff --git a/metagpt/actions/minecraft/review_task.py b/metagpt/actions/minecraft/review_task.py
index 3a46b9752..ed2f34a4f 100644
--- a/metagpt/actions/minecraft/review_task.py
+++ b/metagpt/actions/minecraft/review_task.py
@@ -15,6 +15,7 @@ class VerifyTask(Action):
     
     def __init__(self, name="", context=None, llm=None):
         super().__init__(name, context, llm)
+        self.llm.model = "gpt-3.5-turbo"
 
     async def run(self,human_msg, system_msg, max_retries=5, *args, **kwargs):
         # Implement the logic to verify the task here.
diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py
index 303b1bbf7..00e36d13a 100644
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@@ -142,10 +142,10 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
     Check https://platform.openai.com/examples for examples
     """
 
-    def __init__(self):
-        self.__init_openai(CONFIG)
+    def __init__(self, conf=CONFIG, **kwargs):
+        self.__init_openai(conf)
         self.llm = openai
-        self.model = CONFIG.openai_api_model
+        self.model = conf.openai_api_model
         self.auto_max_tokens = False
         self._cost_manager = CostManager()
         RateLimiter.__init__(self, rpm=self.rpm)