From 168bd3b57b2442a4fc8fe1c81e54f1102afa99e5 Mon Sep 17 00:00:00 2001 From: stellahsr Date: Sat, 7 Oct 2023 20:01:15 +0800 Subject: [PATCH 1/2] fix bug: update finish state in role init fix bug: update finish state when new round start fix bug: update CriticReviewer status in run_step() fix bug: update ga.runtime_status when failed update: add extra yaml load utils --- metagpt/actions/minecraft/generate_actions.py | 1 + metagpt/actions/minecraft/manage_skills.py | 3 +++ metagpt/minecraft_team.py | 2 ++ metagpt/roles/minecraft/action_developer.py | 15 ++++++++++----- metagpt/roles/minecraft/critic_agent.py | 5 +++-- metagpt/roles/minecraft/curriculum_agent.py | 2 ++ metagpt/roles/minecraft/minecraft_base.py | 10 ++++++---- metagpt/roles/minecraft/skill_manager.py | 8 ++++++-- metagpt/roles/role.py | 2 ++ metagpt/utils/minecraft/yaml_utils.py | 15 +++++++++++++++ 10 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 metagpt/utils/minecraft/yaml_utils.py diff --git a/metagpt/actions/minecraft/generate_actions.py b/metagpt/actions/minecraft/generate_actions.py index 65433f326..a669b5374 100644 --- a/metagpt/actions/minecraft/generate_actions.py +++ b/metagpt/actions/minecraft/generate_actions.py @@ -15,6 +15,7 @@ class GenerateActionCode(Action): def __init__(self, name="", context=None, llm=None): super().__init__(name, context, llm) + self.llm.model = "gpt-4" async def generate_code(self, human_msg, system_msg=[]): """ diff --git a/metagpt/actions/minecraft/manage_skills.py b/metagpt/actions/minecraft/manage_skills.py index caec6c560..9b205dd19 100644 --- a/metagpt/actions/minecraft/manage_skills.py +++ b/metagpt/actions/minecraft/manage_skills.py @@ -50,12 +50,15 @@ class AddNewSkills(Action): ): # Implement the logic for adding new skills here. # TODO: Fix this + logger.info(f"check task {task}") if task.startswith("Deposit useless items into the chest at"): # No need to reuse the deposit skill return {} logger.info( f"Skill Manager generated description for {program_name}:\n{skill_desp}\033[0m" ) + logger.info(f"check skills {skills}") + if program_name in skills: logger.info(f"Skill {program_name} already exists. Rewriting!") self.vectordb._collection.delete(ids=[program_name]) diff --git a/metagpt/minecraft_team.py b/metagpt/minecraft_team.py index 68e20ea89..5d9243468 100644 --- a/metagpt/minecraft_team.py +++ b/metagpt/minecraft_team.py @@ -355,6 +355,7 @@ class MinecraftPlayer(SoftwareCompany): role.finish_step = False role.round_id += 1 role._rc.todo = None + role.finish_state = len(role._actions) logger.info(f"round_id:{role.round_id}") def hire(self, roles: list[Role]): @@ -394,6 +395,7 @@ class MinecraftPlayer(SoftwareCompany): while n_round > 0: # self._save() if self.check_complete_round(): + n_round -= 1 self.update_round() round_id += 1 diff --git a/metagpt/roles/minecraft/action_developer.py b/metagpt/roles/minecraft/action_developer.py index 4f585ea26..5b088c86a 100644 --- a/metagpt/roles/minecraft/action_developer.py +++ b/metagpt/roles/minecraft/action_developer.py @@ -20,8 +20,6 @@ from metagpt.config import CONFIG from metagpt.actions.minecraft.control_primitives_context import ( load_skills_code_context, ) -from metagpt.utils.minecraft import fix_and_parse_json -from metagpt.roles.minecraft.critic_agent import CriticReviewer @agent_registry.register("action_developer") @@ -42,13 +40,14 @@ class ActionDeveloper(Base): # Initialize actions specific to the Action role self._init_actions([GenerateActionCode]) + # Set events or actions the ActionAgent should watch or be aware of # 需要根据events进行自己chest_observation的更新 self._watch([RetrieveSkills]) self.rollout_num_iter = 0 self.task_max_retries = 4 + self.finish_state = len(self._actions) self.critic_reviewer = None # self._rc.env.roles["Task Reviewer"] - logger.info(self.critic_reviewer) def render_system_message(self, skills=[], *args, **kwargs): """ @@ -198,6 +197,8 @@ class ActionDeveloper(Base): if done: break # return [system_msg, human_msg], reward, done, info + # 结束前,将critic_reviewer 轮次状态更新,以便进入下一轮 + self.critic_reviewer.finish_step = True return Message( content=f"{info}", instruct_content="generate_action_code", @@ -282,8 +283,12 @@ class ActionDeveloper(Base): system_msg = message["system_msg"] human_msg = message["human_msg"] else: + self.perform_game_info_callback( + False, self.game_memory.update_exploration_progress + ) + logger.info(f"Code is None. Update runtime_status failed!") self.critic_reviewer.maintain_actions(VerifyTask()) - logger.info(f"system msg is {system_msg}, \n human_msg is {human_msg}") + # logger.info(f"system msg is {system_msg}, \n human_msg is {human_msg}") logger.info(f"\033[34m Trying again!\033[0m") self.rollout_num_iter += 1 @@ -326,7 +331,7 @@ class ActionDeveloper(Base): # 获取最新的游戏周边信息 # events = await self._obtain_events() events = self.game_memory.event - logger.info(events) + # logger.info(events) # self.perform_game_info_callback(events, self.game_memory.update_event) logger.info(self.game_memory.event_summary) context = self.game_memory.context diff --git a/metagpt/roles/minecraft/critic_agent.py b/metagpt/roles/minecraft/critic_agent.py index 3bf632909..ba37689c7 100644 --- a/metagpt/roles/minecraft/critic_agent.py +++ b/metagpt/roles/minecraft/critic_agent.py @@ -34,6 +34,7 @@ class CriticReviewer(Base): # Set events or actions the CriticReviewer should watch or be aware of # 需要获取最新的events来进行评估 self._watch([]) + self.finish_state = len(self._actions) async def run(self, message=None): """Observe, only get the observation""" @@ -157,7 +158,7 @@ class CriticReviewer(Base): # 获取最新的游戏周边信息 events = await self._execute_events() self.perform_game_info_callback(events, self.game_memory.update_chest_memory) - logger.info(f"Execute return event is {self.game_memory.event}") + # logger.info(f"Execute return event is {self.game_memory.event}") context = self.game_memory.context task = self.game_memory.current_task chest_observation = self.game_memory.chest_observation @@ -173,7 +174,7 @@ class CriticReviewer(Base): VerifyTask: self.verify_task, } handler = handler_map.get(type(todo)) - logger.info(handler) + # logger.info(handler) if handler: msg = await handler(**message) msg.cause_by = type(todo) diff --git a/metagpt/roles/minecraft/curriculum_agent.py b/metagpt/roles/minecraft/curriculum_agent.py index 68e394786..1b2b12d2f 100644 --- a/metagpt/roles/minecraft/curriculum_agent.py +++ b/metagpt/roles/minecraft/curriculum_agent.py @@ -32,6 +32,8 @@ class CurriculumDesigner(Base): # Set events or actions the ActionAgent should watch or be aware of self._watch([PlayerActions, DesignTask]) + logger.info(self._actions) + self.finish_state = len(self._actions) def render_curriculum_observation(self, *, events, chest_observation): """ diff --git a/metagpt/roles/minecraft/minecraft_base.py b/metagpt/roles/minecraft/minecraft_base.py index dbc3c10a9..c59acc3d7 100644 --- a/metagpt/roles/minecraft/minecraft_base.py +++ b/metagpt/roles/minecraft/minecraft_base.py @@ -52,26 +52,28 @@ class Minecraft(Role): self.finish_step = False def maintain_actions(self, todo): + logger.info(f"{self._setting.name}:{self.finish_state}") if todo in self._actions: self.finish_state-=1 if self.finish_state<=0: self.finish_step = True + logger.info(f"{self._setting.name}:{self.finish_state}") async def _observe(self) -> int: await super()._observe() for msg in self._rc.news: logger.info(f"check msg round :{msg.round_id}") - logger.info(msg.round_id == self.round_id) + # logger.info(msg.round_id == self.round_id) self._rc.news = [ msg for msg in self._rc.news if msg.round_id == self.round_id ] # only relevant msgs count as observed news - logger.info(len(self._rc.news)) + # logger.info(len(self._rc.news)) return len(self._rc.news) async def _think(self) -> None: logger.info(self._actions) - logger.info(self._rc.state) + # logger.info(self._rc.state) if len(self._actions) == 1: # If there is only one action, then only this one can be performed self._set_state(0) @@ -133,5 +135,5 @@ agent_registry = Registry(name="Minecraft") if __name__ == "__main__": mc = Minecraft() result = "Async operation result" - # ûصݽ + # mc.perform_memory_callback(mc.my_callback) diff --git a/metagpt/roles/minecraft/skill_manager.py b/metagpt/roles/minecraft/skill_manager.py index 161ec08ae..c86d0d2be 100644 --- a/metagpt/roles/minecraft/skill_manager.py +++ b/metagpt/roles/minecraft/skill_manager.py @@ -11,7 +11,7 @@ from metagpt.actions.minecraft.manage_skills import ( RetrieveSkills, AddNewSkills, ) -from metagpt.actions.minecraft.review_task import VerifyTask +from metagpt.actions.minecraft import GenerateActionCode from metagpt.actions.minecraft.design_curriculumn import DesignCurriculum from metagpt.utils.minecraft import load_prompt @@ -32,8 +32,10 @@ class SkillManager(Base): # Set events or actions the SkillManager should watch or be aware of self._watch( - [DesignCurriculum, VerifyTask, RetrieveSkills, GenerateSkillDescription] + [DesignCurriculum, GenerateActionCode, RetrieveSkills, GenerateSkillDescription] ) + + self.finish_state = len(self._actions) def encapsule_message(self, program_code, program_name, *args, **kwargs): system_msg = self.render_system_message(load_prompt("skill")) @@ -128,8 +130,10 @@ class SkillManager(Base): handler = handler_map.get(type(todo)) if handler: if type(todo) == DesignCurriculum: + logger.info(retrieve_skills_message_step1) msg = await handler(**retrieve_skills_message_step1) elif type(todo) == RetrieveSkills: + logger.info(retrieve_skills_message_step2) msg = await handler(**retrieve_skills_message_step2) elif type(todo) == GenerateSkillDescription: msg = await handler(**generate_skill_message) diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index fc21ef76b..de66c8922 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -108,10 +108,12 @@ class Role: def _init_actions(self, actions): self._reset() for idx, action in enumerate(actions): + if not isinstance(action, Action): i = action("") else: i = action + i.set_prefix(self._get_prefix(), self.profile) self._actions.append(i) self._states.append(f"{idx}. {action}") diff --git a/metagpt/utils/minecraft/yaml_utils.py b/metagpt/utils/minecraft/yaml_utils.py new file mode 100644 index 000000000..35b87c211 --- /dev/null +++ b/metagpt/utils/minecraft/yaml_utils.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# @Date : 2023/10/7 16:32 +# @Author : stellahong (stellahong@fuzhi.ai) +# @Desc : + +import yaml + +from metagpt.const import PROJECT_ROOT + + +def load_extra_conf(yaml_file=PROJECT_ROOT / "config/add_config.yaml"): + with open(yaml_file, "r", encoding="utf-8") as file: + yaml_data = yaml.safe_load(file) + + return yaml_data From f70ba27d35333da1ac7d7c58e91d96c20461ce8c Mon Sep 17 00:00:00 2001 From: stellahsr Date: Sat, 7 Oct 2023 20:24:55 +0800 Subject: [PATCH 2/2] add different llms for different agents --- metagpt/actions/minecraft/design_curriculumn.py | 1 + metagpt/actions/minecraft/manage_skills.py | 3 +++ metagpt/actions/minecraft/review_task.py | 1 + metagpt/provider/openai_api.py | 6 +++--- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/metagpt/actions/minecraft/design_curriculumn.py b/metagpt/actions/minecraft/design_curriculumn.py index 9d0daa72e..a5e321326 100644 --- a/metagpt/actions/minecraft/design_curriculumn.py +++ b/metagpt/actions/minecraft/design_curriculumn.py @@ -24,6 +24,7 @@ class DesignTask(Action): def __init__(self, name="", context=None, llm=None): super().__init__(name, context, llm) + self.llm.model = "gpt-3.5-turbo" async def decompose_task(self, query, events): system_msgs = SystemMessage( diff --git a/metagpt/actions/minecraft/manage_skills.py b/metagpt/actions/minecraft/manage_skills.py index 9b205dd19..35d35e27b 100644 --- a/metagpt/actions/minecraft/manage_skills.py +++ b/metagpt/actions/minecraft/manage_skills.py @@ -18,6 +18,7 @@ class RetrieveSkills(Action): def __init__(self, name="", context=None, llm=None): super().__init__(name, context, llm) + self.llm.model = "gpt-3.5-turbo" async def run(self, query, skills, *args, **kwargs): # Implement the logic for retrieving skills here. @@ -44,6 +45,7 @@ class AddNewSkills(Action): def __init__(self, name="", context=None, llm=None): super().__init__(name, context, llm) + self.llm.model = "gpt-3.5-turbo" async def run( self, task, program_name, program_code, skills, skill_desp, *args, **kwargs @@ -100,6 +102,7 @@ class GenerateSkillDescription(Action): def __init__(self, name="", context=None, llm=None): super().__init__(name, context, llm) + self.llm.model = "gpt-3.5-turbo" async def run(self, program_name, human_message, system_message, *args, **kwargs): # Implement the logic for generating skill descriptions here. diff --git a/metagpt/actions/minecraft/review_task.py b/metagpt/actions/minecraft/review_task.py index 3a46b9752..ed2f34a4f 100644 --- a/metagpt/actions/minecraft/review_task.py +++ b/metagpt/actions/minecraft/review_task.py @@ -15,6 +15,7 @@ class VerifyTask(Action): def __init__(self, name="", context=None, llm=None): super().__init__(name, context, llm) + self.llm.model = "gpt-3.5-turbo" async def run(self,human_msg, system_msg, max_retries=5, *args, **kwargs): # Implement the logic to verify the task here. diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 303b1bbf7..00e36d13a 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -142,10 +142,10 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter): Check https://platform.openai.com/examples for examples """ - def __init__(self): - self.__init_openai(CONFIG) + def __init__(self, conf=CONFIG, **kwargs): + self.__init_openai(conf) self.llm = openai - self.model = CONFIG.openai_api_model + self.model = conf.openai_api_model self.auto_max_tokens = False self._cost_manager = CostManager() RateLimiter.__init__(self, rpm=self.rpm)