diff --git a/metagpt/actions/minecraft/generate_actions.py b/metagpt/actions/minecraft/generate_actions.py index 8cc32ec08..588f733cf 100644 --- a/metagpt/actions/minecraft/generate_actions.py +++ b/metagpt/actions/minecraft/generate_actions.py @@ -22,6 +22,7 @@ class GenerateActionCode(Action): Implement the logic for generating action code here. """ + logger.info(f"human_msg {human_msg}, system_msg {system_msg}") rsp = await self._aask(prompt=human_msg, system_msgs=system_msg) parsed_result = parse_action_response(rsp) # logger.info(f"parsed_result is HERE: {parsed_result}") diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 7e865f288..303b1bbf7 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -187,7 +187,7 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter): "max_tokens": self.get_max_tokens(messages), "n": 1, "stop": None, - "temperature": 0.3, + "temperature": 0.0, "timeout": 3, } if CONFIG.openai_api_type == "azure": diff --git a/metagpt/roles/minecraft/action_developer.py b/metagpt/roles/minecraft/action_developer.py index 618a8b431..74f5887eb 100644 --- a/metagpt/roles/minecraft/action_developer.py +++ b/metagpt/roles/minecraft/action_developer.py @@ -14,6 +14,7 @@ from metagpt.actions.minecraft.manage_skills import ( RetrieveSkills, AddNewSkills, ) +from metagpt.actions.minecraft.review_task import VerifyTask import metagpt.utils.minecraft as utils from metagpt.config import CONFIG from metagpt.actions.minecraft.control_primitives_context import ( @@ -46,7 +47,8 @@ class ActionDeveloper(Base): self._watch([RetrieveSkills]) self.rollout_num_iter = 0 self.task_max_retries = 4 - self.critic_reviewer = agent_registry.entries["critic_agent"] + self.critic_reviewer = None #self._rc.env.roles["Task Reviewer"] + logger.info(self.critic_reviewer) def render_system_message(self, skills=[], *args, **kwargs): """ @@ -190,10 +192,16 @@ class ActionDeveloper(Base): async def run_step(self, human_msg, system_msg, *args, **kwargs): while True: - messages, reward, done, info = await self.runcode_and_evaluate(human_msg, system_msg, *args, **kwargs) + logger.info(f"self.rollout_num_iter {self.rollout_num_iter}") + system_msg, human_msg, reward, done, info = await self.runcode_and_evaluate(human_msg, system_msg, *args, **kwargs) if done: break - return messages, reward, done, info + #return [system_msg, human_msg], reward, done, info + return Message( + content=f"{info}", + instruct_content="generate_action_code", + role=self.profile, + ) async def handle_add_new_skills( self, task, program_name, program_code, skills, *args, **kwargs @@ -232,6 +240,7 @@ class ActionDeveloper(Base): events = await self._obtain_events() # 注意:这里的events对应是执行了新的action函数之后的events信息 # 更新了评估结果, 回调了最新的环境信息到ga + self.critic_reviewer = self._rc.env.roles["Task Reviewer"] await self.critic_reviewer._act() # todo: critic act内的update event放在这里似乎更合理? critique = self.game_memory.critique @@ -242,7 +251,7 @@ class ActionDeveloper(Base): # todo: callback game memory reset block info logger.info("Not success, reset block info !") logger.info( - f"\033[32m****Action Agent human message****\n{self.messages[-1].content}\033[0m" + f"\033[32m****Action Agent human message****\n{human_msg}\033[0m" ) # add new skills no matter success or not @@ -267,20 +276,23 @@ class ActionDeveloper(Base): skills=retrieve_skills, ) - + system_msg = message["system_msg"] + human_msg = message["human_msg"] else: - message = { - "system_msg": [system_msg.content], - "human_msg": human_msg.content, - } + #message = { + # "system_msg": [system_msg.content], + # "human_msg": human_msg.content, + #} + self.critic_reviewer.maintain_actions(VerifyTask()) + logger.info(f"system msg is {system_msg}, \n human_msg is {human_msg}") logger.info(f"\033[34m Trying again!\033[0m") self.rollout_num_iter += 1 done = (self.rollout_num_iter >= self.task_max_retries or self.game_memory.runtime_status) info = { - "task": self.task, + "task": self.game_memory.current_task, "success": self.game_memory.runtime_status, - "conversations": self.conversations, + #"conversations": self.conversations, } self.perform_game_info_callback(code, self.game_memory.update_code) @@ -288,7 +300,7 @@ class ActionDeveloper(Base): program_name, self.game_memory.update_program_name ) - return message, 0, done, info + return system_msg, human_msg, 0, done, info async def generate_action_code(self, human_msg, system_msg, *args, **kwargs): code, program_name = await GenerateActionCode().run( @@ -340,12 +352,13 @@ class ActionDeveloper(Base): if handler: msg = await handler(**message) - msg.cause_by = type(todo) + msg.cause_by = GenerateActionCode msg.round_id = self.round_id logger.info(msg.send_to) + self.rollout_num_iter = 0 self._publish_message(msg) return msg raise ValueError(f"Unknown todo type: {type(todo)}") - # await self.run_step() \ No newline at end of file + # await self.run_step() diff --git a/metagpt/roles/minecraft/critic_agent.py b/metagpt/roles/minecraft/critic_agent.py index fb82c1cc7..3ab904b49 100644 --- a/metagpt/roles/minecraft/critic_agent.py +++ b/metagpt/roles/minecraft/critic_agent.py @@ -143,10 +143,14 @@ class CriticReviewer(Base): send_to=agent_registry.entries["skill_manager"]()._setting.name, ) # addnewskill # TODO:if not success + async def _act(self) -> Message: + self._rc.todo = VerifyTask() todo = self._rc.todo + logger.debug(f"Todo is {todo}") + self.maintain_actions(todo) # 获取最新的游戏周边信息 events = await self._obtain_events() diff --git a/metagpt/utils/__init__.py b/metagpt/utils/__init__.py index f13175cf8..a7535383a 100644 --- a/metagpt/utils/__init__.py +++ b/metagpt/utils/__init__.py @@ -6,7 +6,7 @@ @File : __init__.py """ -from metagpt.utils.read_document import read_docx +#from metagpt.utils.read_document import read_docx from metagpt.utils.singleton import Singleton from metagpt.utils.token_counter import ( TOKEN_COSTS, @@ -16,7 +16,7 @@ from metagpt.utils.token_counter import ( __all__ = [ - "read_docx", +# "read_docx", "Singleton", "TOKEN_COSTS", "count_message_tokens", diff --git a/minecraft_run.py b/minecraft_run.py index d7d2cf7c2..8592fb5ef 100644 --- a/minecraft_run.py +++ b/minecraft_run.py @@ -13,7 +13,7 @@ from metagpt.minecraft_team import MinecraftPlayer async def learn(task="Start", investment: float = 50.0, n_round: int = 3): mc_player = MinecraftPlayer() - mc_player.set_port(1077) # Modify this to your Minecraft LAN port + mc_player.set_port(33141) # Modify this to your Minecraft LAN port # mc_player.set_resume(True) # If load json from ckpt dir(include chest_memory, skills, ...) mc_player.hire( [ @@ -24,7 +24,8 @@ async def learn(task="Start", investment: float = 50.0, n_round: int = 3): ] ) - + print(mc_player.environment.roles) + print(mc_player.environment.roles["Generate code for specified tasks"]._rc) mc_player.invest(investment) mc_player.start(task) await mc_player.run(n_round=n_round)