update code

This commit is contained in:
stellahsr 2023-10-06 18:49:13 +08:00
parent c1c1d22cff
commit 7d14a85329
6 changed files with 38 additions and 19 deletions

View file

@ -22,6 +22,7 @@ class GenerateActionCode(Action):
Implement the logic for generating action code here.
"""
logger.info(f"human_msg {human_msg}, system_msg {system_msg}")
rsp = await self._aask(prompt=human_msg, system_msgs=system_msg)
parsed_result = parse_action_response(rsp)
# logger.info(f"parsed_result is HERE: {parsed_result}")

View file

@ -187,7 +187,7 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
"max_tokens": self.get_max_tokens(messages),
"n": 1,
"stop": None,
"temperature": 0.3,
"temperature": 0.0,
"timeout": 3,
}
if CONFIG.openai_api_type == "azure":

View file

@ -14,6 +14,7 @@ from metagpt.actions.minecraft.manage_skills import (
RetrieveSkills,
AddNewSkills,
)
from metagpt.actions.minecraft.review_task import VerifyTask
import metagpt.utils.minecraft as utils
from metagpt.config import CONFIG
from metagpt.actions.minecraft.control_primitives_context import (
@ -46,7 +47,8 @@ class ActionDeveloper(Base):
self._watch([RetrieveSkills])
self.rollout_num_iter = 0
self.task_max_retries = 4
self.critic_reviewer = agent_registry.entries["critic_agent"]
self.critic_reviewer = None #self._rc.env.roles["Task Reviewer"]
logger.info(self.critic_reviewer)
def render_system_message(self, skills=[], *args, **kwargs):
"""
@ -190,10 +192,16 @@ class ActionDeveloper(Base):
async def run_step(self, human_msg, system_msg, *args, **kwargs):
while True:
messages, reward, done, info = await self.runcode_and_evaluate(human_msg, system_msg, *args, **kwargs)
logger.info(f"self.rollout_num_iter {self.rollout_num_iter}")
system_msg, human_msg, reward, done, info = await self.runcode_and_evaluate(human_msg, system_msg, *args, **kwargs)
if done:
break
return messages, reward, done, info
#return [system_msg, human_msg], reward, done, info
return Message(
content=f"{info}",
instruct_content="generate_action_code",
role=self.profile,
)
async def handle_add_new_skills(
self, task, program_name, program_code, skills, *args, **kwargs
@ -232,6 +240,7 @@ class ActionDeveloper(Base):
events = await self._obtain_events()
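# Note: the events here are the ones observed after the new action function has been executed.
# The evaluation result has been updated and the latest environment info has been called back to ga.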
self.critic_reviewer = self._rc.env.roles["Task Reviewer"]
await self.critic_reviewer._act() # todo: critic act内的update event放在这里似乎更合理
critique = self.game_memory.critique
@ -242,7 +251,7 @@ class ActionDeveloper(Base):
# todo: callback game memory reset block info
logger.info("Not success, reset block info !")
logger.info(
f"\033[32m****Action Agent human message****\n{self.messages[-1].content}\033[0m"
f"\033[32m****Action Agent human message****\n{human_msg}\033[0m"
)
# add new skills no matter success or not
@ -267,20 +276,23 @@ class ActionDeveloper(Base):
skills=retrieve_skills,
)
system_msg = message["system_msg"]
human_msg = message["human_msg"]
else:
message = {
"system_msg": [system_msg.content],
"human_msg": human_msg.content,
}
#message = {
# "system_msg": [system_msg.content],
# "human_msg": human_msg.content,
#}
self.critic_reviewer.maintain_actions(VerifyTask())
logger.info(f"system msg is {system_msg}, \n human_msg is {human_msg}")
logger.info(f"\033[34m Trying again!\033[0m")
self.rollout_num_iter += 1
done = (self.rollout_num_iter >= self.task_max_retries or self.game_memory.runtime_status)
info = {
"task": self.task,
"task": self.game_memory.current_task,
"success": self.game_memory.runtime_status,
"conversations": self.conversations,
#"conversations": self.conversations,
}
self.perform_game_info_callback(code, self.game_memory.update_code)
@ -288,7 +300,7 @@ class ActionDeveloper(Base):
program_name, self.game_memory.update_program_name
)
return message, 0, done, info
return system_msg, human_msg, 0, done, info
async def generate_action_code(self, human_msg, system_msg, *args, **kwargs):
code, program_name = await GenerateActionCode().run(
@ -340,12 +352,13 @@ class ActionDeveloper(Base):
if handler:
msg = await handler(**message)
msg.cause_by = type(todo)
msg.cause_by = GenerateActionCode
msg.round_id = self.round_id
logger.info(msg.send_to)
self.rollout_num_iter = 0
self._publish_message(msg)
return msg
raise ValueError(f"Unknown todo type: {type(todo)}")
# await self.run_step()
# await self.run_step()

View file

@ -143,10 +143,14 @@ class CriticReviewer(Base):
send_to=agent_registry.entries["skill_manager"]()._setting.name,
) # addnewskill
# TODO:if not success
async def _act(self) -> Message:
self._rc.todo = VerifyTask()
todo = self._rc.todo
logger.debug(f"Todo is {todo}")
self.maintain_actions(todo)
# Fetch the latest information about the game surroundings
events = await self._obtain_events()

View file

@ -6,7 +6,7 @@
@File : __init__.py
"""
from metagpt.utils.read_document import read_docx
#from metagpt.utils.read_document import read_docx
from metagpt.utils.singleton import Singleton
from metagpt.utils.token_counter import (
TOKEN_COSTS,
@ -16,7 +16,7 @@ from metagpt.utils.token_counter import (
__all__ = [
"read_docx",
# "read_docx",
"Singleton",
"TOKEN_COSTS",
"count_message_tokens",

View file

@ -13,7 +13,7 @@ from metagpt.minecraft_team import MinecraftPlayer
async def learn(task="Start", investment: float = 50.0, n_round: int = 3):
mc_player = MinecraftPlayer()
mc_player.set_port(1077) # Modify this to your Minecraft LAN port
mc_player.set_port(33141) # Modify this to your Minecraft LAN port
# mc_player.set_resume(True) # If load json from ckpt dir(include chest_memory, skills, ...)
mc_player.hire(
[
@ -24,7 +24,8 @@ async def learn(task="Start", investment: float = 50.0, n_round: int = 3):
]
)
print(mc_player.environment.roles)
print(mc_player.environment.roles["Generate code for specified tasks"]._rc)
mc_player.invest(investment)
mc_player.start(task)
await mc_player.run(n_round=n_round)