From 6ebe8efc6334940e414beadbfae862a5b1c6894f Mon Sep 17 00:00:00 2001 From: better629 Date: Mon, 16 Oct 2023 15:19:03 +0800 Subject: [PATCH] add general environment and refactor environment's w/r funcs --- .../st_game/actions/gen_action_details.py | 34 +++-- examples/st_game/actions/gen_iter_chat_utt.py | 12 +- examples/st_game/maze_environment.py | 30 ++++- examples/st_game/plan/converse.py | 11 +- examples/st_game/plan/st_plan.py | 117 +++++++++--------- examples/st_game/roles/st_role.py | 62 ++++++---- examples/st_game/run_st_game.py | 7 +- examples/st_game/stanford_town.py | 4 + .../tests/actions/test_gen_action_details.py | 22 ++-- examples/st_game/tests/plan/test_converse.py | 4 +- examples/st_game/tests/test_maze.py | 5 +- examples/st_game/utils/utils.py | 4 +- metagpt/environment/__init__.py | 3 + metagpt/{ => environment}/environment.py | 4 +- metagpt/environment/general_environment.py | 32 +++++ metagpt/environment/gym_environment.py | 21 ++++ metagpt/roles/role.py | 1 - metagpt/software_company.py | 2 +- tests/metagpt/environment/__init__.py | 3 + .../environment/test_gym_environment.py | 19 +++ tests/metagpt/test_environment.py | 2 +- 21 files changed, 252 insertions(+), 147 deletions(-) create mode 100644 metagpt/environment/__init__.py rename metagpt/{ => environment}/environment.py (97%) create mode 100644 metagpt/environment/general_environment.py create mode 100644 metagpt/environment/gym_environment.py create mode 100644 tests/metagpt/environment/__init__.py create mode 100644 tests/metagpt/environment/test_gym_environment.py diff --git a/examples/st_game/actions/gen_action_details.py b/examples/st_game/actions/gen_action_details.py index b740fbe70..dcdeda902 100644 --- a/examples/st_game/actions/gen_action_details.py +++ b/examples/st_game/actions/gen_action_details.py @@ -8,7 +8,6 @@ import random from metagpt.logs import logger from metagpt.schema import Message -from ..maze import Maze from .st_action import STAction @@ -34,9 +33,9 @@ class GenActionSector(STAction): fs = ("kitchen") return fs - def run(self, role: "STRole", maze: Maze, act_desp: str): - def create_prompt_input(role, maze, act_desp): - act_world = f"{maze.access_tile(role.scratch.curr_tile)['world']}" + def run(self, role: "STRole", access_tile: dict[str, str], act_desp: str): + def create_prompt_input(role, access_tile: dict[str, str], act_desp): + act_world = f"{access_tile['world']}" prompt_input = [] @@ -46,8 +45,8 @@ class GenActionSector(STAction): prompt_input += [role.s_mem.get_str_accessible_sector_arenas(x)] prompt_input += [role.scratch.get_str_name()] - prompt_input += [f"{maze.access_tile(role.scratch.curr_tile)['sector']}"] - x = f"{act_world}:{maze.access_tile(role.scratch.curr_tile)['sector']}" + prompt_input += [f"{access_tile['sector']}"] + x = f"{act_world}:{access_tile['sector']}" prompt_input += [role.s_mem.get_str_accessible_sector_arenas(x)] if role.scratch.get_str_daily_plan_req() != "": @@ -57,7 +56,7 @@ class GenActionSector(STAction): # MAR 11 TEMP prompt_input = [] - act_world = maze.access_tile(role.scratch.curr_tile)["world"] + act_world = access_tile["world"] accessible_sector_str = role.s_mem.get_str_accessible_sectors(act_world) curr = accessible_sector_str.split(", ") fin_accessible_sectors = [] @@ -85,12 +84,12 @@ class GenActionSector(STAction): return prompt_input prompt_template = "action_location_sector_v1.txt" - prompt_input = create_prompt_input(role, maze, act_desp) + prompt_input = create_prompt_input(role, access_tile, act_desp) prompt = self.generate_prompt_with_tmpl_filename(prompt_input, prompt_template) self.fail_default_resp = self._func_fail_default_resp() output = self._run_text_davinci(prompt, max_tokens=15) - y = f"{maze.access_tile(role.scratch.curr_tile)['world']}" + y = f"{access_tile['world']}" x = [i.strip() for i in role.s_mem.get_str_accessible_sectors(y).split(",")] if output not in x: # output = random.choice(x) @@ -120,12 +119,9 @@ class GenActionArena(STAction): fs = ("kitchen") return fs - def run(self, role: "STRole", maze: Maze, act_desp: str, act_world: str, act_sector: str): - def create_prompt_input(role, maze, act_desp, act_world, act_sector): + def run(self, role: "STRole", act_desp: str, act_world: str, act_sector: str): + def create_prompt_input(role, act_desp, act_world, act_sector): prompt_input = [] - # prompt_input += [role.scratch.get_str_name()] - # prompt_input += [maze.access_tile(role.scratch.curr_tile)["arena"]] - # prompt_input += [maze.access_tile(role.scratch.curr_tile)["sector"]] prompt_input += [role.scratch.get_str_name()] x = f"{act_world}:{act_sector}" prompt_input += [act_sector] @@ -159,7 +155,7 @@ class GenActionArena(STAction): return prompt_input prompt_template = "action_location_object_vMar11.txt" - prompt_input = create_prompt_input(role, maze, act_desp, act_world, act_sector) + prompt_input = create_prompt_input(role, act_desp, act_world, act_sector) prompt = self.generate_prompt_with_tmpl_filename(prompt_input, prompt_template) self.fail_default_resp = self._func_fail_default_resp() output = self._run_text_davinci(prompt, max_tokens=15) @@ -392,10 +388,10 @@ class GenActionDetails(STAction): role: "STRole", act_desp: str, act_dura): - maze = role._rc.env.maze - act_world = maze.access_tile(role.scratch.curr_tile)["world"] - act_sector = GenActionSector().run(role, maze, act_desp) - act_arena = GenActionArena().run(role, maze, act_desp, act_world, act_sector) + access_tile = role._rc.env.call_func("access_tile", tile=role.scratch.curr_tile) + act_world = access_tile["world"] + act_sector = GenActionSector().run(role, access_tile, act_desp) + act_arena = GenActionArena().run(role, act_desp, act_world, act_sector) act_address = f"{act_world}:{act_sector}:{act_arena}" act_game_object = GenActionObject().run(role, act_desp, act_address) new_address = f"{act_world}:{act_sector}:{act_arena}:{act_game_object}" diff --git a/examples/st_game/actions/gen_iter_chat_utt.py b/examples/st_game/actions/gen_iter_chat_utt.py index aa2dab964..6883c06c2 100644 --- a/examples/st_game/actions/gen_iter_chat_utt.py +++ b/examples/st_game/actions/gen_iter_chat_utt.py @@ -7,7 +7,6 @@ from metagpt.schema import Message from examples.st_game.actions.st_action import STAction from examples.st_game.utils.utils import extract_first_json_dict -from examples.st_game.maze import Maze class GenIterChatUTT(STAction): @@ -44,9 +43,9 @@ class GenIterChatUTT(STAction): cleaned_dict["end"] = False return cleaned_dict - def run(self, maze: Maze, init_role: "STRole", target_role: "STRole", retrieved: dict, curr_context: str, + def run(self, init_role: "STRole", target_role: "STRole", retrieved: dict, curr_context: str, curr_chat: list[str], *args, **kwargs) -> dict: - def create_prompt_input(maze: Maze, init_role: "STRole", target_role: "STRole", + def create_prompt_input(access_tile: dict[str, str], init_role: "STRole", target_role: "STRole", retrieved: dict, curr_context: str, curr_chat: list[str]): role = init_role scratch = role._rc.scratch @@ -67,8 +66,8 @@ class GenIterChatUTT(STAction): prev_convo_insert = "" print(prev_convo_insert) - curr_sector = f"{maze.access_tile(scratch.curr_tile)['sector']}" - curr_arena = f"{maze.access_tile(scratch.curr_tile)['arena']}" + curr_sector = f"{access_tile['sector']}" + curr_arena = f"{access_tile['arena']}" curr_location = f"{curr_arena} in {curr_sector}" retrieved_str = "" @@ -91,7 +90,8 @@ class GenIterChatUTT(STAction): ] return prompt_input - prompt_input = create_prompt_input(maze, init_role, target_role, retrieved, curr_context, curr_chat) + access_tile = init_role._rc.env.call_func("access_tile", tile=init_role.scratch.curr_tile) + prompt_input = create_prompt_input(access_tile, init_role, target_role, retrieved, curr_context, curr_chat) prompt = self.generate_prompt_with_tmpl_filename(prompt_input, "iterative_convo_v1.txt") # original using `ChatGPT_safe_generate_response_OLD` diff --git a/examples/st_game/maze_environment.py b/examples/st_game/maze_environment.py index 894ace49b..fe13832a6 100644 --- a/examples/st_game/maze_environment.py +++ b/examples/st_game/maze_environment.py @@ -2,18 +2,40 @@ # -*- coding: utf-8 -*- # @Desc : maze environment +from typing import Tuple from pydantic import Field -from metagpt.environment import Environment +from metagpt.environment.environment import Environment +from metagpt.environment.general_environment import GeneralEnvironment from metagpt.roles.role import Role from examples.st_game.maze import Maze -class MazeEnvironment(Environment): +class MazeEnvironment(GeneralEnvironment): maze: Maze = Field(default_factory=Maze) def add_role(self, role: Role): - role.set_env(self) - self.roles[role.name] = role # use role.name as key not role.profile + self.roles[role.name] = role + + def init_register_funcs(self): + self.register_func("access_tile", self.maze.access_tile) + self.register_func("add_tiles_event", self.add_tiles_event) + self.register_func("get_nearby_tiles", self.maze.get_nearby_tiles) + self.register_func("get_tile_path", self.maze.get_tile_path) + self.register_func("get_collision_maze", self.get_collision_maze) + self.register_func("get_address_tiles", self.get_address_tiles) + self.register_func("turn_event_from_tile_idle", self.maze.turn_event_from_tile_idle) + self.register_func("remove_subject_events_from_tile", self.maze.remove_subject_events_from_tile) + self.register_func("add_event_from_tile", self.maze.add_event_from_tile) + self.register_func("remove_event_from_tile", self.maze.remove_event_from_tile) + + def add_tiles_event(self, pt_y: int, pt_x: int, event: Tuple[str, str, str, str]): + self.maze.tiles[pt_y][pt_x]["events"].add(event) + + def get_collision_maze(self) -> list: + return self.maze.collision_maze + + def get_address_tiles(self) -> dict: + return self.maze.address_tiles diff --git a/examples/st_game/plan/converse.py b/examples/st_game/plan/converse.py index 2b2877a7b..e2edc5ad6 100644 --- a/examples/st_game/plan/converse.py +++ b/examples/st_game/plan/converse.py @@ -6,13 +6,12 @@ from typing import Union, Tuple from metagpt.logs import logger -from examples.st_game.maze import Maze from examples.st_game.memory.retrieve import new_agent_retrieve from examples.st_game.actions.agent_chat_sum_rel import AgentChatSumRel from examples.st_game.actions.gen_iter_chat_utt import GenIterChatUTT -def agent_conversation(maze: Maze, init_role: "STRole", target_role: "STRole") -> list[list[str]]: +def agent_conversation(init_role: "STRole", target_role: "STRole") -> list[list[str]]: curr_chat = [] logger.info(f"Role: {init_role.name} starts a conversation with Role: {target_role.name}") @@ -37,7 +36,7 @@ def agent_conversation(maze: Maze, init_role: "STRole", target_role: "STRole") - focal_points = [f"{relationship}", f"{target_scratch.name} is {target_scratch.act_description}"] retrieved = new_agent_retrieve(init_role, focal_points, 15) - utt, end = generate_one_utterance(maze, init_role, target_role, retrieved, curr_chat) + utt, end = generate_one_utterance(init_role, target_role, retrieved, curr_chat) curr_chat += [[scratch.name, utt]] if end: @@ -58,7 +57,7 @@ def agent_conversation(maze: Maze, init_role: "STRole", target_role: "STRole") - focal_points = [f"{relationship}", f"{scratch.name} is {scratch.act_description}"] retrieved = new_agent_retrieve(target_role, focal_points, 15) - utt, end = generate_one_utterance(maze, target_role, init_role, retrieved, curr_chat) + utt, end = generate_one_utterance(target_role, init_role, retrieved, curr_chat) curr_chat += [[target_scratch.name, utt]] if end: @@ -88,7 +87,7 @@ def generate_summarize_agent_relationship(init_role: "STRole", return summarized_relationship -def generate_one_utterance(maze: Maze, init_role, target_role, retrieved: dict, curr_chat: list) -> Tuple[str, str]: +def generate_one_utterance(init_role, target_role, retrieved: dict, curr_chat: list) -> Tuple[str, str]: # Chat version optimized for speed via batch generation scratch = init_role._rc.scratch target_scratch = target_role._rc.scratch @@ -101,6 +100,6 @@ def generate_one_utterance(maze: Maze, init_role, target_role, retrieved: dict, f"is initiating a conversation with " + f"{target_scratch.name}.") - x = GenIterChatUTT().run(maze, init_role, target_role, retrieved, curr_context, curr_chat) + x = GenIterChatUTT().run(init_role, target_role, retrieved, curr_context, curr_chat) return x["utterance"], x["end"] diff --git a/examples/st_game/plan/st_plan.py b/examples/st_game/plan/st_plan.py index a2349fdc9..85227748f 100644 --- a/examples/st_game/plan/st_plan.py +++ b/examples/st_game/plan/st_plan.py @@ -9,7 +9,6 @@ import math from metagpt.llm import LLM from metagpt.logs import logger -from ..maze import Maze from ..plan.converse import agent_conversation from ..actions.decide_to_talk import DecideToTalk from ..actions.summarize_conv import SummarizeConv @@ -23,16 +22,16 @@ from ..utils.utils import get_embedding from ..memory.retrieve import new_agent_retrieve -def plan(role: "STRole", maze: Maze, roles: dict["STRole"], new_day: bool, retrieved: dict) -> str: +def plan(role: "STRole", roles: dict["STRole"], new_day: bool, retrieved: dict) -> str: # PART 1: Generate the hourly schedule. - if new_day: + if new_day: _long_term_planning(role, new_day) # PART 2: If the current action has expired, we want to create a new plan. act_check_finished = role.scratch.act_check_finished() logger.info(f"Role: {role.name} act_check_finished is {act_check_finished}") if act_check_finished: - _determine_action(role, maze) + _determine_action(role) # PART 3: If you perceived an event that needs to be responded to (saw # another role), and retrieved relevant information. @@ -60,7 +59,7 @@ def plan(role: "STRole", maze: Maze, roles: dict["STRole"], new_day: bool, retri if reaction_mode: # If we do want to chat, then we generate conversation if reaction_mode[:9] == "chat with": - _chat_react(maze, role, reaction_mode, roles) + _chat_react(role, reaction_mode, roles) elif reaction_mode[:4] == "wait": _wait_react(role, reaction_mode) @@ -246,7 +245,7 @@ def _should_react(role: "STRole", retrieved: dict, roles: dict): return False -def _chat_react(maze: Maze, role: "STRole", reaction_mode: str, roles: dict["STRole"]): +def _chat_react(role: "STRole", reaction_mode: str, roles: dict["STRole"]): # There are two roles -- the role who is initiating the conversation # and the role who is the target. We get the role instances here. init_role = role @@ -254,7 +253,7 @@ def _chat_react(maze: Maze, role: "STRole", reaction_mode: str, roles: dict["STR curr_roles = [init_role, target_role] # Actually creating the conversation here. - convo, duration_min = generate_convo(maze, init_role, target_role) # 2222 + convo, duration_min = generate_convo(init_role, target_role) # 2222 convo_summary = generate_convo_summary(convo) inserted_act = convo_summary inserted_act_dur = duration_min @@ -313,9 +312,9 @@ def _create_react(role: "STRole", inserted_act: str, inserted_act_dur: int, elif (scratch.f_daily_schedule_hourly_org[scratch.get_f_daily_schedule_hourly_org_index()][1] + scratch.f_daily_schedule_hourly_org[scratch.get_f_daily_schedule_hourly_org_index() + 1][1]): end_hour = start_hour + ( - (scratch.f_daily_schedule_hourly_org[scratch.get_f_daily_schedule_hourly_org_index()][1] + - scratch.f_daily_schedule_hourly_org[scratch.get_f_daily_schedule_hourly_org_index() + 1][ - 1]) / 60) + (scratch.f_daily_schedule_hourly_org[scratch.get_f_daily_schedule_hourly_org_index()][1] + + scratch.f_daily_schedule_hourly_org[scratch.get_f_daily_schedule_hourly_org_index() + 1][ + 1]) / 60) else: end_hour = start_hour + 2 @@ -357,7 +356,7 @@ def _wait_react(role: "STRole", reaction_mode: str): inserted_act = f'waiting to start {scratch.act_description.split("(")[-1][:-1]}' end_time = datetime.datetime.strptime(reaction_mode[6:].strip(), "%B %d, %Y, %H:%M:%S") inserted_act_dur = (end_time.minute + end_time.hour * 60) - ( - scratch.curr_time.minute + scratch.curr_time.hour * 60) + 1 + scratch.curr_time.minute + scratch.curr_time.hour * 60) + 1 act_address = f" {scratch.curr_tile[0]} {scratch.curr_tile[1]}" act_event = (role.name, "waiting to start", scratch.act_description.split("(")[-1][:-1]) @@ -376,8 +375,8 @@ def _wait_react(role: "STRole", reaction_mode: str): act_pronunciatio, act_obj_description, act_obj_pronunciatio, act_obj_event) -def generate_convo(maze: Maze, init_role: "STRole", target_role: "STRole") -> Union[list, int]: - convo = agent_conversation(maze, init_role, target_role) +def generate_convo(init_role: "STRole", target_role: "STRole") -> Union[list, int]: + convo = agent_conversation(init_role, target_role) all_utt = "" for row in convo: @@ -424,7 +423,7 @@ def generate_new_decomp_schedule(role: "STRole", inserted_act: str, inserted_act truncated_act_dur += [[scratch.f_daily_schedule[count][0], dur_sum - today_min_pass]] truncated_act_dur[-1][-1] -= ( - dur_sum - today_min_pass) # DEC 7 DEBUG;.. is the +1 the right thing to do??? + dur_sum - today_min_pass) # DEC 7 DEBUG;.. is the +1 the right thing to do??? # DEC 7 DEBUG;.. is the +1 the right thing to do??? # truncated_act_dur[-1][-1] -= (dur_sum - today_min_pass + 1) print("DEBUG::: ", truncated_act_dur) @@ -463,7 +462,7 @@ def generate_new_decomp_schedule(role: "STRole", inserted_act: str, inserted_act inserted_act_dur) -def _long_term_planning(role: "STRole", new_day: bool): +def _long_term_planning(role: "STRole", new_day: bool): """ Formulates the role's daily long-term plan if it is the start of a new day. This basically has two components: first, we create the wake-up hour, @@ -481,7 +480,7 @@ def _long_term_planning(role: "STRole", new_day: bool): # When it is a new day, we start by creating the daily_req of the role. # Note that the daily_req is a list of strings that describe the role's # day in broad strokes. - if new_day == "First day": + if new_day == "First day": # Bootstrapping the daily plan for the start of then generation: # if this is the start of generation (so there is no previous day's # daily requirement, or if we are on a new day, we want to create a new @@ -504,7 +503,7 @@ def _long_term_planning(role: "STRole", new_day: bool): # Added March 4 -- adding plan to the memory. thought = f"This is {role.scratch.name}'s plan for {role.scratch.curr_time.strftime('%A %B %d')}:" - for i in role.scratch.daily_req: + for i in role.scratch.daily_req: thought += f" {i}," thought = thought[:-1] + "." created = role.scratch.curr_time @@ -513,16 +512,16 @@ def _long_term_planning(role: "STRole", new_day: bool): keywords = set(["plan"]) thought_poignancy = 5 thought_embedding_pair = (thought, get_embedding(thought)) - role.a_mem.add_thought(created, expiration, s, p, o, - thought, keywords, thought_poignancy, - thought_embedding_pair, None) + role.a_mem.add_thought(created, expiration, s, p, o, + thought, keywords, thought_poignancy, + thought_embedding_pair, None) # print("Sleeping for 20 seconds...") # time.sleep(10) # print("Done sleeping!") -def _determine_action(role: "STRole", maze: Maze): +def _determine_action(role: "STRole"): """ Creates the next action sequence for the role. The main goal of this function is to run "add_new_action" on the role's @@ -531,9 +530,9 @@ def _determine_action(role: "STRole", maze: Maze): As a part of this, the role may need to decompose its hourly schedule as needed. INPUT - role: Current instance whose action we are determining. - maze: Current instance. + role: Current instance whose action we are determining. """ + def determine_decomp(act_desp, act_dura): """ Given an action description and its duration, we determine whether we need @@ -546,12 +545,12 @@ def _determine_action(role: "STRole", maze: Maze): OUTPUT: a boolean. True if we need to decompose, False otherwise. """ - if "sleep" not in act_desp and "bed" not in act_desp: + if "sleep" not in act_desp and "bed" not in act_desp: return True elif "sleeping" in act_desp or "asleep" in act_desp or "in bed" in act_desp: return False - elif "sleep" in act_desp or "bed" in act_desp: - if act_dura > 60: + elif "sleep" in act_desp or "bed" in act_desp: + if act_dura > 60: return False return True @@ -570,18 +569,18 @@ def _determine_action(role: "STRole", maze: Maze): if curr_index == 0: # This portion is invoked if it is the first hour of the day. act_desp, act_dura = role.scratch.f_daily_schedule[curr_index] - if act_dura >= 60: - # We decompose if the next action is longer than an hour, and fits the - # criteria described in determine_decomp. - if determine_decomp(act_desp, act_dura): - role.scratch.f_daily_schedule[curr_index:curr_index+1] = ( - TaskDecomp().run(role, act_desp, act_dura)) + if act_dura >= 60: + # We decompose if the next action is longer than an hour, and fits the + # criteria described in determine_decomp. + if determine_decomp(act_desp, act_dura): + role.scratch.f_daily_schedule[curr_index:curr_index + 1] = ( + TaskDecomp().run(role, act_desp, act_dura)) if curr_index_60 + 1 < len(role.scratch.f_daily_schedule): - act_desp, act_dura = role.scratch.f_daily_schedule[curr_index_60+1] - if act_dura >= 60: - if determine_decomp(act_desp, act_dura): - role.scratch.f_daily_schedule[curr_index_60+1:curr_index_60+2] = ( - TaskDecomp().run(role, act_desp, act_dura)) + act_desp, act_dura = role.scratch.f_daily_schedule[curr_index_60 + 1] + if act_dura >= 60: + if determine_decomp(act_desp, act_dura): + role.scratch.f_daily_schedule[curr_index_60 + 1:curr_index_60 + 2] = ( + TaskDecomp().run(role, act_desp, act_dura)) if curr_index_60 < len(role.scratch.f_daily_schedule): # If it is not the first hour of the day, this is always invoked (it is @@ -589,42 +588,42 @@ def _determine_action(role: "STRole", maze: Maze): # decompose two hours in one go). Of course, we need to have something to # decompose as well, so we check for that too. if role.scratch.curr_time.hour < 23: - # And we don't want to decompose after 11 pm. + # And we don't want to decompose after 11 pm. act_desp, act_dura = role.scratch.f_daily_schedule[curr_index_60] - if act_dura >= 60: - if determine_decomp(act_desp, act_dura): - role.scratch.f_daily_schedule[curr_index_60:curr_index_60+1] = ( - TaskDecomp().run(role, act_desp, act_dura)) + if act_dura >= 60: + if determine_decomp(act_desp, act_dura): + role.scratch.f_daily_schedule[curr_index_60:curr_index_60 + 1] = ( + TaskDecomp().run(role, act_desp, act_dura)) # * End of Decompose * # Generate an instance from the action description and duration. By # this point, we assume that all the relevant actions are decomposed and # ready in f_daily_schedule. - print ("DEBUG LJSDLFSKJF") - for i in role.scratch.f_daily_schedule: print (i) - print (curr_index) - print (len(role.scratch.f_daily_schedule)) - print (role.scratch.name) - print ("------") + print("DEBUG LJSDLFSKJF") + for i in role.scratch.f_daily_schedule: print(i) + print(curr_index) + print(len(role.scratch.f_daily_schedule)) + print(role.scratch.name) + print("------") # 1440 x_emergency = 0 - for i in role.scratch.f_daily_schedule: + for i in role.scratch.f_daily_schedule: x_emergency += i[1] # print ("x_emergency", x_emergency) - if 1440 - x_emergency > 0: - print ("x_emergency__AAA", x_emergency) + if 1440 - x_emergency > 0: + print("x_emergency__AAA", x_emergency) role.scratch.f_daily_schedule += [["sleeping", 1440 - x_emergency]] - - act_desp, act_dura = role.scratch.f_daily_schedule[curr_index] + + act_desp, act_dura = role.scratch.f_daily_schedule[curr_index] new_action_details = GenActionDetails().run(role, act_desp, act_dura) # Adding the action to role's queue. role.scratch.add_new_action(**new_action_details) - -def revise_identity(role: "STRole"): + +def revise_identity(role: "STRole"): p_name = role.scratch.name focal_points = [f"{p_name}'s plan for {role.scratch.get_str_curr_date_str()}.", @@ -633,7 +632,7 @@ def revise_identity(role: "STRole"): statements = "[Statements]\n" for key, val in retrieved.items(): - for i in val: + for i in val: statements += f"{i.created.strftime('%A %B %d -- %H:%M %p')}: {i.embedding_key}\n" # print (";adjhfno;asdjao;idfjo;af", p_name) @@ -653,7 +652,7 @@ def revise_identity(role: "STRole"): currently_prompt = f"{p_name}'s status from {(role.scratch.curr_time - datetime.timedelta(days=1)).strftime('%A %B %d')}:\n" currently_prompt += f"{role.scratch.currently}\n\n" - currently_prompt += f"{p_name}'s thoughts at the end of {(role.scratch.curr_time - datetime.timedelta(days=1)).strftime('%A %B %d')}:\n" + currently_prompt += f"{p_name}'s thoughts at the end of {(role.scratch.curr_time - datetime.timedelta(days=1)).strftime('%A %B %d')}:\n" currently_prompt += (plan_note + thought_note).replace('\n', '') + "\n\n" currently_prompt += f"It is now {role.scratch.curr_time.strftime('%A %B %d')}. Given the above, write {p_name}'s status for {role.scratch.curr_time.strftime('%A %B %d')} that reflects {p_name}'s thoughts at the end of {(role.scratch.curr_time - datetime.timedelta(days=1)).strftime('%A %B %d')}. Write this in third-person talking about {p_name}." currently_prompt += f"If there is any scheduling information, be as specific as possible (include date, time, and location if stated in the statement).\n\n" @@ -673,7 +672,5 @@ def revise_identity(role: "STRole"): new_daily_req = LLM().ask(daily_req_prompt) new_daily_req = new_daily_req.replace('\n', ' ') - print ("WE ARE HERE!!!", new_daily_req) + print("WE ARE HERE!!!", new_daily_req) role.scratch.daily_plan_req = new_daily_req - - diff --git a/examples/st_game/roles/st_role.py b/examples/st_game/roles/st_role.py index 176fe0608..5c3fa544c 100644 --- a/examples/st_game/roles/st_role.py +++ b/examples/st_game/roles/st_role.py @@ -14,7 +14,6 @@ import math import time from pydantic import Field -from pathlib import Path import random import datetime from operator import itemgetter @@ -28,7 +27,6 @@ from examples.st_game.memory.spatial_memory import MemoryTree from examples.st_game.actions.dummy_action import DummyAction, DummyMessage from examples.st_game.actions.user_requirement import UserRequirement from examples.st_game.maze_environment import MazeEnvironment -from examples.st_game.memory.retrieve import new_agent_retrieve from examples.st_game.memory.scratch import Scratch from examples.st_game.utils.utils import get_embedding, path_finder from examples.st_game.utils.const import collision_block_id, STORAGE_PATH @@ -55,6 +53,7 @@ class STRole(Role): name: str = "Klaus Mueller", profile: str = "STMember", sim_code: str = "new_sim", + env: "MazeEnvironment" = None, step: int = 0, start_date: str = "", curr_time: str = "", @@ -74,6 +73,7 @@ class STRole(Role): self.role_storage_path = STORAGE_PATH.joinpath(f"{sim_code}/personas/{self.name}") self._rc = STRoleContext() + self.set_env(env) # init environment before start_project self.load_from() # load role's memory self._init_actions([]) @@ -89,7 +89,7 @@ class STRole(Role): pt_x = role_env["x"] pt_y = role_env["y"] self._rc.scratch.curr_tile = (pt_x, pt_y) - self._rc.env.maze.tiles[pt_y][pt_x]["events"].add(self.scratch.get_curr_event_and_desc()) + self._rc.env.call_func("add_tiles_event", pt_y=pt_y, pt_x=pt_x, event=self.scratch.get_curr_event_and_desc()) @property def name(self): @@ -195,23 +195,25 @@ class STRole(Role): OUTPUT: ret_events: a list of that are perceived and new. """ - maze = self._rc.env.maze # PERCEIVE SPACE # We get the nearby tiles given our current tile and the persona's vision # radius. - nearby_tiles = maze.get_nearby_tiles(self._rc.scratch.curr_tile, - self._rc.scratch.vision_r) + nearby_tiles = self._rc.env.call_func("get_nearby_tiles", + tile=self._rc.scratch.curr_tile, + vision_r=self._rc.scratch.vision_r) # We then store the perceived space. Note that the s_mem of the persona is # in the form of a tree constructed using dictionaries. for tile in nearby_tiles: - tile_info = maze.access_tile(tile) + tile_info = self._rc.env.call_func("access_tile", tile=tile) self._rc.spatial_memory.add_tile_info(tile_info) # PERCEIVE EVENTS. # We will perceive events that take place in the same arena as the # persona's current arena. - curr_arena_path = maze.get_tile_path(self._rc.scratch.curr_tile, "arena") + curr_arena_path = self._rc.env.call_func("get_tile_path", + tile=self._rc.scratch.curr_tile, + level="arena") # We do not perceive the same event twice (this can happen if an object is # extended across multiple tiles). percept_events_set = set() @@ -221,9 +223,12 @@ class STRole(Role): # First, we put all events that are occuring in the nearby tiles into the # percept_events_list for tile in nearby_tiles: - tile_details = maze.access_tile(tile) + tile_details = self._rc.env.call_func("access_tile", tile=tile) if tile_details["events"]: - if maze.get_tile_path(tile, "arena") == curr_arena_path: + tmp_arena_path = self._rc.env.call_func("get_tile_path", + tile=tile, + level="arena") + if tmp_arena_path == curr_arena_path: # This calculates the distance between the persona's current tile, # and the target tile. dist = math.dist([tile[0], tile[1]], @@ -356,7 +361,6 @@ class STRole(Role): e.g., "dolores double studio:double studio:bedroom 1:bed" """ roles = self._rc.env.get_roles() - maze = self._rc.env.maze if "" in plan and self._rc.scratch.planned_path == []: self._rc.scratch.act_path_set = False @@ -372,18 +376,18 @@ class STRole(Role): # Executing persona-persona interaction. target_p_tile = (roles[plan.split("")[-1].strip()] .scratch.curr_tile) - potential_path = path_finder(maze.collision_maze, + potential_path = path_finder(self._rc.env.call_func("get_collision_maze"), self._rc.scratch.curr_tile, target_p_tile, collision_block_id) if len(potential_path) <= 2: target_tiles = [potential_path[0]] else: - potential_1 = path_finder(maze.collision_maze, + potential_1 = path_finder(self._rc.env.call_func("get_collision_maze"), self._rc.scratch.curr_tile, potential_path[int(len(potential_path) / 2)], collision_block_id) - potential_2 = path_finder(maze.collision_maze, + potential_2 = path_finder(self._rc.env.call_func("get_collision_maze"), self._rc.scratch.curr_tile, potential_path[int(len(potential_path) / 2) + 1], collision_block_id) @@ -402,7 +406,7 @@ class STRole(Role): elif "" in plan: # Executing a random location action. plan = ":".join(plan.split(":")[:-1]) - target_tiles = maze.address_tiles[plan] + target_tiles = self._rc.env.call_func("get_address_tiles")[plan] target_tiles = random.sample(list(target_tiles), 1) else: @@ -411,10 +415,10 @@ class STRole(Role): # Retrieve the target addresses. Again, plan is an action address in its # string form. takes this and returns candidate # coordinates. - if plan not in maze.address_tiles: - maze.address_tiles["Johnson Park:park:park garden"] # ERRORRRRRRR + if plan not in self._rc.env.call_func("get_address_tiles"): + self._rc.env.call_func("get_address_tiles")["Johnson Park:park:park garden"] # ERRORRRRRRR else: - target_tiles = maze.address_tiles[plan] + target_tiles = self._rc.env.call_func("get_address_tiles")[plan] # There are sometimes more than one tile returned from this (e.g., a tabe # may stretch many coordinates). So, we sample a few here. And from that @@ -430,7 +434,8 @@ class STRole(Role): persona_name_set = set(roles.keys()) new_target_tiles = [] for i in target_tiles: - curr_event_set = maze.access_tile(i)["events"] + access_tile = self._rc.env.call_func("access_tile", tile=i) + curr_event_set = access_tile["events"] pass_curr_tile = False for j in curr_event_set: if j[0] in persona_name_set: @@ -444,7 +449,6 @@ class STRole(Role): # Now that we've identified the target tile, we find the shortest path to # one of the target tiles. curr_tile = self._rc.scratch.curr_tile - collision_maze = maze.collision_maze closest_target_tile = None path = None for i in target_tiles: @@ -452,7 +456,7 @@ class STRole(Role): # an input, and returns a list of coordinate tuples that becomes the # path. # e.g., [(0, 1), (1, 1), (1, 2), (1, 3), (1, 4)...] - curr_path = path_finder(maze.collision_maze, + curr_path = path_finder(self._rc.env.call_func("get_collision_maze"), curr_tile, i, collision_block_id) @@ -486,22 +490,26 @@ class STRole(Role): ret = True if role_env: for key, val in self.game_obj_cleanup.items(): - self._rc.env.maze.turn_event_from_tile_idle(key, val) + self._rc.env.call_func("turn_event_from_tile_idle", curr_event=key, tile=val) # reset game_obj_cleanup self.game_obj_cleanup = dict() curr_tile = self.role_tile new_tile = (role_env["x"], role_env["y"]) - self._rc.env.maze.remove_subject_events_from_tile(self.name, curr_tile) - self._rc.env.maze.add_event_from_tile(self.scratch.get_curr_event_and_desc(), new_tile) + self._rc.env.call_func("remove_subject_events_from_tile", subject=self.name, tile=curr_tile) + self._rc.env.call_func("add_event_from_tile", + curr_event=self.scratch.get_curr_event_and_desc(), + tile=new_tile) # the persona will travel to get to their destination. *Once* # the persona gets there, we activate the object action. if not self.scratch.planned_path: self.game_obj_cleanup[self.scratch.get_curr_event_and_desc()] = new_tile - self._rc.env.maze.add_event_from_tile(self.scratch.get_curr_event_and_desc(), new_tile) + self._rc.env.call_func("add_event_from_tile", + curr_event=self.scratch.get_curr_event_and_desc(), + tile=new_tile) blank = (self.scratch.get_curr_obj_event_and_desc()[0], None, None, None) - self._rc.env.maze.remove_event_from_tile(blank, new_tile) + self._rc.env.call_func("remove_event_from_tile", curr_event=blank, tile=new_tile) # update role's new tile self._rc.scratch.curr_tile = new_tile @@ -535,7 +543,7 @@ class STRole(Role): # use self._rc.memory 's retrieve functions retrieved = self.retrieve(observed) - plans = plan(self, self._rc.env.maze, self._rc.env.get_roles(), new_day, retrieved) + plans = plan(self, self._rc.env.get_roles(), new_day, retrieved) self.reflect() diff --git a/examples/st_game/run_st_game.py b/examples/st_game/run_st_game.py index 255c65ce2..cb33a7aee 100644 --- a/examples/st_game/run_st_game.py +++ b/examples/st_game/run_st_game.py @@ -17,6 +17,9 @@ async def startup(idea: str, sim_code: str, investment: float = 30.0, n_round: int = 500): + town = StanfordTown() + town.init_env() + # copy `storage/{fork_sim_code}` to `storage/{sim_code}` copy_folder(str(STORAGE_PATH.joinpath(fork_sim_code)), str(STORAGE_PATH.joinpath(sim_code))) @@ -28,8 +31,9 @@ async def startup(idea: str, for idx, role_name in enumerate(reverie_meta["persona_names"]): has_inner_voice = True if idx == 0 else False role = STRole(name=role_name, - sim_code=sim_code, profile=role_name, + sim_code=sim_code, + env=town.environment, step=reverie_meta.get("step", 0), start_date=reverie_meta.get("start_date"), curr_time=reverie_meta.get("curr_time"), @@ -41,7 +45,6 @@ async def startup(idea: str, write_curr_sim_code({"sim_code": sim_code}) write_curr_step({"step": reverie_meta.get("step", 0)}) - town = StanfordTown() town.wakeup_roles(roles) town.invest(investment) diff --git a/examples/st_game/stanford_town.py b/examples/st_game/stanford_town.py index c565afd80..e337fba3a 100644 --- a/examples/st_game/stanford_town.py +++ b/examples/st_game/stanford_town.py @@ -17,6 +17,10 @@ class StanfordTown(SoftwareCompany): environment: MazeEnvironment = Field(default_factory=MazeEnvironment) + def init_env(self): + logger.info("StanfordTown init environment") + self.environment.init_register_funcs() + def wakeup_roles(self, roles: list[Role]): logger.warning(f"The Town add {len(roles)} roles, and start to operate.") self.environment.add_roles(roles) diff --git a/examples/st_game/tests/actions/test_gen_action_details.py b/examples/st_game/tests/actions/test_gen_action_details.py index c84de6e6d..b2a507067 100644 --- a/examples/st_game/tests/actions/test_gen_action_details.py +++ b/examples/st_game/tests/actions/test_gen_action_details.py @@ -10,20 +10,20 @@ from examples.st_game.actions.gen_action_details import ( GenActObjDescription, GenEventTriple, GenObjEventTriple, - GenPronunciatio - ) + GenPronunciatio +) from examples.st_game.roles.st_role import STRole -role = STRole(name="Klaus Mueller", start_date="October 4, 2023", curr_time="October 4, 2023, 00:00:00", - sim_code="base_the_ville_isabella_maria_klaus") -maze = role._rc.env.maze +role = STRole(name="Klaus Mueller", start_date="October 4, 2023", curr_time="October 4, 2023, 00:00:00", + sim_code="base_the_ville_isabella_maria_klaus") +access_tile = role._rc.env.call_func("access_tile", tile=role.scratch.curr_tile) act_desp = "klaus mueller starts the day by making a coffee" act_dura = "20" -act_world = maze.access_tile(role.scratch.curr_tile)["world"] +act_world = access_tile["world"] assert act_world == "the Ville" -sector = GenActionSector().run(role, maze, act_desp) -arena = GenActionArena().run(role, maze, act_desp, act_world, sector) +sector = GenActionSector().run(role, access_tile, act_desp) +arena = GenActionArena().run(role, act_desp, act_world, sector) temp_address = f"{act_world}:{sector}:{arena}" obj = GenActionObject().run(role, act_desp, temp_address) @@ -33,24 +33,29 @@ act_obj_desp = GenActObjDescription().run(role, obj, act_desp) result_dict = GenActionDetails().run(role, act_desp, act_dura) + def test_gen_action_sector(): assert isinstance(sector, str) assert sector in role.s_mem.get_str_accessible_sectors(act_world) + def test_gen_action_arena(): assert isinstance(arena, str) assert arena in role.s_mem.get_str_accessible_sector_arenas(f"{act_world}:{sector}") + def test_gen_action_obj(): assert isinstance(obj, str) assert obj in role.s_mem.get_str_accessible_arena_game_objects(temp_address) + # def test_gen_event_triple(): # assert len(event_triple) == 3 # def test_gen_obj_event_triple(): # assert len(obj_triple) == 3 + def test_gen_action_details(): if result_dict: for key in [ @@ -70,4 +75,3 @@ def test_gen_action_details(): assert result_dict["action_address"] == f"{temp_address}:{obj}" assert result_dict["action_duration"] == int(act_dura) assert result_dict["act_obj_description"] == act_obj_desp - \ No newline at end of file diff --git a/examples/st_game/tests/plan/test_converse.py b/examples/st_game/tests/plan/test_converse.py index a39452334..9838a31fd 100644 --- a/examples/st_game/tests/plan/test_converse.py +++ b/examples/st_game/tests/plan/test_converse.py @@ -5,7 +5,6 @@ from typing import Tuple from examples.st_game.roles.st_role import STRole -from examples.st_game.maze import Maze from examples.st_game.utils.const import STORAGE_PATH from examples.st_game.utils.mg_ga_transform import get_reverie_meta from examples.st_game.utils.utils import copy_folder @@ -43,8 +42,7 @@ def init_two_roles(fork_sim_code: str = "July1_the_ville_isabella_maria_klaus-st def test_agent_conversation(): role_ir, role_km = init_two_roles() - maze = Maze() - curr_chat = agent_conversation(maze, role_ir, role_km) + curr_chat = agent_conversation(role_ir, role_km) assert len(curr_chat) % 2 == 0 meet = False diff --git a/examples/st_game/tests/test_maze.py b/examples/st_game/tests/test_maze.py index c81179fe9..af3f4a85a 100644 --- a/examples/st_game/tests/test_maze.py +++ b/examples/st_game/tests/test_maze.py @@ -1,11 +1,8 @@ from ..utils.const import MAZE_ASSET_PATH from ..maze import Maze + def test_maze_init(): maze = Maze(maze_asset_path=MAZE_ASSET_PATH) assert maze.maze_height == 100 assert maze.maze_width == 140 - - - - diff --git a/examples/st_game/utils/utils.py b/examples/st_game/utils/utils.py index 097e41387..fd547013a 100644 --- a/examples/st_game/utils/utils.py +++ b/examples/st_game/utils/utils.py @@ -159,13 +159,13 @@ def path_finder_v2(a, start, end, collision_block_char) -> list[int]: return the_path -def path_finder(maze: "Maze", start: list[int], end: list[int], collision_block_char: str) -> list[int]: +def path_finder(collision_maze: list, start: list[int], end: list[int], collision_block_char: str) -> list[int]: # EMERGENCY PATCH start = (start[1], start[0]) end = (end[1], end[0]) # END EMERGENCY PATCH - path = path_finder_v2(maze, start, end, collision_block_char) + path = path_finder_v2(collision_maze, start, end, collision_block_char) new_path = [] for i in path: diff --git a/metagpt/environment/__init__.py b/metagpt/environment/__init__.py new file mode 100644 index 000000000..2bcf8efd0 --- /dev/null +++ b/metagpt/environment/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : diff --git a/metagpt/environment.py b/metagpt/environment/environment.py similarity index 97% rename from metagpt/environment.py rename to metagpt/environment/environment.py index 24e6ada2f..6af734069 100644 --- a/metagpt/environment.py +++ b/metagpt/environment/environment.py @@ -17,8 +17,8 @@ from metagpt.schema import Message class Environment(BaseModel): """环境,承载一批角色,角色可以向环境发布消息,可以被其他角色观察到 - Environment, hosting a batch of roles, roles can publish messages to the environment, and can be observed by other roles - + Environment, hosting a batch of roles, roles can publish messages to the environment, + and can be observed by other roles """ roles: dict[str, Role] = Field(default_factory=dict) diff --git a/metagpt/environment/general_environment.py b/metagpt/environment/general_environment.py new file mode 100644 index 000000000..5a77d567e --- /dev/null +++ b/metagpt/environment/general_environment.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : + +from typing import Callable + +from pydantic import Field + +from metagpt.environment.environment import Environment + + +class GeneralEnvironment(Environment): + """ + A GeneralEnvironment for interfacing with games, etc. It create a registration mechanism to register + custom methods when operating with the particular environment. + """ + name: str = Field(default="") + registered_funcs: dict[str, Callable] = Field(default={}) + + def register_func(self, func_name: str, func: Callable): + if func_name not in self.registered_funcs: + self.registered_funcs[func_name] = func + + def call_func(self, func_name: str, *args, **kwargs): + assert func_name in self.registered_funcs + + func = self.registered_funcs.get(func_name) + return func(*args, **kwargs) + + @staticmethod + def init_register_funcs(self): + raise NotImplementedError() diff --git a/metagpt/environment/gym_environment.py b/metagpt/environment/gym_environment.py new file mode 100644 index 000000000..f24e9957b --- /dev/null +++ b/metagpt/environment/gym_environment.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : RL environment about Gymnasium(forked from openai gym) + +from typing import Callable + +import gymnasium as gym + +from metagpt.logs import logger +from metagpt.environment.general_environment import GeneralEnvironment + + +class GymEnvironment(GeneralEnvironment): + + def init_register_funcs(self): + env = gym.make(self.name) + logger.info(f"init gym environment: {self.name}") + self.register_func("reset", env.reset) + self.register_func("sample_action", env.action_space.sample) + self.register_func("step", env.step) + self.register_func("close", env.close) diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py index 21b6b5b8a..5bdb3b311 100644 --- a/metagpt/roles/role.py +++ b/metagpt/roles/role.py @@ -11,7 +11,6 @@ from typing import Iterable, Type from pydantic import BaseModel, Field -# from metagpt.environment import Environment from metagpt.config import CONFIG from metagpt.actions import Action, ActionOutput from metagpt.llm import LLM diff --git a/metagpt/software_company.py b/metagpt/software_company.py index b2bd18c58..97f96526f 100644 --- a/metagpt/software_company.py +++ b/metagpt/software_company.py @@ -9,7 +9,7 @@ from pydantic import BaseModel, Field from metagpt.actions import BossRequirement from metagpt.config import CONFIG -from metagpt.environment import Environment +from metagpt.environment.environment import Environment from metagpt.logs import logger from metagpt.roles import Role from metagpt.schema import Message diff --git a/tests/metagpt/environment/__init__.py b/tests/metagpt/environment/__init__.py new file mode 100644 index 000000000..2bcf8efd0 --- /dev/null +++ b/tests/metagpt/environment/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : diff --git a/tests/metagpt/environment/test_gym_environment.py b/tests/metagpt/environment/test_gym_environment.py new file mode 100644 index 000000000..7943a82da --- /dev/null +++ b/tests/metagpt/environment/test_gym_environment.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : unittest of gym environment + +from metagpt.environment.gym_environment import GymEnvironment + + +def test_gym_environment(): + gym_env = GymEnvironment(name="CartPole-v1") + gym_env.init_register_funcs() + + observation, info = gym_env.call_func("reset", seed=42) + for _ in range(2): + action = gym_env.call_func("sample_action") + observation, reward, terminated, truncated, info = gym_env.call_func("step", action=action) + if terminated or truncated: + observation, info = gym_env.call_func("reset") + assert len(observation) == 4 + gym_env.call_func("close") diff --git a/tests/metagpt/test_environment.py b/tests/metagpt/test_environment.py index a0f1f6257..a1f2ba296 100644 --- a/tests/metagpt/test_environment.py +++ b/tests/metagpt/test_environment.py @@ -9,7 +9,7 @@ import pytest from metagpt.actions import BossRequirement -from metagpt.environment import Environment +from metagpt.environment.environment import Environment from metagpt.logs import logger from metagpt.manager import Manager from metagpt.roles import Architect, ProductManager, Role