diff --git a/examples/android_assistant/run_assistant.py b/examples/android_assistant/run_assistant.py index c64323e72..27a3335b7 100644 --- a/examples/android_assistant/run_assistant.py +++ b/examples/android_assistant/run_assistant.py @@ -40,19 +40,17 @@ def startup( ), device_id: str = typer.Option(default="emulator-5554", help="The Android device_id"), ): - config.set_other( - { - "stage": stage, - "mode": mode, - "app_name": app_name, - "task_desc": task_desc, - "refine_doc": refine_doc, - "min_dist": min_dist, - "android_screenshot_dir": android_screenshot_dir, - "android_xml_dir": android_xml_dir, - "device_id": device_id, - } - ) + config.extra = { + "stage": stage, + "mode": mode, + "app_name": app_name, + "task_desc": task_desc, + "refine_doc": refine_doc, + "min_dist": min_dist, + "android_screenshot_dir": android_screenshot_dir, + "android_xml_dir": android_xml_dir, + "device_id": device_id, + } team = Team( env=AndroidEnv( diff --git a/metagpt/config2.py b/metagpt/config2.py index 09ff6d83f..ed68b4db2 100644 --- a/metagpt/config2.py +++ b/metagpt/config2.py @@ -75,7 +75,7 @@ class Config(CLIParams, YamlModel): iflytek_api_key: str = "" azure_tts_subscription_key: str = "" azure_tts_region: str = "" - other: dict = dict() # other dict + _extra: dict = dict() # extra config dict @classmethod def from_home(cls, path): @@ -128,11 +128,13 @@ class Config(CLIParams, YamlModel): self.reqa_file = reqa_file self.max_auto_summarize_code = max_auto_summarize_code - def set_other(self, other: dict): - self.other = other + @property + def extra(self): + return self._extra - def get_other(self, key: str, default_value: str = None): - return self.other.get(key, default_value) + @extra.setter + def extra(self, value: dict): + self._extra = value def get_openai_llm(self) -> Optional[LLMConfig]: """Get OpenAI LLMConfig by name. 
If no OpenAI, raise Exception""" diff --git a/metagpt/ext/android_assistant/actions/manual_record.py b/metagpt/ext/android_assistant/actions/manual_record.py index d0f7a6c13..bcfb2ed89 100644 --- a/metagpt/ext/android_assistant/actions/manual_record.py +++ b/metagpt/ext/android_assistant/actions/manual_record.py @@ -55,6 +55,7 @@ class ManualRecord(Action): self.task_desc_path.write_text(task_desc) step = 0 + extra_config = config.extra while True: step += 1 screenshot_path: Path = env.observe( @@ -68,12 +69,12 @@ class ManualRecord(Action): if not screenshot_path.exists() or not xml_path.exists(): return AndroidActionOutput(action_state=RunState.FAIL) - elem_list = elem_list_from_xml_tree(xml_path, self.useless_list, config.get_other("min_dist")) + elem_list = elem_list_from_xml_tree(xml_path, self.useless_list, extra_config.get("min_dist", 30)) screenshot_labeled_path = Path(self.screenshot_after_path).joinpath(f"{step}_labeled.png") labeled_img = draw_bbox_multi(screenshot_path, screenshot_labeled_path, elem_list) - cv2.namedWindow('image', cv2.WINDOW_NORMAL) + cv2.namedWindow("image", cv2.WINDOW_NORMAL) cv2.imshow("image", labeled_img) cv2.waitKey(0) cv2.destroyAllWindows() diff --git a/metagpt/ext/android_assistant/actions/parse_record.py b/metagpt/ext/android_assistant/actions/parse_record.py index c9b8d34fa..304daf655 100644 --- a/metagpt/ext/android_assistant/actions/parse_record.py +++ b/metagpt/ext/android_assistant/actions/parse_record.py @@ -45,6 +45,7 @@ class ParseRecord(Action): path.mkdir(parents=True, exist_ok=True) task_desc = self.task_desc_path.read_text() + extra_config = config.extra with open(self.record_path, "r") as record_file: record_step_count = len(record_file.readlines()) - 1 @@ -90,7 +91,7 @@ class ParseRecord(Action): continue if doc_content[action_type]: - if config.get_other("doc_refine"): + if extra_config.get("refine_doc", False): refine_context = refine_doc_suffix.format(old_doc=doc_content[action_type]) context += 
refine_context logger.info( diff --git a/metagpt/ext/android_assistant/actions/screenshot_parse.py b/metagpt/ext/android_assistant/actions/screenshot_parse.py index 2004aa85f..4d8bb0e1e 100644 --- a/metagpt/ext/android_assistant/actions/screenshot_parse.py +++ b/metagpt/ext/android_assistant/actions/screenshot_parse.py @@ -101,6 +101,7 @@ next action. You should always prioritize these documented elements for interact grid_on: bool, env: AndroidEnv, ): + extra_config = config.extra for path in [task_dir, docs_dir]: path.mkdir(parents=True, exist_ok=True) screenshot_path: Path = env.observe( @@ -125,7 +126,7 @@ next action. You should always prioritize these documented elements for interact bbox = e.bbox center_ = (bbox[0][0] + bbox[1][0]) // 2, (bbox[0][1] + bbox[1][1]) // 2 dist = (abs(center[0] - center_[0]) ** 2 + abs(center[1] - center_[1]) ** 2) ** 0.5 - if dist <= config.get_other("min_dist"): + if dist <= extra_config.get("min_dist", 30): close = True break if not close: diff --git a/metagpt/ext/android_assistant/actions/self_learn_and_reflect.py b/metagpt/ext/android_assistant/actions/self_learn_and_reflect.py index 73f5073a6..5e9cfbb45 100644 --- a/metagpt/ext/android_assistant/actions/self_learn_and_reflect.py +++ b/metagpt/ext/android_assistant/actions/self_learn_and_reflect.py @@ -80,6 +80,7 @@ class SelfLearnAndReflect(Action): async def run_self_learn( self, round_count: int, task_desc: str, last_act: str, task_dir: Path, env: AndroidEnv ) -> AndroidActionOutput: + extra_config = config.extra screenshot_path: Path = env.observe( EnvObsParams(obs_type=EnvObsType.GET_SCREENSHOT, ss_name=f"{round_count}_before", local_save_dir=task_dir) ) @@ -89,7 +90,7 @@ class SelfLearnAndReflect(Action): if not screenshot_path.exists() or not xml_path.exists(): return AndroidActionOutput(action_state=RunState.FAIL) - elem_list = elem_list_from_xml_tree(xml_path, self.useless_list, config.get_other("min_dist")) + elem_list = elem_list_from_xml_tree(xml_path, 
self.useless_list, extra_config.get("min_dist", 30)) screenshot_before_labeled_path = task_dir.joinpath(f"{round_count}_before_labeled.png") draw_bbox_multi(screenshot_path, screenshot_before_labeled_path, elem_list) diff --git a/metagpt/ext/android_assistant/roles/android_assistant.py b/metagpt/ext/android_assistant/roles/android_assistant.py index 8abdbab59..45636f519 100644 --- a/metagpt/ext/android_assistant/roles/android_assistant.py +++ b/metagpt/ext/android_assistant/roles/android_assistant.py @@ -40,8 +40,9 @@ class AndroidAssistant(Role): super().__init__(**data) self._watch([UserRequirement, AndroidActionOutput]) - self.task_desc = config.get_other("task_desc", "Just explore any app in this phone!") - app_name = config.get_other("app_name", "demo") + extra_config = config.extra + self.task_desc = extra_config.get("task_desc", "Just explore any app in this phone!") + app_name = extra_config.get("app_name", "demo") data_dir = self.output_root_dir.absolute().joinpath("output") or EXAMPLE_PATH.joinpath( "android_assistant/output" ) @@ -50,8 +51,8 @@ class AndroidAssistant(Role): """Firstly, we decide the state with user config, further, we can do it automatically, like if it's new app, run the learn first and then do the act stage or learn it during the action. """ - stage = config.get_other("stage") - mode = config.get_other("mode") + stage = extra_config.get("stage") + mode = extra_config.get("mode") if stage == "learn" and mode == "manual": # choose ManualRecord and then run ParseRecord # Remember, only run each action only one time, no need to run n_round. 
diff --git a/metagpt/ext/android_assistant/utils/utils.py b/metagpt/ext/android_assistant/utils/utils.py index 3a5ebc325..f1fa13869 100644 --- a/metagpt/ext/android_assistant/utils/utils.py +++ b/metagpt/ext/android_assistant/utils/utils.py @@ -10,6 +10,7 @@ from xml.etree.ElementTree import Element, iterparse import cv2 import pyshine as ps +from metagpt.config2 import config from metagpt.ext.android_assistant.utils.schema import ( ActionOp, AndroidElement, @@ -47,6 +48,7 @@ def get_id_from_element(elem: Element) -> str: def traverse_xml_tree(xml_path: Path, elem_list: list[AndroidElement], attrib: str, add_index=False): path = [] + extra_config = config.extra for event, elem in iterparse(str(xml_path), ["start", "end"]): if event == "start": path.append(elem) @@ -68,9 +70,7 @@ def traverse_xml_tree(xml_path: Path, elem_list: list[AndroidElement], attrib: s bbox = e.bbox center_ = (bbox[0][0] + bbox[1][0]) // 2, (bbox[0][1] + bbox[1][1]) // 2 dist = (abs(center[0] - center_[0]) ** 2 + abs(center[1] - center_[1]) ** 2) ** 0.5 - # TODO Modify config to default 30. It should be modified back config after single action test - # if dist <= config.get_other("min_dist"): - if dist <= 30: + if dist <= extra_config.get("min_dist", 30): close = True break if not close: