From 7610fa22d98a0d6fc570795ff54db7b7396b7a5a Mon Sep 17 00:00:00 2001 From: better629 Date: Wed, 31 Jan 2024 10:38:41 +0800 Subject: [PATCH] fix format --- .../actions/manual_record.py | 80 ++++++++++-------- .../andriod_assistant/actions/parse_record.py | 55 +++++++------ .../actions/parse_record_an.py | 9 ++- .../actions/screenshot_parse.py | 67 +++++++++++---- .../actions/self_learn_and_reflect.py | 81 ++++++++++++++----- .../actions/self_learn_reflect_an.py | 18 +---- examples/andriod_assistant/test.py | 51 ++++++------ 7 files changed, 218 insertions(+), 143 deletions(-) diff --git a/examples/andriod_assistant/actions/manual_record.py b/examples/andriod_assistant/actions/manual_record.py index 57c354dfe..654ac6ea7 100644 --- a/examples/andriod_assistant/actions/manual_record.py +++ b/examples/andriod_assistant/actions/manual_record.py @@ -1,21 +1,23 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # @Desc : manual record user interaction in stage=learn & mode=manual, LIKE scripts/step_recorder.py -import cv2 -import os import time from pathlib import Path -from examples.andriod_assistant.utils.schema import OpLogItem, ActionOp, RunState, GridOp, ActionOp, TapOp, \ - TapGridOp, \ - LongPressOp, LongPressGridOp, SwipeOp, SwipeGridOp, TextOp, AndroidElement +import cv2 + +from examples.andriod_assistant.utils.schema import ( + ActionOp, + AndroidElement, + SwipeOp, +) from examples.andriod_assistant.utils.utils import draw_bbox_multi, traverse_xml_tree from metagpt.actions.action import Action -from metagpt.logs import logger from metagpt.config2 import config +from metagpt.const import ADB_EXEC_FAIL from metagpt.environment.android_env.android_env import AndroidEnv from metagpt.environment.api.env_api import EnvAPIAbstract -from metagpt.const import ADB_EXEC_FAIL +from metagpt.logs import logger class ManualRecord(Action): @@ -23,15 +25,10 @@ class ManualRecord(Action): name: str = "ManualRecord" - async def run( - self, demo_name: str, task_dir: Path, env: AndroidEnv - ): - + async def run(self, demo_name: str, task_dir: Path, env: AndroidEnv): # Question 这里是将通过ADB获取的东西存到本地的路径的吧 screenshot_path: Path = env.step( - EnvAPIAbstract( - api_name="get_screenshot", kwargs={"ss_name": f"{demo_name}", "local_save_dir": task_dir} - ) + EnvAPIAbstract(api_name="get_screenshot", kwargs={"ss_name": f"{demo_name}", "local_save_dir": task_dir}) ) xml_path: Path = env.step( EnvAPIAbstract(api_name="get_xml", kwargs={"xml_name": f"{demo_name}", "local_save_dir": task_dir}) @@ -74,31 +71,40 @@ class ManualRecord(Action): user_input = "xxx" logger.info( "Choose one of the following actions you want to perform on the current screen:\ntap, text, long " - "press, swipe, stop", "blue") + "press, swipe, stop", + "blue", + ) - while user_input.lower() != ActionOp.TAP.value and user_input.lower() != ActionOp.TEXT.value and user_input.lower() != ActionOp.LONG_PRESS.value \ - and user_input.lower() != ActionOp.SWIPE.value and user_input.lower() != ActionOp.STOP.value: + while ( + user_input.lower() != ActionOp.TAP.value + and user_input.lower() != ActionOp.TEXT.value + and user_input.lower() != ActionOp.LONG_PRESS.value + and user_input.lower() != ActionOp.SWIPE.value + and user_input.lower() != ActionOp.STOP.value + ): user_input = input() if user_input.lower() == ActionOp.TAP.value: - logger.info(f"Which element do you want to tap? Choose a numeric tag from 1 to {len(elem_list)}:", - "blue") + logger.info( + f"Which element do you want to tap? Choose a numeric tag from 1 to {len(elem_list)}:", "blue" + ) user_input = "xxx" while not user_input.isnumeric() or int(user_input) > len(elem_list) or int(user_input) < 1: user_input = input() tl, br = elem_list[int(user_input) - 1].bbox x, y = (tl[0] + br[0]) // 2, (tl[1] + br[1]) // 2 - ret = env.step( - EnvAPIAbstract(api_name="user_tap", kwargs={"x": x, "y": y}) - ) + ret = env.step(EnvAPIAbstract(api_name="user_tap", kwargs={"x": x, "y": y})) # Question 将 ERROR 替换为 ADB_EXEC_FAIL(FAILED) if ret == ADB_EXEC_FAIL: logger.info("ERROR: tap execution failed", "red") break record_file.write(f"tap({int(user_input)}):::{elem_list[int(user_input) - 1].uid}\n") elif user_input.lower() == ActionOp.TEXT.value: - logger.info(f"Which element do you want to input the text string? Choose a numeric tag from 1 to " - f"{len(elem_list)}:", "blue") + logger.info( + f"Which element do you want to input the text string? Choose a numeric tag from 1 to " + f"{len(elem_list)}:", + "blue", + ) input_area = "xxx" while not input_area.isnumeric() or int(input_area) > len(elem_list) or int(input_area) < 1: input_area = input() @@ -106,14 +112,12 @@ class ManualRecord(Action): user_input = "" while not user_input: user_input = input() - env.step( - EnvAPIAbstract(api_name="user_input", kwargs={"input_txt": user_input}) - ) - record_file.write(f"text({input_area}:sep:\"{user_input}\"):::{elem_list[int(input_area) - 1].uid}\n") + env.step(EnvAPIAbstract(api_name="user_input", kwargs={"input_txt": user_input})) + record_file.write(f'text({input_area}:sep:"{user_input}"):::{elem_list[int(input_area) - 1].uid}\n') elif user_input.lower() == ActionOp.LONG_PRESS.value: logger.info( - f"Which element do you want to long press? Choose a numeric tag from 1 to {len(elem_list)}:", - "blue") + f"Which element do you want to long press? Choose a numeric tag from 1 to {len(elem_list)}:", "blue" + ) user_input = "xxx" while not user_input.isnumeric() or int(user_input) > len(elem_list) or int(user_input) < 1: user_input = input() @@ -126,14 +130,20 @@ class ManualRecord(Action): record_file.write(f"long_press({int(user_input)}):::{elem_list[int(user_input) - 1].uid}\n") elif user_input.lower() == ActionOp.SWIPE.value: logger.info( - f"What is the direction of your swipe? Choose one from the following options:\nup, down, left," - f" right", "blue") + "What is the direction of your swipe? Choose one from the following options:\nup, down, left," + " right", + "blue", + ) user_input = "" - while user_input != SwipeOp.UP.value and user_input != SwipeOp.DOWN.value and user_input != SwipeOp.LEFT.value and user_input != SwipeOp.RIGHT.value: + while ( + user_input != SwipeOp.UP.value + and user_input != SwipeOp.DOWN.value + and user_input != SwipeOp.LEFT.value + and user_input != SwipeOp.RIGHT.value + ): user_input = input() swipe_dir = user_input - logger.info( - f"Which element do you want to swipe? Choose a numeric tag from 1 to {len(elem_list)}:") + logger.info(f"Which element do you want to swipe? Choose a numeric tag from 1 to {len(elem_list)}:") while not user_input.isnumeric() or int(user_input) > len(elem_list) or int(user_input) < 1: user_input = input() tl, br = elem_list[int(user_input) - 1].bbox diff --git a/examples/andriod_assistant/actions/parse_record.py b/examples/andriod_assistant/actions/parse_record.py index ed794fc4c..2ce924296 100644 --- a/examples/andriod_assistant/actions/parse_record.py +++ b/examples/andriod_assistant/actions/parse_record.py @@ -3,35 +3,38 @@ # @Desc : parse record to generate learned standard operations in stage=learn & mode=manual, # LIKE scripts/document_generation.py -import re import ast import json +import re import time from pathlib import Path +from examples.andriod_assistant.actions.parse_record_an import RECORD_PARSE_NODE from examples.andriod_assistant.prompts.operation_prompt import ( + long_press_doc_template, + refine_doc_suffix, + swipe_doc_template, tap_doc_template, text_doc_template, - long_press_doc_template, - swipe_doc_template, - refine_doc_suffix ) -from examples.andriod_assistant.utils.schema import RecordLogItem, RunState, ActionOp, \ - SwipeOp, AndroidActionOutput -from examples.andriod_assistant.actions.parse_record_an import RECORD_PARSE_NODE +from examples.andriod_assistant.utils.schema import ( + ActionOp, + AndroidActionOutput, + RecordLogItem, + RunState, + SwipeOp, +) +from metagpt.actions.action import Action from metagpt.config2 import config from metagpt.environment.android_env.android_env import AndroidEnv -from metagpt.utils.common import encode_image from metagpt.logs import logger -from metagpt.actions.action import Action +from metagpt.utils.common import encode_image class ParseRecord(Action): name: str = "ParseRecord" - async def run( - self, app_name: str, demo_name: str, task_dir: Path, docs_dir: Path, env: AndroidEnv - ): + async def run(self, app_name: str, demo_name: str, task_dir: Path, docs_dir: Path, env: AndroidEnv): doc_count = 0 record_path = Path(task_dir) / "record.txt" @@ -81,24 +84,21 @@ class ParseRecord(Action): context += refine_context logger.info( f"Documentation for the element {resource_id} already exists. The doc will be " - f"refined based on the latest demo.") + f"refined based on the latest demo." + ) else: logger.info( f"Documentation for the element {resource_id} already exists. Turn on DOC_REFINE " - f"in the config file if needed.") + f"in the config file if needed." + ) continue else: - doc_content = { - "tap": "", - "text": "", - "v_swipe": "", - "h_swipe": "", - "long_press": "" - } + doc_content = {"tap": "", "text": "", "v_swipe": "", "h_swipe": "", "long_press": ""} logger.info(f"Waiting for GPT-4V to generate documentation for the element {resource_id}") - node = await RECORD_PARSE_NODE.fill(context=context, llm=self.llm, - images=[img_before_base64, img_after_base64]) + node = await RECORD_PARSE_NODE.fill( + context=context, llm=self.llm, images=[img_before_base64, img_after_base64] + ) if "error" in node.content: return AndroidActionOutput(action_state=RunState.FAIL) @@ -108,8 +108,13 @@ class ParseRecord(Action): doc_content[action_type] = msg with open(log_path, "a") as logfile: - log_item = RecordLogItem(step=step, prompt=prompt, image_before=img_before_base64, - image_after=img_after_base64, response=node.content) + log_item = RecordLogItem( + step=step, + prompt=prompt, + image_before=img_before_base64, + image_after=img_after_base64, + response=node.content, + ) # TODO 修改 dumps 方式 logfile.write(json.dumps(log_item) + "\n") with open(doc_path, "w") as outfile: diff --git a/examples/andriod_assistant/actions/parse_record_an.py b/examples/andriod_assistant/actions/parse_record_an.py index b81eaec0c..210c93e23 100644 --- a/examples/andriod_assistant/actions/parse_record_an.py +++ b/examples/andriod_assistant/actions/parse_record_an.py @@ -5,10 +5,11 @@ from metagpt.actions.action_node import ActionNode OBSERVATION = ActionNode( - key="Observation", expected_type=str, + key="Observation", + expected_type=str, instruction="Provide a description of your observations of the two images. " - "Subsequently, delineate the distinctions between the first image and the second one.", - example="" + "Subsequently, delineate the distinctions between the first image and the second one.", + example="", ) THOUGHT = ActionNode( @@ -22,7 +23,7 @@ DESCRIPTION = ActionNode( key="Description", expected_type=str, instruction="Describe the functionality of the UI element concisely in one or two sentences Do not include " - "the numeric tag in your description", + "the numeric tag in your description", example="", ) diff --git a/examples/andriod_assistant/actions/screenshot_parse.py b/examples/andriod_assistant/actions/screenshot_parse.py index 0317bdaab..3c5691a9b 100644 --- a/examples/andriod_assistant/actions/screenshot_parse.py +++ b/examples/andriod_assistant/actions/screenshot_parse.py @@ -2,24 +2,41 @@ # -*- coding: utf-8 -*- # @Desc : LIKE scripts/task_executor.py in stage=act -from pathlib import Path import ast +from pathlib import Path +from examples.andriod_assistant.actions.screenshot_parse_an import SCREENSHOT_PARSE_NODE from examples.andriod_assistant.prompts.assistant_prompt import ( screenshot_parse_template, screenshot_parse_with_grid_template, ) -from examples.andriod_assistant.utils.schema import OpLogItem, RunState, GridOp, TapOp, TapGridOp, \ - LongPressOp, LongPressGridOp, SwipeOp, SwipeGridOp, TextOp, AndroidElement, AndroidActionOutput -from examples.andriod_assistant.actions.screenshot_parse_an import SCREENSHOT_PARSE_NODE -from examples.andriod_assistant.utils.utils import draw_bbox_multi, traverse_xml_tree, area_to_xy, \ - screenshot_parse_extract, elem_bbox_to_xy +from examples.andriod_assistant.utils.schema import ( + AndroidActionOutput, + AndroidElement, + GridOp, + LongPressGridOp, + LongPressOp, + OpLogItem, + RunState, + SwipeGridOp, + SwipeOp, + TapGridOp, + TapOp, + TextOp, +) +from examples.andriod_assistant.utils.utils import ( + area_to_xy, + draw_bbox_multi, + elem_bbox_to_xy, + screenshot_parse_extract, + traverse_xml_tree, +) from metagpt.actions.action import Action from metagpt.config2 import config +from metagpt.const import ADB_EXEC_FAIL from metagpt.environment.android_env.android_env import AndroidEnv from metagpt.environment.api.env_api import EnvAPIAbstract from metagpt.utils.common import encode_image -from metagpt.const import ADB_EXEC_FAIL class ScreenshotParse(Action): @@ -42,21 +59,33 @@ next action. You should always prioritize these documented elements for interact if doc_content["tap"]: ui_doc += f"This UI element is clickable. {doc_content['tap']}\n\n" if doc_content["text"]: - ui_doc += f"This UI element can receive text input. The text input is used for the following " \ - f"purposes: {doc_content['text']}\n\n" + ui_doc += ( + f"This UI element can receive text input. The text input is used for the following " + f"purposes: {doc_content['text']}\n\n" + ) if doc_content["long_press"]: ui_doc += f"This UI element is long clickable. {doc_content['long_press']}\n\n" if doc_content["v_swipe"]: - ui_doc += f"This element can be swiped directly without tapping. You can swipe vertically on " \ - f"this UI element. {doc_content['v_swipe']}\n\n" + ui_doc += ( + f"This element can be swiped directly without tapping. You can swipe vertically on " + f"this UI element. {doc_content['v_swipe']}\n\n" + ) if doc_content["h_swipe"]: - ui_doc += f"This element can be swiped directly without tapping. You can swipe horizontally on " \ - f"this UI element. {doc_content['h_swipe']}\n\n" + ui_doc += ( + f"This element can be swiped directly without tapping. You can swipe horizontally on " + f"this UI element. {doc_content['h_swipe']}\n\n" + ) return ui_doc - async def run( - self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, grid_on: bool, env: AndroidEnv + self, + round_count: int, + task_desc: str, + last_act: str, + task_dir: Path, + docs_dir: Path, + grid_on: bool, + env: AndroidEnv, ): screenshot_path: Path = env.step( EnvAPIAbstract( @@ -102,7 +131,7 @@ next action. You should always prioritize these documented elements for interact return AndroidActionOutput(action_state=RunState.FAIL) prompt = node.compile(context=context, schema="json", mode="auto") - log_item = OpLogItem(step=round_count, prompt=prompt, image=screenshot_labeled_path, response=node.content) + OpLogItem(step=round_count, prompt=prompt, image=screenshot_labeled_path, response=node.content) op_param = screenshot_parse_extract(node.instruct_content.model_dump(), grid_on) if op_param.param_state == RunState.FINISH: @@ -126,7 +155,11 @@ next action. You should always prioritize these documented elements for interact return AndroidActionOutput(action_state=RunState.FAIL) elif isinstance(op_param, SwipeOp): x, y = elem_bbox_to_xy(elem_list[op_param.area - 1].bbox) - res = env.step(EnvAPIAbstract("user_swipe", kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist})) + res = env.step( + EnvAPIAbstract( + "user_swipe", kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist} + ) + ) if res == ADB_EXEC_FAIL: return AndroidActionOutput(action_state=RunState.FAIL) elif isinstance(op_param, GridOp): diff --git a/examples/andriod_assistant/actions/self_learn_and_reflect.py b/examples/andriod_assistant/actions/self_learn_and_reflect.py index 9ca1e4b1c..a943cd846 100644 --- a/examples/andriod_assistant/actions/self_learn_and_reflect.py +++ b/examples/andriod_assistant/actions/self_learn_and_reflect.py @@ -2,25 +2,47 @@ # -*- coding: utf-8 -*- # @Desc : LIKE scripts/self_explorer.py in stage=learn & mode=auto self_explore_task stage -from pathlib import Path import ast +from pathlib import Path from examples.andriod_assistant.actions.screenshot_parse_an import SCREENSHOT_PARSE_NODE -from examples.andriod_assistant.actions.self_learn_reflect_an import SELF_LEARN_REFLECT_NODE -from examples.andriod_assistant.prompts.assistant_prompt import ( - screenshot_parse_self_explore_template, screenshot_parse_self_explore_reflect_template as reflect_template +from examples.andriod_assistant.actions.self_learn_reflect_an import ( + SELF_LEARN_REFLECT_NODE, +) +from examples.andriod_assistant.prompts.assistant_prompt import ( + screenshot_parse_self_explore_reflect_template as reflect_template, +) +from examples.andriod_assistant.prompts.assistant_prompt import ( + screenshot_parse_self_explore_template, +) +from examples.andriod_assistant.utils.schema import ( + ActionOp, + AndroidActionOutput, + AndroidElement, + Decision, + DocContent, + LongPressOp, + OpLogItem, + ReflectLogItem, + RunState, + SwipeOp, + TapOp, + TextOp, +) +from examples.andriod_assistant.utils.utils import ( + draw_bbox_multi, + elem_bbox_to_xy, + reflect_parse_extarct, + screenshot_parse_extract, + traverse_xml_tree, ) -from examples.andriod_assistant.utils.schema import AndroidElement, OpLogItem, ReflectLogItem, RunState, TapOp, \ - TextOp, SwipeOp, LongPressOp, ActionOp, Decision, DocContent, AndroidActionOutput -from examples.andriod_assistant.utils.utils import draw_bbox_multi, traverse_xml_tree, screenshot_parse_extract, \ - elem_bbox_to_xy, reflect_parse_extarct from metagpt.actions.action import Action from metagpt.config2 import config +from metagpt.const import ADB_EXEC_FAIL from metagpt.environment.android_env.android_env import AndroidEnv from metagpt.environment.api.env_api import EnvAPIAbstract -from metagpt.utils.common import encode_image -from metagpt.const import ADB_EXEC_FAIL from metagpt.logs import logger +from metagpt.utils.common import encode_image class SelfLearnAndReflect(Action): @@ -35,12 +57,16 @@ class SelfLearnAndReflect(Action): act_name: str = "" ui_area: int = -1 - async def run(self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, env: AndroidEnv) -> AndroidActionOutput: + async def run( + self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, env: AndroidEnv + ) -> AndroidActionOutput: resp = self.run_self_learn(round_count, task_desc, last_act, task_dir, env) resp = self.run_reflect(round_count, task_desc, last_act, task_dir, docs_dir, env) return resp - async def run_self_learn(self, round_count: int, task_desc: str, last_act: str, task_dir: Path, env: AndroidEnv) -> AndroidActionOutput: + async def run_self_learn( + self, round_count: int, task_desc: str, last_act: str, task_dir: Path, env: AndroidEnv + ) -> AndroidActionOutput: screenshot_path: Path = env.step( EnvAPIAbstract( api_name="get_screenshot", kwargs={"ss_name": f"{round_count}_before", "local_save_dir": task_dir} @@ -89,7 +115,7 @@ class SelfLearnAndReflect(Action): if "error" in node.content: return AndroidActionOutput(action_state=RunState.FAIL) prompt = node.compile(context=context, schema="json", mode="auto") - log_item = OpLogItem(step=round_count, prompt=prompt, image=screenshot_before_labeled_path, response=node.content) + OpLogItem(step=round_count, prompt=prompt, image=screenshot_before_labeled_path, response=node.content) op_param = screenshot_parse_extract(node.instruct_content.model_dump(), grid_on=False) if op_param.param_state == RunState.FINISH: return AndroidActionOutput(action_state=RunState.FINISH) @@ -116,7 +142,11 @@ class SelfLearnAndReflect(Action): self.ui_area = op_param.area self.swipe_orient = op_param.swipe_orient x, y = elem_bbox_to_xy(elem_list[op_param.area - 1].bbox) - res = env.step(EnvAPIAbstract("user_swipe", kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist})) + res = env.step( + EnvAPIAbstract( + "user_swipe", kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist} + ) + ) if res == ADB_EXEC_FAIL: return AndroidActionOutput(action_state=RunState.FAIL) @@ -124,7 +154,9 @@ class SelfLearnAndReflect(Action): self.act_name = op_param.act_name return AndroidActionOutput() - async def run_reflect(self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, env: AndroidEnv) -> AndroidActionOutput: + async def run_reflect( + self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, env: AndroidEnv + ) -> AndroidActionOutput: screenshot_path: Path = env.step( EnvAPIAbstract( api_name="get_screenshot", kwargs={"ss_name": f"{round_count}_after", "local_save_dir": task_dir} @@ -147,15 +179,24 @@ class SelfLearnAndReflect(Action): action = "v_swipe" elif self.swipe_orient == SwipeOp.LEFT.value or self.swipe_orient == SwipeOp.RIGHT.value: action = "h_swipe" - context = reflect_template.format(action=action, ui_element=str(self.ui_area), task_desc=task_desc, last_act=last_act) - node = await SELF_LEARN_REFLECT_NODE.fill(context=context, llm=self.llm, images=[self.screenshot_before_base64, img_base64]) + context = reflect_template.format( + action=action, ui_element=str(self.ui_area), task_desc=task_desc, last_act=last_act + ) + node = await SELF_LEARN_REFLECT_NODE.fill( + context=context, llm=self.llm, images=[self.screenshot_before_base64, img_base64] + ) if "error" in node.content: return AndroidActionOutput(action_state=RunState.FAIL) prompt = node.compile(context=context, schema="json", mode="auto") - log_item = ReflectLogItem(step=round_count, prompt=prompt, image_before=self.screenshot_before_path, - image_after=screenshot_after_labeled_path, response=node.content) + ReflectLogItem( + step=round_count, + prompt=prompt, + image_before=self.screenshot_before_path, + image_after=screenshot_after_labeled_path, + response=node.content, + ) op_param = reflect_parse_extarct(node.instruct_content.model_dump()) if op_param.param_state == RunState.FINISH: @@ -163,7 +204,7 @@ class SelfLearnAndReflect(Action): if op_param.param_state == RunState.FAIL: return AndroidActionOutput(action_state=RunState.FAIL) - resource_id = self.elem_list[int(self.ui_area) -1].uid + resource_id = self.elem_list[int(self.ui_area) - 1].uid if op_param.decision == Decision.INEFFECTIVE.value: self.useless_list.append(resource_id) last_act = "NONE" # TODO global diff --git a/examples/andriod_assistant/actions/self_learn_reflect_an.py b/examples/andriod_assistant/actions/self_learn_reflect_an.py index b70c65655..305b7376a 100644 --- a/examples/andriod_assistant/actions/self_learn_reflect_an.py +++ b/examples/andriod_assistant/actions/self_learn_reflect_an.py @@ -4,28 +4,16 @@ from metagpt.actions.action_node import ActionNode - DECISION = ActionNode( - key="Decision", - expected_type=str, - instruction="explain why you made this decision", - example="BACK" + key="Decision", expected_type=str, instruction="explain why you made this decision", example="BACK" ) -THOUGHT = ActionNode( - key="Thought", - expected_type=str, - instruction="explain why you made this decision", - example="" -) +THOUGHT = ActionNode(key="Thought", expected_type=str, instruction="explain why you made this decision", example="") DOCUMENTATION = ActionNode( - key="Documentation", - expected_type=str, - instruction="describe the function of the UI element", - example="" + key="Documentation", expected_type=str, instruction="describe the function of the UI element", example="" ) diff --git a/examples/andriod_assistant/test.py b/examples/andriod_assistant/test.py index 3970daca6..c223665c4 100644 --- a/examples/andriod_assistant/test.py +++ b/examples/andriod_assistant/test.py @@ -3,36 +3,35 @@ # @Desc : test case (imgs from appagent's) -import re import ast -import json -import time import asyncio +import re from pathlib import Path +from actions.parse_record_an import RECORD_PARSE_NODE from prompts.operation_prompt import ( + long_press_doc_template, + refine_doc_suffix, + swipe_doc_template, tap_doc_template, text_doc_template, - long_press_doc_template, - swipe_doc_template, - refine_doc_suffix ) from utils.schema import ActionOp, SwipeOp -from actions.parse_record_an import RECORD_PARSE_NODE -from metagpt.config2 import config -from metagpt.utils.common import encode_image -from metagpt.logs import logger -from metagpt.actions.action import Action -TEST_BEFORE_PATH = Path( - "apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_1.png") -TEST_AFTER_PATH = Path( - "apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_2.png") +from metagpt.actions.action import Action +from metagpt.config2 import config +from metagpt.logs import logger +from metagpt.utils.common import encode_image + +TEST_BEFORE_PATH = Path("apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_1.png") +TEST_AFTER_PATH = Path("apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_2.png") RECORD_PATH = Path("apps/demo_Contacts/record.txt") TASK_DESC_PATH = Path("apps/demo_Contacts/task_desc.txt") DOCS_DIR = Path("storage") testaction = Action(name="test") + + # TODO test for parse record # 仅使用一张图像进行测试 async def manual_test(): @@ -80,26 +79,23 @@ async def manual_test(): context += refine_context logger.info( f"Documentation for the element {resource_id} already exists. The doc will be " - f"refined based on the latest demo.") + f"refined based on the latest demo." + ) else: logger.info( f"Documentation for the element {resource_id} already exists. Turn on DOC_REFINE " - f"in the config file if needed.") + f"in the config file if needed." + ) else: - doc_content = { - "tap": "", - "text": "", - "v_swipe": "", - "h_swipe": "", - "long_press": "" - } + doc_content = {"tap": "", "text": "", "v_swipe": "", "h_swipe": "", "long_press": ""} logger.info(f"Waiting for GPT-4V to generate documentation for the element {resource_id}") - node = await RECORD_PARSE_NODE.fill(context=context, llm=testaction.llm, - images=[img_before_base64, img_after_base64]) + node = await RECORD_PARSE_NODE.fill( + context=context, llm=testaction.llm, images=[img_before_base64, img_after_base64] + ) # log_path = task_dir.joinpath(f"log_{app_name}_{demo_name}.txt") - prompt = node.compile(context=context, schema="json", mode="auto") + node.compile(context=context, schema="json", mode="auto") msg = node.content doc_content[action_type] = msg @@ -107,6 +103,7 @@ async def manual_test(): outfile.write(str(doc_content)) logger.info(f"Documentation generated and saved to {doc_path}") + if __name__ == "__main__": loop = asyncio.get_event_loop() loop.run_until_complete(manual_test())