From 058c79a860ed80828a81e67a8933873caf6ad7bb Mon Sep 17 00:00:00 2001 From: didi <2020201387@ruc.edu.cn> Date: Tue, 30 Jan 2024 23:41:20 +0800 Subject: [PATCH] Update Manual Test --- ...android.widget.TextView_183_204_Apps_2.txt | 1 + examples/andriod_assistant/test.py | 144 +++++++++--------- metagpt/actions/action_node.py | 2 + 3 files changed, 79 insertions(+), 68 deletions(-) create mode 100644 examples/andriod_assistant/storage/android.view.ViewGroup_1067_236_android.widget.TextView_183_204_Apps_2.txt diff --git a/examples/andriod_assistant/storage/android.view.ViewGroup_1067_236_android.widget.TextView_183_204_Apps_2.txt b/examples/andriod_assistant/storage/android.view.ViewGroup_1067_236_android.widget.TextView_183_204_Apps_2.txt new file mode 100644 index 000000000..19b670ea7 --- /dev/null +++ b/examples/andriod_assistant/storage/android.view.ViewGroup_1067_236_android.widget.TextView_183_204_Apps_2.txt @@ -0,0 +1 @@ +{'tap': '[CONTENT]\n{\n "Observation": "The first image shows a mobile device\'s home screen with various app icons and a Google search bar at the top. The second image displays an app drawer with a grid of apps and a search bar at the top, indicating that the UI element has been tapped.",\n "Thought": "Tapping the UI element opens the app drawer, which is a common function in mobile operating systems to access a list of all installed apps.",\n "Description": "Tapping this area will open the app drawer, displaying a list of all installed applications."\n}\n[/CONTENT]', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''} \ No newline at end of file diff --git a/examples/andriod_assistant/test.py b/examples/andriod_assistant/test.py index 3210adb46..3970daca6 100644 --- a/examples/andriod_assistant/test.py +++ b/examples/andriod_assistant/test.py @@ -7,6 +7,7 @@ import re import ast import json import time +import asyncio from pathlib import Path from prompts.operation_prompt import ( @@ -24,83 +25,90 @@ from metagpt.logs import logger from metagpt.actions.action import Action TEST_BEFORE_PATH = Path( - "examples/andriod_assistant/apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_1.png") + "apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_1.png") TEST_AFTER_PATH = Path( - "examples/andriod_assistant/apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_2.png") -RECORD_PATH = Path("examples/andriod_assistant/apps/demo_Contacts/record.txt") -TASK_DESC_PATH = Path("examples/andriod_assistant/apps/demo_Contacts/task_desc.txt") -DOCS_DIR = Path("examples/andriod_assistant/storage") + "apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_2.png") +RECORD_PATH = Path("apps/demo_Contacts/record.txt") +TASK_DESC_PATH = Path("apps/demo_Contacts/task_desc.txt") +DOCS_DIR = Path("storage") testaction = Action(name="test") # TODO test for parse record # 仅使用一张图像进行测试 -img_before_base64 = encode_image(TEST_BEFORE_PATH) -img_after_base64 = encode_image(TEST_AFTER_PATH) +async def manual_test(): + img_before_base64 = encode_image(TEST_BEFORE_PATH) + img_after_base64 = encode_image(TEST_AFTER_PATH) -with open(RECORD_PATH, "r") as record_file: - rec = record_file.readline().strip() - action, resource_id = rec.split(":::") - action_type = action.split("(")[0] - action_param = re.findall(r"\((.*?)\)", action)[0] - if action_type == ActionOp.TAP.value: - prompt_template = tap_doc_template - context = prompt_template.format(ui_element=action_param) - elif action_type == ActionOp.TEXT.value: - input_area, input_text = action_param.split(":sep:") - prompt_template = text_doc_template - context = prompt_template.format(ui_element=input_area) - elif action_type == ActionOp.LONG_PRESS.value: - prompt_template = long_press_doc_template - context = prompt_template.format(ui_element=action_param) - elif action_type == ActionOp.SWIPE.value: - swipe_area, swipe_dir = action_param.split(":sep:") - if swipe_dir == SwipeOp.UP.value or swipe_dir == SwipeOp.DOWN.value: - action_type = ActionOp.VERTICAL_SWIPE.value - elif swipe_dir == SwipeOp.LEFT.value or swipe_dir == SwipeOp.RIGHT.value: - action_type = ActionOp.HORIZONTAL_SWIPE.value - prompt_template = swipe_doc_template - context = prompt_template.format(swipe_dir=swipe_dir, ui_element=swipe_area) - else: - print("Error occurs") + with open(RECORD_PATH, "r") as record_file: + rec = record_file.readline().strip() + action, resource_id = rec.split(":::") + action_type = action.split("(")[0] + action_param = re.findall(r"\((.*?)\)", action)[0] + if action_type == ActionOp.TAP.value: + prompt_template = tap_doc_template + context = prompt_template.format(ui_element=action_param) + elif action_type == ActionOp.TEXT.value: + input_area, input_text = action_param.split(":sep:") + prompt_template = text_doc_template + context = prompt_template.format(ui_element=input_area) + elif action_type == ActionOp.LONG_PRESS.value: + prompt_template = long_press_doc_template + context = prompt_template.format(ui_element=action_param) + elif action_type == ActionOp.SWIPE.value: + swipe_area, swipe_dir = action_param.split(":sep:") + if swipe_dir == SwipeOp.UP.value or swipe_dir == SwipeOp.DOWN.value: + action_type = ActionOp.VERTICAL_SWIPE.value + elif swipe_dir == SwipeOp.LEFT.value or swipe_dir == SwipeOp.RIGHT.value: + action_type = ActionOp.HORIZONTAL_SWIPE.value + prompt_template = swipe_doc_template + context = prompt_template.format(swipe_dir=swipe_dir, ui_element=swipe_area) + else: + print("Error occurs") - task_desc_path = TASK_DESC_PATH - task_desc = open(task_desc_path, "r").read() - context = context.format(task_desc=task_desc) + task_desc_path = TASK_DESC_PATH + task_desc = open(task_desc_path, "r").read() + context = context.format(task_desc=task_desc) - doc_name = resource_id + ".txt" + doc_name = resource_id + ".txt" - doc_path = DOCS_DIR.joinpath(doc_name) - if doc_path.exists(): - doc_content = ast.literal_eval(open(doc_path).read()) - if doc_content[action_type]: - if config.get_other("doc_refine"): - refine_context = refine_doc_suffix.format(old_doc=doc_content[action_type]) - context += refine_context - logger.info( - f"Documentation for the element {resource_id} already exists. The doc will be " - f"refined based on the latest demo.") - else: - logger.info( - f"Documentation for the element {resource_id} already exists. Turn on DOC_REFINE " - f"in the config file if needed.") - else: - doc_content = { - "tap": "", - "text": "", - "v_swipe": "", - "h_swipe": "", - "long_press": "" - } - logger.info(f"Waiting for GPT-4V to generate documentation for the element {resource_id}") + doc_path = DOCS_DIR.joinpath(doc_name) + if doc_path.exists(): + doc_content = ast.literal_eval(open(doc_path).read()) + if doc_content[action_type]: + if config.get_other("doc_refine"): + refine_context = refine_doc_suffix.format(old_doc=doc_content[action_type]) + context += refine_context + logger.info( + f"Documentation for the element {resource_id} already exists. The doc will be " + f"refined based on the latest demo.") + else: + logger.info( + f"Documentation for the element {resource_id} already exists. Turn on DOC_REFINE " + f"in the config file if needed.") + else: + doc_content = { + "tap": "", + "text": "", + "v_swipe": "", + "h_swipe": "", + "long_press": "" + } + logger.info(f"Waiting for GPT-4V to generate documentation for the element {resource_id}") - node = RECORD_PARSE_NODE.fill(context=context, llm=testaction.llm, - images=[img_before_base64, img_after_base64]) + node = await RECORD_PARSE_NODE.fill(context=context, llm=testaction.llm, + images=[img_before_base64, img_after_base64]) - # log_path = task_dir.joinpath(f"log_{app_name}_{demo_name}.txt") - prompt = node.compile(context=context, schema="json", mode="auto") - msg = node.content - doc_content[action_type] = msg + # log_path = task_dir.joinpath(f"log_{app_name}_{demo_name}.txt") + prompt = node.compile(context=context, schema="json", mode="auto") + msg = node.content + doc_content[action_type] = msg - with open(doc_path, "w") as outfile: - outfile.write(str(doc_content)) - logger.info(f"Documentation generated and saved to {doc_path}") + with open(doc_path, "w") as outfile: + outfile.write(str(doc_content)) + logger.info(f"Documentation generated and saved to {doc_path}") + +if __name__ == "__main__": + loop = asyncio.get_event_loop() + loop.run_until_complete(manual_test()) + loop.close() + print("OK") diff --git a/metagpt/actions/action_node.py b/metagpt/actions/action_node.py index 31e4cc0fc..3efb9e113 100644 --- a/metagpt/actions/action_node.py +++ b/metagpt/actions/action_node.py @@ -461,6 +461,7 @@ class ActionNode: self.instruct_content = scontent else: self.content = await self.llm.aask(prompt) + logger.info(self.content) self.instruct_content = None return self @@ -476,6 +477,7 @@ class ActionNode: timeout=USE_CONFIG_TIMEOUT, exclude=[], ): + logger.info("进入fill") """Fill the node(s) with mode. :param context: Everything we should know when filling node.