Update Manual Test

This commit is contained in:
didi 2024-01-30 23:41:20 +08:00 committed by better629
parent 7e3a7bdbc2
commit 058c79a860
3 changed files with 79 additions and 68 deletions

View file

@ -0,0 +1 @@
{'tap': '[CONTENT]\n{\n "Observation": "The first image shows a mobile device\'s home screen with various app icons and a Google search bar at the top. The second image displays an app drawer with a grid of apps and a search bar at the top, indicating that the UI element has been tapped.",\n "Thought": "Tapping the UI element opens the app drawer, which is a common function in mobile operating systems to access a list of all installed apps.",\n "Description": "Tapping this area will open the app drawer, displaying a list of all installed applications."\n}\n[/CONTENT]', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}

View file

@ -7,6 +7,7 @@ import re
import ast
import json
import time
import asyncio
from pathlib import Path
from prompts.operation_prompt import (
@ -24,83 +25,90 @@ from metagpt.logs import logger
from metagpt.actions.action import Action
# Fixture locations for the demo_Contacts recording. All paths are relative,
# so they resolve against the current working directory at run time.
TEST_BEFORE_PATH = Path(
    "apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_1.png")
TEST_AFTER_PATH = Path(
    "apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_2.png")
RECORD_PATH = Path("apps/demo_Contacts/record.txt")
TASK_DESC_PATH = Path("apps/demo_Contacts/task_desc.txt")
# Directory where the per-element documentation files are read and written.
DOCS_DIR = Path("storage")

# Action instance whose `llm` attribute is handed to RECORD_PARSE_NODE.fill.
testaction = Action(name="test")

# TODO test for parse record
# Test using only one image
async def manual_test():
    """Generate documentation for the first recorded UI action and save it.

    Steps:
      1. Encode the before/after screenshots.
      2. Read the first line of RECORD_PATH (``<action>(<param>):::<resource_id>``)
         and pick the prompt template matching the action type.
      3. Load the existing documentation dict for the element from
         DOCS_DIR/<resource_id>.txt, or start from an empty one.
      4. Call RECORD_PARSE_NODE.fill with the prompt and both images, store
         the returned content under the action type and write the dict back.

    Raises:
        ValueError: if the record line has no parenthesised parameter, names
            an unsupported action type, or carries an unknown swipe direction.
    """
    img_before_base64 = encode_image(TEST_BEFORE_PATH)
    img_after_base64 = encode_image(TEST_AFTER_PATH)

    with open(RECORD_PATH, "r") as record_file:
        rec = record_file.readline().strip()

    action, resource_id = rec.split(":::")
    action_type = action.split("(")[0]
    param_match = re.search(r"\((.*?)\)", action)
    if param_match is None:
        raise ValueError(f"Malformed record line (no action parameter): {rec!r}")
    action_param = param_match.group(1)

    if action_type == ActionOp.TAP.value:
        context = tap_doc_template.format(ui_element=action_param)
    elif action_type == ActionOp.TEXT.value:
        # Only the input area is needed for the prompt; the typed text is ignored.
        input_area, _input_text = action_param.split(":sep:")
        context = text_doc_template.format(ui_element=input_area)
    elif action_type == ActionOp.LONG_PRESS.value:
        context = long_press_doc_template.format(ui_element=action_param)
    elif action_type == ActionOp.SWIPE.value:
        swipe_area, swipe_dir = action_param.split(":sep:")
        # The generic swipe action is narrowed to a vertical or horizontal
        # action type, which is then used as the key into the doc dict.
        if swipe_dir in (SwipeOp.UP.value, SwipeOp.DOWN.value):
            action_type = ActionOp.VERTICAL_SWIPE.value
        elif swipe_dir in (SwipeOp.LEFT.value, SwipeOp.RIGHT.value):
            action_type = ActionOp.HORIZONTAL_SWIPE.value
        else:
            raise ValueError(f"Unknown swipe direction: {swipe_dir!r}")
        context = swipe_doc_template.format(swipe_dir=swipe_dir, ui_element=swipe_area)
    else:
        # Previously this branch only printed a message and execution went on
        # with `context` unbound; fail with an explicit error instead.
        raise ValueError(f"Unsupported action type: {action_type!r}")

    with open(TASK_DESC_PATH, "r") as task_desc_file:
        task_desc = task_desc_file.read()
    context = context.format(task_desc=task_desc)

    doc_path = DOCS_DIR.joinpath(resource_id + ".txt")
    if doc_path.exists():
        # The doc file holds the repr of a dict, hence literal_eval.
        with open(doc_path) as doc_file:
            doc_content = ast.literal_eval(doc_file.read())
        if doc_content[action_type]:
            if config.get_other("doc_refine"):
                refine_context = refine_doc_suffix.format(old_doc=doc_content[action_type])
                context += refine_context
                logger.info(
                    f"Documentation for the element {resource_id} already exists. The doc will be "
                    f"refined based on the latest demo.")
            else:
                # NOTE(review): generation below still runs and overwrites the
                # existing entry even when refinement is off - confirm intended.
                logger.info(
                    f"Documentation for the element {resource_id} already exists. Turn on DOC_REFINE "
                    f"in the config file if needed.")
    else:
        doc_content = {
            "tap": "",
            "text": "",
            "v_swipe": "",
            "h_swipe": "",
            "long_press": "",
        }

    logger.info(f"Waiting for GPT-4V to generate documentation for the element {resource_id}")
    node = await RECORD_PARSE_NODE.fill(
        context=context,
        llm=testaction.llm,
        images=[img_before_base64, img_after_base64],
    )

    # The compiled prompt is not used afterwards; the call is kept from the
    # original flow.
    node.compile(context=context, schema="json", mode="auto")
    doc_content[action_type] = node.content

    with open(doc_path, "w") as outfile:
        outfile.write(str(doc_content))
    logger.info(f"Documentation generated and saved to {doc_path}")
if __name__ == "__main__":
    # asyncio.run creates a fresh event loop, runs the coroutine to completion
    # and closes the loop even if the coroutine raises, replacing the manual
    # get_event_loop / run_until_complete / close sequence.
    asyncio.run(manual_test())
    print("OK")

View file

@ -461,6 +461,7 @@ class ActionNode:
self.instruct_content = scontent
else:
self.content = await self.llm.aask(prompt)
logger.info(self.content)
self.instruct_content = None
return self
@ -476,6 +477,7 @@ class ActionNode:
timeout=USE_CONFIG_TIMEOUT,
exclude=[],
):
logger.info("进入fill")
"""Fill the node(s) with mode.
:param context: Everything we should know when filling node.