mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
Update ScreenshotParse action node; add tests for 4 action nodes to test_for_an.py
This commit is contained in:
parent
13975976d5
commit
26bbdef6c6
3 changed files with 86 additions and 83 deletions
|
|
@ -19,13 +19,14 @@ from examples.andriod_assistant.utils.schema import (
|
|||
OpLogItem,
|
||||
RunState,
|
||||
SwipeGridOp,
|
||||
SwipeOp,
|
||||
SwipeOp_3,
|
||||
TapGridOp,
|
||||
TapOp,
|
||||
TextOp,
|
||||
)
|
||||
from examples.andriod_assistant.utils.utils import (
|
||||
area_to_xy,
|
||||
draw_grid,
|
||||
draw_bbox_multi,
|
||||
elem_bbox_to_xy,
|
||||
screenshot_parse_extract,
|
||||
|
|
@ -47,9 +48,9 @@ class ScreenshotParse(Action):
|
|||
return ""
|
||||
|
||||
ui_doc = """
|
||||
You also have access to the following documentations that describes the functionalities of UI
|
||||
elements you can interact on the screen. These docs are crucial for you to determine the target of your
|
||||
next action. You should always prioritize these documented elements for interaction:"""
|
||||
You also have access to the following documentations that describes the functionalities of UI
|
||||
elements you can interact on the screen. These docs are crucial for you to determine the target of your
|
||||
next action. You should always prioritize these documented elements for interaction:"""
|
||||
for i, elem in enumerate(elem_list):
|
||||
doc_path = docs_idr.joinpath(f"{elem.uid}.txt")
|
||||
if not doc_path.exists():
|
||||
|
|
@ -78,23 +79,32 @@ next action. You should always prioritize these documented elements for interact
|
|||
return ui_doc
|
||||
|
||||
async def run(
|
||||
self,
|
||||
round_count: int,
|
||||
task_desc: str,
|
||||
last_act: str,
|
||||
task_dir: Path,
|
||||
docs_dir: Path,
|
||||
grid_on: bool,
|
||||
env: AndroidEnv,
|
||||
self,
|
||||
round_count: int,
|
||||
task_desc: str,
|
||||
last_act: str,
|
||||
task_dir: Path,
|
||||
docs_dir: Path,
|
||||
grid_on: bool,
|
||||
env: AndroidEnv,
|
||||
):
|
||||
screenshot_path: Path = env.step(
|
||||
for path in [task_dir, docs_dir]:
|
||||
if not path.exists():
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
screenshot_path: Path = env.observe(
|
||||
EnvAPIAbstract(
|
||||
api_name="get_screenshot", kwargs={"ss_name": f"{round_count}_before", "local_save_dir": task_dir}
|
||||
api_name="get_screenshot",
|
||||
kwargs={"ss_name": f"{round_count}_before", "local_save_dir": task_dir}
|
||||
)
|
||||
)
|
||||
xml_path: Path = env.step(
|
||||
EnvAPIAbstract(api_name="get_xml", kwargs={"xml_name": f"{round_count}", "local_save_dir": task_dir})
|
||||
xml_path: Path = env.observe(
|
||||
EnvAPIAbstract(
|
||||
api_name="get_xml",
|
||||
kwargs={"xml_name": f"{round_count}", "local_save_dir": task_dir}
|
||||
)
|
||||
)
|
||||
width, height = env.device_shape
|
||||
if not screenshot_path.exists() or not xml_path.exists():
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
|
||||
|
|
@ -111,18 +121,23 @@ next action. You should always prioritize these documented elements for interact
|
|||
bbox = e.bbox
|
||||
center_ = (bbox[0][0] + bbox[1][0]) // 2, (bbox[0][1] + bbox[1][1]) // 2
|
||||
dist = (abs(center[0] - center_[0]) ** 2 + abs(center[1] - center_[1]) ** 2) ** 0.5
|
||||
if dist <= config.get_other("min_dist"):
|
||||
# TODO: min_dist is temporarily hard-coded to 30 for the single-action test; restore the config lookup below afterwards
|
||||
# if dist <= config.get_other("min_dist"):
|
||||
if dist <= 30:
|
||||
close = True
|
||||
break
|
||||
if not close:
|
||||
elem_list.append(elem)
|
||||
|
||||
screenshot_labeled_path = task_dir.joinpath(f"{task_dir}_{round_count}_labeled.png")
|
||||
screenshot_labeled_path = task_dir.joinpath(f"{round_count}_labeled.png")
|
||||
draw_bbox_multi(screenshot_path, screenshot_labeled_path, elem_list)
|
||||
img_base64 = encode_image(screenshot_labeled_path)
|
||||
|
||||
parse_template = screenshot_parse_with_grid_template if grid_on else screenshot_parse_template
|
||||
|
||||
if grid_on:
|
||||
rows, cols = draw_grid(screenshot_path, task_dir / f"{round_count}_grid.png")
|
||||
|
||||
ui_doc = self._makeup_ui_document(elem_list, docs_dir)
|
||||
context = parse_template.format(ui_document=ui_doc, task_description=task_desc, last_act=last_act)
|
||||
node = await SCREENSHOT_PARSE_NODE.fill(context=context, llm=self.llm, images=[img_base64])
|
||||
|
|
@ -131,7 +146,7 @@ next action. You should always prioritize these documented elements for interact
|
|||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
|
||||
prompt = node.compile(context=context, schema="json", mode="auto")
|
||||
OpLogItem(step=round_count, prompt=prompt, image=screenshot_labeled_path, response=node.content)
|
||||
OpLogItem(step=round_count, prompt=prompt, image=str(screenshot_labeled_path), response=node.content)
|
||||
|
||||
op_param = screenshot_parse_extract(node.instruct_content.model_dump(), grid_on)
|
||||
if op_param.param_state == RunState.FINISH:
|
||||
|
|
@ -141,23 +156,24 @@ next action. You should always prioritize these documented elements for interact
|
|||
|
||||
if isinstance(op_param, TapOp):
|
||||
x, y = elem_bbox_to_xy(elem_list[op_param.area - 1].bbox)
|
||||
res = env.step(EnvAPIAbstract("system_tap", kwargs={"x": x, "y": y}))
|
||||
res = env.step(EnvAPIAbstract(api_name="system_tap", kwargs={"x": x, "y": y}))
|
||||
if res == ADB_EXEC_FAIL:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
elif isinstance(op_param, TextOp):
|
||||
res = env.step(EnvAPIAbstract("user_input", kwargs={"input_txt": op_param.input_str}))
|
||||
res = env.step(EnvAPIAbstract(api_name="user_input", kwargs={"input_txt": op_param.input_str}))
|
||||
if res == ADB_EXEC_FAIL:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
elif isinstance(op_param, LongPressOp):
|
||||
x, y = elem_bbox_to_xy(elem_list[op_param.area - 1].bbox)
|
||||
res = env.step(EnvAPIAbstract("user_longpress", kwargs={"x": x, "y": y}))
|
||||
res = env.step(EnvAPIAbstract(api_name="user_longpress", kwargs={"x": x, "y": y}))
|
||||
if res == ADB_EXEC_FAIL:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
elif isinstance(op_param, SwipeOp):
|
||||
elif isinstance(op_param, SwipeOp_3):
|
||||
x, y = elem_bbox_to_xy(elem_list[op_param.area - 1].bbox)
|
||||
res = env.step(
|
||||
EnvAPIAbstract(
|
||||
"user_swipe", kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist}
|
||||
api_name="user_swipe",
|
||||
kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist}
|
||||
)
|
||||
)
|
||||
if res == ADB_EXEC_FAIL:
|
||||
|
|
@ -167,18 +183,19 @@ next action. You should always prioritize these documented elements for interact
|
|||
elif isinstance(op_param, TapGridOp) or isinstance(op_param, LongPressGridOp):
|
||||
x, y = area_to_xy(op_param.area, op_param.subarea, env.width, env.height, env.rows, env.cols)
|
||||
if isinstance(op_param, TapGridOp):
|
||||
res = env.step(EnvAPIAbstract("system_tap", kwargs={"x": x, "y": y}))
|
||||
res = env.step(EnvAPIAbstract(api_name="system_tap", kwargs={"x": x, "y": y}))
|
||||
if res == ADB_EXEC_FAIL:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
else:
|
||||
# LongPressGridOp
|
||||
res = env.step(EnvAPIAbstract("user_longpress", kwargs={"x": x, "y": y}))
|
||||
res = env.step(EnvAPIAbstract(api_name="user_longpress", kwargs={"x": x, "y": y}))
|
||||
if res == ADB_EXEC_FAIL:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
elif isinstance(op_param, SwipeGridOp):
|
||||
start_x, start_y = area_to_xy(op_param.start_area, op_param.start_subarea)
|
||||
end_x, end_y = area_to_xy(op_param.end_area, op_param.end_subarea)
|
||||
res = env.step(EnvAPIAbstract("user_swipe_to", kwargs={"start": (start_x, start_y), "end": (end_x, end_y)}))
|
||||
start_x, start_y = area_to_xy(op_param.start_area, op_param.start_subarea, width, height, rows, cols)
|
||||
end_x, end_y = area_to_xy(op_param.end_area, op_param.end_subarea, width, height, rows, cols)
|
||||
res = env.step(
|
||||
EnvAPIAbstract(api_name="user_swipe_to", kwargs={"start": (start_x, start_y), "end": (end_x, end_y)}))
|
||||
if res == ADB_EXEC_FAIL:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from pathlib import Path
|
|||
from actions.manual_record import ManualRecord
|
||||
from actions.parse_record import ParseRecord
|
||||
from actions.self_learn_and_reflect import SelfLearnAndReflect
|
||||
from actions.screenshot_parse import ScreenshotParse
|
||||
from metagpt.environment.android_env.android_env import AndroidEnv
|
||||
|
||||
TASK_PATH = Path("apps/Contacts")
|
||||
|
|
@ -29,64 +30,49 @@ test_env_manual_learn_android = AndroidEnv(
|
|||
test_manual_record = ManualRecord()
|
||||
test_manual_parse = ParseRecord()
|
||||
|
||||
# Run against the virtual machine (emulator)
|
||||
# Check that the results of the different Action Nodes match expectations (Action Node)
|
||||
test_env_screenshot_parse_android = AndroidEnv(
|
||||
device_id="emulator-5554",
|
||||
xml_dir=Path("/sdcard"),
|
||||
screenshot_dir=Path("/sdcard/Pictures/Screenshots"),
|
||||
)
|
||||
test_screenshot_parse = ScreenshotParse()
|
||||
|
||||
if __name__ == "__main__":
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(
|
||||
|
||||
test_action_list = [
|
||||
test_self_learning.run(
|
||||
round_count=20,
|
||||
task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
last_act="",
|
||||
task_dir=TASK_PATH / "demos" / f"self_learning_{DEMO_NAME}",
|
||||
docs_dir=SELF_EXPLORE_DOC_PATH,
|
||||
env=test_env_self_learn_android
|
||||
),
|
||||
test_manual_record.run(
|
||||
demo_name=DEMO_NAME,
|
||||
task_dir=TASK_PATH / "demos" / f"manual_record_{DEMO_NAME}",
|
||||
task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
env=test_env_manual_learn_android
|
||||
),
|
||||
test_manual_parse.run(
|
||||
app_name="Contacts",
|
||||
demo_name="1708753998.5757847",
|
||||
task_dir=TASK_PATH / "demos" / f"manual_record_1708753998.5757847", # needs to be changed
|
||||
docs_dir=PARSE_RECORD_DOC_PATH, # needs to be changed
|
||||
demo_name=DEMO_NAME,
|
||||
task_dir=TASK_PATH / "demos" / f"manual_record_{DEMO_NAME}", # needs to be changed
|
||||
docs_dir=PARSE_RECORD_DOC_PATH, # needs to be changed
|
||||
env=test_env_manual_learn_android
|
||||
))
|
||||
# test_action_list = [
|
||||
# # test_self_learning.run(
|
||||
# # round_count=20,
|
||||
# # task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
# # last_act="",
|
||||
# # task_dir= TASK_PATH / "demos" / f"self_learning_{DEMO_NAME}",
|
||||
# # docs_dir=DOC_PATH,
|
||||
# # env=test_env_self_learn_android
|
||||
# # ),
|
||||
# test_manual_record.run(
|
||||
# demo_name=DEMO_NAME,
|
||||
# task_dir=TASK_PATH / "demos" / f"manual_record_{DEMO_NAME}",
|
||||
# task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
# env=test_env_manual_learn_android
|
||||
# ),
|
||||
# test_manual_parse.run(
|
||||
# app_name="Contacts",
|
||||
# demo_name=DEMO_NAME,
|
||||
# task_dir=TASK_PATH / "demos" / f"manual_record_{DEMO_NAME}", # 修要修改
|
||||
# docs_dir=PARSE_RECORD_DOC_PATH, # 需要修改
|
||||
# env=test_env_manual_learn_android
|
||||
# )
|
||||
# ]
|
||||
# test_action_list = [
|
||||
# test_self_learning.run(
|
||||
# round_count=20,
|
||||
# task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
# last_act="",
|
||||
# task_dir=TASK_PATH,
|
||||
# docs_dir=DOC_PATH,
|
||||
# env=test_env_self_learn_android
|
||||
# ),
|
||||
# test_manual_record.run(
|
||||
# demo_name=DEMO_NAME,
|
||||
# task_dir=TASK_PATH,
|
||||
# env=test_env_manual_learn_android
|
||||
# ),
|
||||
# test_manual_parse.run(
|
||||
# app_name="Contacts",
|
||||
# demo_name=DEMO_NAME,
|
||||
# task_dir=TASK_PATH,
|
||||
# docs_dir=DOC_PATH,
|
||||
# env=test_env_manual_learn_android
|
||||
# )
|
||||
# ]
|
||||
# loop.run_until_complete(asyncio.gather(*test_action_list))
|
||||
),
|
||||
test_screenshot_parse.run(
|
||||
round_count=20,
|
||||
task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
last_act="",
|
||||
task_dir=TASK_PATH / f"act_{DEMO_NAME}",
|
||||
docs_dir=PARSE_RECORD_DOC_PATH,
|
||||
env=test_env_screenshot_parse_android,
|
||||
grid_on=False
|
||||
)
|
||||
]
|
||||
|
||||
loop.run_until_complete(asyncio.gather(*test_action_list))
|
||||
loop.close()
|
||||
print("Finish")
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ def area_to_xy(area: int, subarea: str, width: int, height: int, rows: int, cols
|
|||
return x, y
|
||||
|
||||
|
||||
def elem_bbox_to_xy(bbox: tuple[tuple[int, int]]) -> tuple[int, int]:
|
||||
def elem_bbox_to_xy(bbox: tuple[tuple[int, int], tuple[int, int]]) -> tuple[int, int]:
|
||||
tl, br = bbox
|
||||
x, y = (tl[0] + br[0]) // 2, (tl[1] + br[1]) // 2
|
||||
return x, y
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue