mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-02 14:45:17 +02:00
Add manual_record module & ActionOp.STOP
Questions about Android Env & EnvAPIAbstract have been annotated in codes
This commit is contained in:
parent
a7a84e4933
commit
42425ef403
2 changed files with 142 additions and 2 deletions
|
|
@ -1,8 +1,21 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Desc : manual record user interaction in stage=learn & mode=manual, LIKE scripts/step_recorder.py
|
||||
import cv2
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from examples.andriod_assistant.utils.schema import OpLogItem, ActionOp, ParamExtState, GridOp, ActionOp, TapOp, \
|
||||
TapGridOp, \
|
||||
LongPressOp, LongPressGridOp, SwipeOp, SwipeGridOp, TextOp, AndroidElement
|
||||
from examples.andriod_assistant.utils.utils import draw_bbox_multi, traverse_xml_tree
|
||||
from metagpt.actions.action import Action
|
||||
from metagpt.logs import logger
|
||||
from metagpt.config2 import config
|
||||
from metagpt.environment.android_env.android_env import AndroidEnv
|
||||
from metagpt.environment.api.env_api import EnvAPIAbstract
|
||||
from metagpt.const import ADB_EXEC_FAIL
|
||||
|
||||
|
||||
class ManualRecord(Action):
|
||||
|
|
@ -10,5 +23,131 @@ class ManualRecord(Action):
|
|||
|
||||
name: str = "ManualRecord"
|
||||
|
||||
async def run(self):
|
||||
pass
|
||||
async def run(
|
||||
self, demo_name: str, task_desc: str, task_dir: Path, env: AndroidEnv,
|
||||
grid_on: bool = False
|
||||
):
|
||||
|
||||
# Question 这里是将通过ADB获取的东西存到本地的路径的吧
|
||||
screenshot_path: Path = env.step(
|
||||
EnvAPIAbstract(
|
||||
api_name="get_screenshot", kwargs={"ss_name": f"{demo_name}", "local_save_dir": task_dir}
|
||||
)
|
||||
)
|
||||
xml_path: Path = env.step(
|
||||
EnvAPIAbstract(api_name="get_xml", kwargs={"xml_name": f"{demo_name}", "local_save_dir": task_dir})
|
||||
)
|
||||
if not screenshot_path.exists() or not xml_path.exists():
|
||||
# TODO exit
|
||||
return
|
||||
step = 0
|
||||
# Question 直接使用 OS 构建路径合适吗?
|
||||
record_path = os.path.join(task_dir, "record.txt")
|
||||
record_file = open(record_path, "w")
|
||||
while True:
|
||||
step += 1
|
||||
clickable_list = []
|
||||
focusable_list = []
|
||||
traverse_xml_tree(xml_path, clickable_list, "clickable", True)
|
||||
traverse_xml_tree(xml_path, focusable_list, "focusable", True)
|
||||
elem_list: list[AndroidElement] = clickable_list.copy()
|
||||
|
||||
for elem in focusable_list:
|
||||
bbox = elem.bbox
|
||||
center = (bbox[0][0] + bbox[1][0]) // 2, (bbox[0][1] + bbox[1][1]) // 2
|
||||
close = False
|
||||
for e in clickable_list:
|
||||
bbox = e.bbox
|
||||
center_ = (bbox[0][0] + bbox[1][0]) // 2, (bbox[0][1] + bbox[1][1]) // 2
|
||||
dist = (abs(center[0] - center_[0]) ** 2 + abs(center[1] - center_[1]) ** 2) ** 0.5
|
||||
if dist <= config.get_other("min_dist"):
|
||||
close = True
|
||||
break
|
||||
if not close:
|
||||
elem_list.append(elem)
|
||||
screenshot_labeled_path = task_dir.joinpath(f"{task_dir}_{step}_labeled.png")
|
||||
labeled_img = draw_bbox_multi(screenshot_path, screenshot_labeled_path, elem_list)
|
||||
|
||||
cv2.imshow("image", labeled_img)
|
||||
cv2.waitKey(0)
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
user_input = "xxx"
|
||||
logger.info(
|
||||
"Choose one of the following actions you want to perform on the current screen:\ntap, text, long "
|
||||
"press, swipe, stop", "blue")
|
||||
|
||||
while user_input.lower() != ActionOp.TAP.value and user_input.lower() != ActionOp.TEXT.value and user_input.lower() != ActionOp.LONG_PRESS.value \
|
||||
and user_input.lower() != ActionOp.SWIPE.value and user_input.lower() != ActionOp.STOP.value:
|
||||
user_input = input()
|
||||
|
||||
if user_input.lower() == ActionOp.TAP.value:
|
||||
logger.info(f"Which element do you want to tap? Choose a numeric tag from 1 to {len(elem_list)}:",
|
||||
"blue")
|
||||
user_input = "xxx"
|
||||
while not user_input.isnumeric() or int(user_input) > len(elem_list) or int(user_input) < 1:
|
||||
user_input = input()
|
||||
tl, br = elem_list[int(user_input) - 1].bbox
|
||||
x, y = (tl[0] + br[0]) // 2, (tl[1] + br[1]) // 2
|
||||
ret = env.step(
|
||||
EnvAPIAbstract(api_name="user_tap", kwargs={"x": x, "y": y})
|
||||
)
|
||||
# Question 将 ERROR 替换为 ADB_EXEC_FAIL(FAILED)
|
||||
if ret == ADB_EXEC_FAIL:
|
||||
logger.info("ERROR: tap execution failed", "red")
|
||||
break
|
||||
record_file.write(f"tap({int(user_input)}):::{elem_list[int(user_input) - 1].uid}\n")
|
||||
elif user_input.lower() == ActionOp.TEXT.value:
|
||||
logger.info(f"Which element do you want to input the text string? Choose a numeric tag from 1 to "
|
||||
f"{len(elem_list)}:", "blue")
|
||||
input_area = "xxx"
|
||||
while not input_area.isnumeric() or int(input_area) > len(elem_list) or int(input_area) < 1:
|
||||
input_area = input()
|
||||
logger.info("Enter your input text below:", "blue")
|
||||
user_input = ""
|
||||
while not user_input:
|
||||
user_input = input()
|
||||
env.step(
|
||||
EnvAPIAbstract(api_name="user_input", kwargs={"input_txt": user_input})
|
||||
)
|
||||
record_file.write(f"text({input_area}:sep:\"{user_input}\"):::{elem_list[int(input_area) - 1].uid}\n")
|
||||
elif user_input.lower() == ActionOp.LONG_PRESS.value:
|
||||
logger.info(
|
||||
f"Which element do you want to long press? Choose a numeric tag from 1 to {len(elem_list)}:",
|
||||
"blue")
|
||||
user_input = "xxx"
|
||||
while not user_input.isnumeric() or int(user_input) > len(elem_list) or int(user_input) < 1:
|
||||
user_input = input()
|
||||
tl, br = elem_list[int(user_input) - 1].bbox
|
||||
x, y = (tl[0] + br[0]) // 2, (tl[1] + br[1]) // 2
|
||||
env.step(EnvAPIAbstract(api_name="user_longpress", kwargs={"x": x, "y": y}))
|
||||
if ret == ADB_EXEC_FAIL:
|
||||
logger.info("ERROR: long press execution failed", "red")
|
||||
break
|
||||
record_file.write(f"long_press({int(user_input)}):::{elem_list[int(user_input) - 1].uid}\n")
|
||||
elif user_input.lower() == ActionOp.SWIPE.value:
|
||||
logger.info(
|
||||
f"What is the direction of your swipe? Choose one from the following options:\nup, down, left,"
|
||||
f" right", "blue")
|
||||
user_input = ""
|
||||
while user_input != SwipeOp.UP.value and user_input != SwipeOp.DOWN.value and user_input != SwipeOp.LEFT.value and user_input != SwipeOp.RIGHT.value:
|
||||
user_input = input()
|
||||
swipe_dir = user_input
|
||||
logger.info(
|
||||
f"Which element do you want to swipe? Choose a numeric tag from 1 to {len(elem_list)}:")
|
||||
while not user_input.isnumeric() or int(user_input) > len(elem_list) or int(user_input) < 1:
|
||||
user_input = input()
|
||||
tl, br = elem_list[int(user_input) - 1].bbox
|
||||
x, y = (tl[0] + br[0]) // 2, (tl[1] + br[1]) // 2
|
||||
ret = env.step(EnvAPIAbstract("user_swipe", kwargs={"x": x, "y": y, "orient": swipe_dir}))
|
||||
if ret == ADB_EXEC_FAIL:
|
||||
logger.info("ERROR: swipe execution failed", "red")
|
||||
break
|
||||
record_file.write(f"swipe({int(user_input)}:sep:{swipe_dir}):::{elem_list[int(user_input) - 1].uid}\n")
|
||||
elif user_input.lower() == ActionOp.STOP.value:
|
||||
record_file.write("stop\n")
|
||||
record_file.close()
|
||||
break
|
||||
else:
|
||||
break
|
||||
time.sleep(3)
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ class ActionOp(Enum):
|
|||
TEXT = "text"
|
||||
SWIPE = "swipe"
|
||||
GRID = "grid"
|
||||
STOP = "stop"
|
||||
|
||||
|
||||
class SwipeOp(Enum):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue