mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
fix format
This commit is contained in:
parent
fb82be4248
commit
7610fa22d9
7 changed files with 218 additions and 143 deletions
|
|
@ -1,21 +1,23 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Desc : manual record user interaction in stage=learn & mode=manual, LIKE scripts/step_recorder.py
|
||||
import cv2
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from examples.andriod_assistant.utils.schema import OpLogItem, ActionOp, RunState, GridOp, ActionOp, TapOp, \
|
||||
TapGridOp, \
|
||||
LongPressOp, LongPressGridOp, SwipeOp, SwipeGridOp, TextOp, AndroidElement
|
||||
import cv2
|
||||
|
||||
from examples.andriod_assistant.utils.schema import (
|
||||
ActionOp,
|
||||
AndroidElement,
|
||||
SwipeOp,
|
||||
)
|
||||
from examples.andriod_assistant.utils.utils import draw_bbox_multi, traverse_xml_tree
|
||||
from metagpt.actions.action import Action
|
||||
from metagpt.logs import logger
|
||||
from metagpt.config2 import config
|
||||
from metagpt.const import ADB_EXEC_FAIL
|
||||
from metagpt.environment.android_env.android_env import AndroidEnv
|
||||
from metagpt.environment.api.env_api import EnvAPIAbstract
|
||||
from metagpt.const import ADB_EXEC_FAIL
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
class ManualRecord(Action):
|
||||
|
|
@ -23,15 +25,10 @@ class ManualRecord(Action):
|
|||
|
||||
name: str = "ManualRecord"
|
||||
|
||||
async def run(
|
||||
self, demo_name: str, task_dir: Path, env: AndroidEnv
|
||||
):
|
||||
|
||||
async def run(self, demo_name: str, task_dir: Path, env: AndroidEnv):
|
||||
# Question: is this the local path where the data fetched via ADB gets saved?
|
||||
screenshot_path: Path = env.step(
|
||||
EnvAPIAbstract(
|
||||
api_name="get_screenshot", kwargs={"ss_name": f"{demo_name}", "local_save_dir": task_dir}
|
||||
)
|
||||
EnvAPIAbstract(api_name="get_screenshot", kwargs={"ss_name": f"{demo_name}", "local_save_dir": task_dir})
|
||||
)
|
||||
xml_path: Path = env.step(
|
||||
EnvAPIAbstract(api_name="get_xml", kwargs={"xml_name": f"{demo_name}", "local_save_dir": task_dir})
|
||||
|
|
@ -74,31 +71,40 @@ class ManualRecord(Action):
|
|||
user_input = "xxx"
|
||||
logger.info(
|
||||
"Choose one of the following actions you want to perform on the current screen:\ntap, text, long "
|
||||
"press, swipe, stop", "blue")
|
||||
"press, swipe, stop",
|
||||
"blue",
|
||||
)
|
||||
|
||||
while user_input.lower() != ActionOp.TAP.value and user_input.lower() != ActionOp.TEXT.value and user_input.lower() != ActionOp.LONG_PRESS.value \
|
||||
and user_input.lower() != ActionOp.SWIPE.value and user_input.lower() != ActionOp.STOP.value:
|
||||
while (
|
||||
user_input.lower() != ActionOp.TAP.value
|
||||
and user_input.lower() != ActionOp.TEXT.value
|
||||
and user_input.lower() != ActionOp.LONG_PRESS.value
|
||||
and user_input.lower() != ActionOp.SWIPE.value
|
||||
and user_input.lower() != ActionOp.STOP.value
|
||||
):
|
||||
user_input = input()
|
||||
|
||||
if user_input.lower() == ActionOp.TAP.value:
|
||||
logger.info(f"Which element do you want to tap? Choose a numeric tag from 1 to {len(elem_list)}:",
|
||||
"blue")
|
||||
logger.info(
|
||||
f"Which element do you want to tap? Choose a numeric tag from 1 to {len(elem_list)}:", "blue"
|
||||
)
|
||||
user_input = "xxx"
|
||||
while not user_input.isnumeric() or int(user_input) > len(elem_list) or int(user_input) < 1:
|
||||
user_input = input()
|
||||
tl, br = elem_list[int(user_input) - 1].bbox
|
||||
x, y = (tl[0] + br[0]) // 2, (tl[1] + br[1]) // 2
|
||||
ret = env.step(
|
||||
EnvAPIAbstract(api_name="user_tap", kwargs={"x": x, "y": y})
|
||||
)
|
||||
ret = env.step(EnvAPIAbstract(api_name="user_tap", kwargs={"x": x, "y": y}))
|
||||
# Question: replace ERROR with ADB_EXEC_FAIL (FAILED)
|
||||
if ret == ADB_EXEC_FAIL:
|
||||
logger.info("ERROR: tap execution failed", "red")
|
||||
break
|
||||
record_file.write(f"tap({int(user_input)}):::{elem_list[int(user_input) - 1].uid}\n")
|
||||
elif user_input.lower() == ActionOp.TEXT.value:
|
||||
logger.info(f"Which element do you want to input the text string? Choose a numeric tag from 1 to "
|
||||
f"{len(elem_list)}:", "blue")
|
||||
logger.info(
|
||||
f"Which element do you want to input the text string? Choose a numeric tag from 1 to "
|
||||
f"{len(elem_list)}:",
|
||||
"blue",
|
||||
)
|
||||
input_area = "xxx"
|
||||
while not input_area.isnumeric() or int(input_area) > len(elem_list) or int(input_area) < 1:
|
||||
input_area = input()
|
||||
|
|
@ -106,14 +112,12 @@ class ManualRecord(Action):
|
|||
user_input = ""
|
||||
while not user_input:
|
||||
user_input = input()
|
||||
env.step(
|
||||
EnvAPIAbstract(api_name="user_input", kwargs={"input_txt": user_input})
|
||||
)
|
||||
record_file.write(f"text({input_area}:sep:\"{user_input}\"):::{elem_list[int(input_area) - 1].uid}\n")
|
||||
env.step(EnvAPIAbstract(api_name="user_input", kwargs={"input_txt": user_input}))
|
||||
record_file.write(f'text({input_area}:sep:"{user_input}"):::{elem_list[int(input_area) - 1].uid}\n')
|
||||
elif user_input.lower() == ActionOp.LONG_PRESS.value:
|
||||
logger.info(
|
||||
f"Which element do you want to long press? Choose a numeric tag from 1 to {len(elem_list)}:",
|
||||
"blue")
|
||||
f"Which element do you want to long press? Choose a numeric tag from 1 to {len(elem_list)}:", "blue"
|
||||
)
|
||||
user_input = "xxx"
|
||||
while not user_input.isnumeric() or int(user_input) > len(elem_list) or int(user_input) < 1:
|
||||
user_input = input()
|
||||
|
|
@ -126,14 +130,20 @@ class ManualRecord(Action):
|
|||
record_file.write(f"long_press({int(user_input)}):::{elem_list[int(user_input) - 1].uid}\n")
|
||||
elif user_input.lower() == ActionOp.SWIPE.value:
|
||||
logger.info(
|
||||
f"What is the direction of your swipe? Choose one from the following options:\nup, down, left,"
|
||||
f" right", "blue")
|
||||
"What is the direction of your swipe? Choose one from the following options:\nup, down, left,"
|
||||
" right",
|
||||
"blue",
|
||||
)
|
||||
user_input = ""
|
||||
while user_input != SwipeOp.UP.value and user_input != SwipeOp.DOWN.value and user_input != SwipeOp.LEFT.value and user_input != SwipeOp.RIGHT.value:
|
||||
while (
|
||||
user_input != SwipeOp.UP.value
|
||||
and user_input != SwipeOp.DOWN.value
|
||||
and user_input != SwipeOp.LEFT.value
|
||||
and user_input != SwipeOp.RIGHT.value
|
||||
):
|
||||
user_input = input()
|
||||
swipe_dir = user_input
|
||||
logger.info(
|
||||
f"Which element do you want to swipe? Choose a numeric tag from 1 to {len(elem_list)}:")
|
||||
logger.info(f"Which element do you want to swipe? Choose a numeric tag from 1 to {len(elem_list)}:")
|
||||
while not user_input.isnumeric() or int(user_input) > len(elem_list) or int(user_input) < 1:
|
||||
user_input = input()
|
||||
tl, br = elem_list[int(user_input) - 1].bbox
|
||||
|
|
|
|||
|
|
@ -3,35 +3,38 @@
|
|||
# @Desc : parse record to generate learned standard operations in stage=learn & mode=manual,
|
||||
# LIKE scripts/document_generation.py
|
||||
|
||||
import re
|
||||
import ast
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from examples.andriod_assistant.actions.parse_record_an import RECORD_PARSE_NODE
|
||||
from examples.andriod_assistant.prompts.operation_prompt import (
|
||||
long_press_doc_template,
|
||||
refine_doc_suffix,
|
||||
swipe_doc_template,
|
||||
tap_doc_template,
|
||||
text_doc_template,
|
||||
long_press_doc_template,
|
||||
swipe_doc_template,
|
||||
refine_doc_suffix
|
||||
)
|
||||
from examples.andriod_assistant.utils.schema import RecordLogItem, RunState, ActionOp, \
|
||||
SwipeOp, AndroidActionOutput
|
||||
from examples.andriod_assistant.actions.parse_record_an import RECORD_PARSE_NODE
|
||||
from examples.andriod_assistant.utils.schema import (
|
||||
ActionOp,
|
||||
AndroidActionOutput,
|
||||
RecordLogItem,
|
||||
RunState,
|
||||
SwipeOp,
|
||||
)
|
||||
from metagpt.actions.action import Action
|
||||
from metagpt.config2 import config
|
||||
from metagpt.environment.android_env.android_env import AndroidEnv
|
||||
from metagpt.utils.common import encode_image
|
||||
from metagpt.logs import logger
|
||||
from metagpt.actions.action import Action
|
||||
from metagpt.utils.common import encode_image
|
||||
|
||||
|
||||
class ParseRecord(Action):
|
||||
name: str = "ParseRecord"
|
||||
|
||||
async def run(
|
||||
self, app_name: str, demo_name: str, task_dir: Path, docs_dir: Path, env: AndroidEnv
|
||||
):
|
||||
async def run(self, app_name: str, demo_name: str, task_dir: Path, docs_dir: Path, env: AndroidEnv):
|
||||
doc_count = 0
|
||||
record_path = Path(task_dir) / "record.txt"
|
||||
|
||||
|
|
@ -81,24 +84,21 @@ class ParseRecord(Action):
|
|||
context += refine_context
|
||||
logger.info(
|
||||
f"Documentation for the element {resource_id} already exists. The doc will be "
|
||||
f"refined based on the latest demo.")
|
||||
f"refined based on the latest demo."
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Documentation for the element {resource_id} already exists. Turn on DOC_REFINE "
|
||||
f"in the config file if needed.")
|
||||
f"in the config file if needed."
|
||||
)
|
||||
continue
|
||||
else:
|
||||
doc_content = {
|
||||
"tap": "",
|
||||
"text": "",
|
||||
"v_swipe": "",
|
||||
"h_swipe": "",
|
||||
"long_press": ""
|
||||
}
|
||||
doc_content = {"tap": "", "text": "", "v_swipe": "", "h_swipe": "", "long_press": ""}
|
||||
|
||||
logger.info(f"Waiting for GPT-4V to generate documentation for the element {resource_id}")
|
||||
node = await RECORD_PARSE_NODE.fill(context=context, llm=self.llm,
|
||||
images=[img_before_base64, img_after_base64])
|
||||
node = await RECORD_PARSE_NODE.fill(
|
||||
context=context, llm=self.llm, images=[img_before_base64, img_after_base64]
|
||||
)
|
||||
if "error" in node.content:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
|
||||
|
|
@ -108,8 +108,13 @@ class ParseRecord(Action):
|
|||
doc_content[action_type] = msg
|
||||
|
||||
with open(log_path, "a") as logfile:
|
||||
log_item = RecordLogItem(step=step, prompt=prompt, image_before=img_before_base64,
|
||||
image_after=img_after_base64, response=node.content)
|
||||
log_item = RecordLogItem(
|
||||
step=step,
|
||||
prompt=prompt,
|
||||
image_before=img_before_base64,
|
||||
image_after=img_after_base64,
|
||||
response=node.content,
|
||||
)
|
||||
# TODO: change the dumps approach
|
||||
logfile.write(json.dumps(log_item) + "\n")
|
||||
with open(doc_path, "w") as outfile:
|
||||
|
|
|
|||
|
|
@ -5,10 +5,11 @@
|
|||
from metagpt.actions.action_node import ActionNode
|
||||
|
||||
OBSERVATION = ActionNode(
|
||||
key="Observation", expected_type=str,
|
||||
key="Observation",
|
||||
expected_type=str,
|
||||
instruction="Provide a description of your observations of the two images. "
|
||||
"Subsequently, delineate the distinctions between the first image and the second one.",
|
||||
example=""
|
||||
"Subsequently, delineate the distinctions between the first image and the second one.",
|
||||
example="",
|
||||
)
|
||||
|
||||
THOUGHT = ActionNode(
|
||||
|
|
@ -22,7 +23,7 @@ DESCRIPTION = ActionNode(
|
|||
key="Description",
|
||||
expected_type=str,
|
||||
instruction="Describe the functionality of the UI element concisely in one or two sentences Do not include "
|
||||
"the numeric tag in your description",
|
||||
"the numeric tag in your description",
|
||||
example="",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,24 +2,41 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Desc : LIKE scripts/task_executor.py in stage=act
|
||||
|
||||
from pathlib import Path
|
||||
import ast
|
||||
from pathlib import Path
|
||||
|
||||
from examples.andriod_assistant.actions.screenshot_parse_an import SCREENSHOT_PARSE_NODE
|
||||
from examples.andriod_assistant.prompts.assistant_prompt import (
|
||||
screenshot_parse_template,
|
||||
screenshot_parse_with_grid_template,
|
||||
)
|
||||
from examples.andriod_assistant.utils.schema import OpLogItem, RunState, GridOp, TapOp, TapGridOp, \
|
||||
LongPressOp, LongPressGridOp, SwipeOp, SwipeGridOp, TextOp, AndroidElement, AndroidActionOutput
|
||||
from examples.andriod_assistant.actions.screenshot_parse_an import SCREENSHOT_PARSE_NODE
|
||||
from examples.andriod_assistant.utils.utils import draw_bbox_multi, traverse_xml_tree, area_to_xy, \
|
||||
screenshot_parse_extract, elem_bbox_to_xy
|
||||
from examples.andriod_assistant.utils.schema import (
|
||||
AndroidActionOutput,
|
||||
AndroidElement,
|
||||
GridOp,
|
||||
LongPressGridOp,
|
||||
LongPressOp,
|
||||
OpLogItem,
|
||||
RunState,
|
||||
SwipeGridOp,
|
||||
SwipeOp,
|
||||
TapGridOp,
|
||||
TapOp,
|
||||
TextOp,
|
||||
)
|
||||
from examples.andriod_assistant.utils.utils import (
|
||||
area_to_xy,
|
||||
draw_bbox_multi,
|
||||
elem_bbox_to_xy,
|
||||
screenshot_parse_extract,
|
||||
traverse_xml_tree,
|
||||
)
|
||||
from metagpt.actions.action import Action
|
||||
from metagpt.config2 import config
|
||||
from metagpt.const import ADB_EXEC_FAIL
|
||||
from metagpt.environment.android_env.android_env import AndroidEnv
|
||||
from metagpt.environment.api.env_api import EnvAPIAbstract
|
||||
from metagpt.utils.common import encode_image
|
||||
from metagpt.const import ADB_EXEC_FAIL
|
||||
|
||||
|
||||
class ScreenshotParse(Action):
|
||||
|
|
@ -42,21 +59,33 @@ next action. You should always prioritize these documented elements for interact
|
|||
if doc_content["tap"]:
|
||||
ui_doc += f"This UI element is clickable. {doc_content['tap']}\n\n"
|
||||
if doc_content["text"]:
|
||||
ui_doc += f"This UI element can receive text input. The text input is used for the following " \
|
||||
f"purposes: {doc_content['text']}\n\n"
|
||||
ui_doc += (
|
||||
f"This UI element can receive text input. The text input is used for the following "
|
||||
f"purposes: {doc_content['text']}\n\n"
|
||||
)
|
||||
if doc_content["long_press"]:
|
||||
ui_doc += f"This UI element is long clickable. {doc_content['long_press']}\n\n"
|
||||
if doc_content["v_swipe"]:
|
||||
ui_doc += f"This element can be swiped directly without tapping. You can swipe vertically on " \
|
||||
f"this UI element. {doc_content['v_swipe']}\n\n"
|
||||
ui_doc += (
|
||||
f"This element can be swiped directly without tapping. You can swipe vertically on "
|
||||
f"this UI element. {doc_content['v_swipe']}\n\n"
|
||||
)
|
||||
if doc_content["h_swipe"]:
|
||||
ui_doc += f"This element can be swiped directly without tapping. You can swipe horizontally on " \
|
||||
f"this UI element. {doc_content['h_swipe']}\n\n"
|
||||
ui_doc += (
|
||||
f"This element can be swiped directly without tapping. You can swipe horizontally on "
|
||||
f"this UI element. {doc_content['h_swipe']}\n\n"
|
||||
)
|
||||
return ui_doc
|
||||
|
||||
|
||||
async def run(
|
||||
self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, grid_on: bool, env: AndroidEnv
|
||||
self,
|
||||
round_count: int,
|
||||
task_desc: str,
|
||||
last_act: str,
|
||||
task_dir: Path,
|
||||
docs_dir: Path,
|
||||
grid_on: bool,
|
||||
env: AndroidEnv,
|
||||
):
|
||||
screenshot_path: Path = env.step(
|
||||
EnvAPIAbstract(
|
||||
|
|
@ -102,7 +131,7 @@ next action. You should always prioritize these documented elements for interact
|
|||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
|
||||
prompt = node.compile(context=context, schema="json", mode="auto")
|
||||
log_item = OpLogItem(step=round_count, prompt=prompt, image=screenshot_labeled_path, response=node.content)
|
||||
OpLogItem(step=round_count, prompt=prompt, image=screenshot_labeled_path, response=node.content)
|
||||
|
||||
op_param = screenshot_parse_extract(node.instruct_content.model_dump(), grid_on)
|
||||
if op_param.param_state == RunState.FINISH:
|
||||
|
|
@ -126,7 +155,11 @@ next action. You should always prioritize these documented elements for interact
|
|||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
elif isinstance(op_param, SwipeOp):
|
||||
x, y = elem_bbox_to_xy(elem_list[op_param.area - 1].bbox)
|
||||
res = env.step(EnvAPIAbstract("user_swipe", kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist}))
|
||||
res = env.step(
|
||||
EnvAPIAbstract(
|
||||
"user_swipe", kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist}
|
||||
)
|
||||
)
|
||||
if res == ADB_EXEC_FAIL:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
elif isinstance(op_param, GridOp):
|
||||
|
|
|
|||
|
|
@ -2,25 +2,47 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# @Desc : LIKE scripts/self_explorer.py in stage=learn & mode=auto self_explore_task stage
|
||||
|
||||
from pathlib import Path
|
||||
import ast
|
||||
from pathlib import Path
|
||||
|
||||
from examples.andriod_assistant.actions.screenshot_parse_an import SCREENSHOT_PARSE_NODE
|
||||
from examples.andriod_assistant.actions.self_learn_reflect_an import SELF_LEARN_REFLECT_NODE
|
||||
from examples.andriod_assistant.prompts.assistant_prompt import (
|
||||
screenshot_parse_self_explore_template, screenshot_parse_self_explore_reflect_template as reflect_template
|
||||
from examples.andriod_assistant.actions.self_learn_reflect_an import (
|
||||
SELF_LEARN_REFLECT_NODE,
|
||||
)
|
||||
from examples.andriod_assistant.prompts.assistant_prompt import (
|
||||
screenshot_parse_self_explore_reflect_template as reflect_template,
|
||||
)
|
||||
from examples.andriod_assistant.prompts.assistant_prompt import (
|
||||
screenshot_parse_self_explore_template,
|
||||
)
|
||||
from examples.andriod_assistant.utils.schema import (
|
||||
ActionOp,
|
||||
AndroidActionOutput,
|
||||
AndroidElement,
|
||||
Decision,
|
||||
DocContent,
|
||||
LongPressOp,
|
||||
OpLogItem,
|
||||
ReflectLogItem,
|
||||
RunState,
|
||||
SwipeOp,
|
||||
TapOp,
|
||||
TextOp,
|
||||
)
|
||||
from examples.andriod_assistant.utils.utils import (
|
||||
draw_bbox_multi,
|
||||
elem_bbox_to_xy,
|
||||
reflect_parse_extarct,
|
||||
screenshot_parse_extract,
|
||||
traverse_xml_tree,
|
||||
)
|
||||
from examples.andriod_assistant.utils.schema import AndroidElement, OpLogItem, ReflectLogItem, RunState, TapOp, \
|
||||
TextOp, SwipeOp, LongPressOp, ActionOp, Decision, DocContent, AndroidActionOutput
|
||||
from examples.andriod_assistant.utils.utils import draw_bbox_multi, traverse_xml_tree, screenshot_parse_extract, \
|
||||
elem_bbox_to_xy, reflect_parse_extarct
|
||||
from metagpt.actions.action import Action
|
||||
from metagpt.config2 import config
|
||||
from metagpt.const import ADB_EXEC_FAIL
|
||||
from metagpt.environment.android_env.android_env import AndroidEnv
|
||||
from metagpt.environment.api.env_api import EnvAPIAbstract
|
||||
from metagpt.utils.common import encode_image
|
||||
from metagpt.const import ADB_EXEC_FAIL
|
||||
from metagpt.logs import logger
|
||||
from metagpt.utils.common import encode_image
|
||||
|
||||
|
||||
class SelfLearnAndReflect(Action):
|
||||
|
|
@ -35,12 +57,16 @@ class SelfLearnAndReflect(Action):
|
|||
act_name: str = ""
|
||||
ui_area: int = -1
|
||||
|
||||
async def run(self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, env: AndroidEnv) -> AndroidActionOutput:
|
||||
async def run(
|
||||
self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, env: AndroidEnv
|
||||
) -> AndroidActionOutput:
|
||||
resp = self.run_self_learn(round_count, task_desc, last_act, task_dir, env)
|
||||
resp = self.run_reflect(round_count, task_desc, last_act, task_dir, docs_dir, env)
|
||||
return resp
|
||||
|
||||
async def run_self_learn(self, round_count: int, task_desc: str, last_act: str, task_dir: Path, env: AndroidEnv) -> AndroidActionOutput:
|
||||
async def run_self_learn(
|
||||
self, round_count: int, task_desc: str, last_act: str, task_dir: Path, env: AndroidEnv
|
||||
) -> AndroidActionOutput:
|
||||
screenshot_path: Path = env.step(
|
||||
EnvAPIAbstract(
|
||||
api_name="get_screenshot", kwargs={"ss_name": f"{round_count}_before", "local_save_dir": task_dir}
|
||||
|
|
@ -89,7 +115,7 @@ class SelfLearnAndReflect(Action):
|
|||
if "error" in node.content:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
prompt = node.compile(context=context, schema="json", mode="auto")
|
||||
log_item = OpLogItem(step=round_count, prompt=prompt, image=screenshot_before_labeled_path, response=node.content)
|
||||
OpLogItem(step=round_count, prompt=prompt, image=screenshot_before_labeled_path, response=node.content)
|
||||
op_param = screenshot_parse_extract(node.instruct_content.model_dump(), grid_on=False)
|
||||
if op_param.param_state == RunState.FINISH:
|
||||
return AndroidActionOutput(action_state=RunState.FINISH)
|
||||
|
|
@ -116,7 +142,11 @@ class SelfLearnAndReflect(Action):
|
|||
self.ui_area = op_param.area
|
||||
self.swipe_orient = op_param.swipe_orient
|
||||
x, y = elem_bbox_to_xy(elem_list[op_param.area - 1].bbox)
|
||||
res = env.step(EnvAPIAbstract("user_swipe", kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist}))
|
||||
res = env.step(
|
||||
EnvAPIAbstract(
|
||||
"user_swipe", kwargs={"x": x, "y": y, "orient": op_param.swipe_orient, "dist": op_param.dist}
|
||||
)
|
||||
)
|
||||
if res == ADB_EXEC_FAIL:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
|
||||
|
|
@ -124,7 +154,9 @@ class SelfLearnAndReflect(Action):
|
|||
self.act_name = op_param.act_name
|
||||
return AndroidActionOutput()
|
||||
|
||||
async def run_reflect(self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, env: AndroidEnv) -> AndroidActionOutput:
|
||||
async def run_reflect(
|
||||
self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, env: AndroidEnv
|
||||
) -> AndroidActionOutput:
|
||||
screenshot_path: Path = env.step(
|
||||
EnvAPIAbstract(
|
||||
api_name="get_screenshot", kwargs={"ss_name": f"{round_count}_after", "local_save_dir": task_dir}
|
||||
|
|
@ -147,15 +179,24 @@ class SelfLearnAndReflect(Action):
|
|||
action = "v_swipe"
|
||||
elif self.swipe_orient == SwipeOp.LEFT.value or self.swipe_orient == SwipeOp.RIGHT.value:
|
||||
action = "h_swipe"
|
||||
context = reflect_template.format(action=action, ui_element=str(self.ui_area), task_desc=task_desc, last_act=last_act)
|
||||
node = await SELF_LEARN_REFLECT_NODE.fill(context=context, llm=self.llm, images=[self.screenshot_before_base64, img_base64])
|
||||
context = reflect_template.format(
|
||||
action=action, ui_element=str(self.ui_area), task_desc=task_desc, last_act=last_act
|
||||
)
|
||||
node = await SELF_LEARN_REFLECT_NODE.fill(
|
||||
context=context, llm=self.llm, images=[self.screenshot_before_base64, img_base64]
|
||||
)
|
||||
|
||||
if "error" in node.content:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
|
||||
prompt = node.compile(context=context, schema="json", mode="auto")
|
||||
log_item = ReflectLogItem(step=round_count, prompt=prompt, image_before=self.screenshot_before_path,
|
||||
image_after=screenshot_after_labeled_path, response=node.content)
|
||||
ReflectLogItem(
|
||||
step=round_count,
|
||||
prompt=prompt,
|
||||
image_before=self.screenshot_before_path,
|
||||
image_after=screenshot_after_labeled_path,
|
||||
response=node.content,
|
||||
)
|
||||
|
||||
op_param = reflect_parse_extarct(node.instruct_content.model_dump())
|
||||
if op_param.param_state == RunState.FINISH:
|
||||
|
|
@ -163,7 +204,7 @@ class SelfLearnAndReflect(Action):
|
|||
if op_param.param_state == RunState.FAIL:
|
||||
return AndroidActionOutput(action_state=RunState.FAIL)
|
||||
|
||||
resource_id = self.elem_list[int(self.ui_area) -1].uid
|
||||
resource_id = self.elem_list[int(self.ui_area) - 1].uid
|
||||
if op_param.decision == Decision.INEFFECTIVE.value:
|
||||
self.useless_list.append(resource_id)
|
||||
last_act = "NONE" # TODO global
|
||||
|
|
|
|||
|
|
@ -4,28 +4,16 @@
|
|||
|
||||
from metagpt.actions.action_node import ActionNode
|
||||
|
||||
|
||||
DECISION = ActionNode(
|
||||
key="Decision",
|
||||
expected_type=str,
|
||||
instruction="explain why you made this decision",
|
||||
example="BACK"
|
||||
key="Decision", expected_type=str, instruction="explain why you made this decision", example="BACK"
|
||||
)
|
||||
|
||||
|
||||
THOUGHT = ActionNode(
|
||||
key="Thought",
|
||||
expected_type=str,
|
||||
instruction="explain why you made this decision",
|
||||
example=""
|
||||
)
|
||||
THOUGHT = ActionNode(key="Thought", expected_type=str, instruction="explain why you made this decision", example="")
|
||||
|
||||
|
||||
DOCUMENTATION = ActionNode(
|
||||
key="Documentation",
|
||||
expected_type=str,
|
||||
instruction="describe the function of the UI element",
|
||||
example=""
|
||||
key="Documentation", expected_type=str, instruction="describe the function of the UI element", example=""
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,36 +3,35 @@
|
|||
# @Desc : test case (imgs from appagent's)
|
||||
|
||||
|
||||
import re
|
||||
import ast
|
||||
import json
|
||||
import time
|
||||
import asyncio
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from actions.parse_record_an import RECORD_PARSE_NODE
|
||||
from prompts.operation_prompt import (
|
||||
long_press_doc_template,
|
||||
refine_doc_suffix,
|
||||
swipe_doc_template,
|
||||
tap_doc_template,
|
||||
text_doc_template,
|
||||
long_press_doc_template,
|
||||
swipe_doc_template,
|
||||
refine_doc_suffix
|
||||
)
|
||||
from utils.schema import ActionOp, SwipeOp
|
||||
from actions.parse_record_an import RECORD_PARSE_NODE
|
||||
from metagpt.config2 import config
|
||||
from metagpt.utils.common import encode_image
|
||||
from metagpt.logs import logger
|
||||
from metagpt.actions.action import Action
|
||||
|
||||
TEST_BEFORE_PATH = Path(
|
||||
"apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_1.png")
|
||||
TEST_AFTER_PATH = Path(
|
||||
"apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_2.png")
|
||||
from metagpt.actions.action import Action
|
||||
from metagpt.config2 import config
|
||||
from metagpt.logs import logger
|
||||
from metagpt.utils.common import encode_image
|
||||
|
||||
TEST_BEFORE_PATH = Path("apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_1.png")
|
||||
TEST_AFTER_PATH = Path("apps/demo_Contacts/labeled_screenshots/demo_Contacts_2024-01-30_21-50-19_2.png")
|
||||
RECORD_PATH = Path("apps/demo_Contacts/record.txt")
|
||||
TASK_DESC_PATH = Path("apps/demo_Contacts/task_desc.txt")
|
||||
DOCS_DIR = Path("storage")
|
||||
|
||||
testaction = Action(name="test")
|
||||
|
||||
|
||||
# TODO test for parse record
|
||||
# Test using only a single image
|
||||
async def manual_test():
|
||||
|
|
@ -80,26 +79,23 @@ async def manual_test():
|
|||
context += refine_context
|
||||
logger.info(
|
||||
f"Documentation for the element {resource_id} already exists. The doc will be "
|
||||
f"refined based on the latest demo.")
|
||||
f"refined based on the latest demo."
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Documentation for the element {resource_id} already exists. Turn on DOC_REFINE "
|
||||
f"in the config file if needed.")
|
||||
f"in the config file if needed."
|
||||
)
|
||||
else:
|
||||
doc_content = {
|
||||
"tap": "",
|
||||
"text": "",
|
||||
"v_swipe": "",
|
||||
"h_swipe": "",
|
||||
"long_press": ""
|
||||
}
|
||||
doc_content = {"tap": "", "text": "", "v_swipe": "", "h_swipe": "", "long_press": ""}
|
||||
logger.info(f"Waiting for GPT-4V to generate documentation for the element {resource_id}")
|
||||
|
||||
node = await RECORD_PARSE_NODE.fill(context=context, llm=testaction.llm,
|
||||
images=[img_before_base64, img_after_base64])
|
||||
node = await RECORD_PARSE_NODE.fill(
|
||||
context=context, llm=testaction.llm, images=[img_before_base64, img_after_base64]
|
||||
)
|
||||
|
||||
# log_path = task_dir.joinpath(f"log_{app_name}_{demo_name}.txt")
|
||||
prompt = node.compile(context=context, schema="json", mode="auto")
|
||||
node.compile(context=context, schema="json", mode="auto")
|
||||
msg = node.content
|
||||
doc_content[action_type] = msg
|
||||
|
||||
|
|
@ -107,6 +103,7 @@ async def manual_test():
|
|||
outfile.write(str(doc_content))
|
||||
logger.info(f"Documentation generated and saved to {doc_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the manual_test() coroutine to completion.
    # asyncio.run() creates a fresh event loop, runs the coroutine, and closes
    # the loop afterwards. The previous asyncio.get_event_loop() +
    # run_until_complete() pair relies on implicit loop creation, which is
    # deprecated when no loop is running, and it never closed the loop.
    asyncio.run(manual_test())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue