mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-11 15:15:18 +02:00
Update Parse Record Action Node & Modify Self Reflect Action node; Adds Result example
This commit is contained in:
parent
ea2b749655
commit
13975976d5
44 changed files with 106 additions and 70 deletions
|
|
@ -27,22 +27,29 @@ class ManualRecord(Action):
|
|||
name: str = "ManualRecord"
|
||||
|
||||
useless_list: list[str] = [] # store useless elements uid
|
||||
record_path: str = ""
|
||||
task_desc_path: str = ""
|
||||
screenshot_before_path: str = ""
|
||||
screenshot_after_path: str = ""
|
||||
xml_path: str = ""
|
||||
record_path: Path = ""
|
||||
task_desc_path: Path = ""
|
||||
screenshot_before_path: Path = ""
|
||||
screenshot_after_path: Path = ""
|
||||
xml_path: Path = ""
|
||||
|
||||
async def run(self, demo_name: str, task_desc: str,task_dir: Path, env: AndroidEnv):
|
||||
|
||||
self.record_path = Path(task_dir) / "record.txt"
|
||||
record_file = open(self.record_path, "w")
|
||||
self.task_desc_path = Path(task_dir) / "task_desc.txt"
|
||||
with open(self.task_desc_path, "w") as f:
|
||||
f.write(task_desc)
|
||||
self.screenshot_before_path = Path(task_dir)/"raw_screenshots"
|
||||
self.screenshot_after_path = Path(task_dir)/"labeled_screenshots"
|
||||
self.xml_path = Path(task_dir)/"xml"
|
||||
|
||||
for path in [self.screenshot_before_path,self.screenshot_after_path, self.xml_path]:
|
||||
if not path.exists():
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(self.record_path, 'w') as file:
|
||||
file.write('')
|
||||
record_file = open(self.record_path, "w")
|
||||
with open(self.task_desc_path, "w") as f:
|
||||
f.write(task_desc)
|
||||
step = 0
|
||||
while True:
|
||||
step += 1
|
||||
|
|
@ -182,10 +189,4 @@ class ManualRecord(Action):
|
|||
break
|
||||
time.sleep(3)
|
||||
|
||||
# TODO
|
||||
# 1. 截圖信息显示 KO
|
||||
# 2. 不同功能测试 OK
|
||||
# 3. demo 生成路径错误, 这个地方的结合需要考虑
|
||||
# 1. Documentation Generate
|
||||
# 2. Role Test
|
||||
|
||||
|
|
|
|||
|
|
@ -33,17 +33,26 @@ from metagpt.utils.common import encode_image
|
|||
|
||||
class ParseRecord(Action):
|
||||
name: str = "ParseRecord"
|
||||
record_path: Path = ""
|
||||
task_desc_path: Path = ""
|
||||
screenshot_before_path: Path = ""
|
||||
screenshot_after_path: Path = ""
|
||||
|
||||
async def run(self, app_name: str, demo_name: str, task_dir: Path, docs_dir: Path, env: AndroidEnv):
|
||||
if not docs_dir.exists():
|
||||
docs_dir.mkdir(parents=True, exist_ok=True)
|
||||
doc_count = 0
|
||||
record_path = Path(task_dir) / "record.txt"
|
||||
self.record_path = Path(task_dir) / "record.txt"
|
||||
self.task_desc_path = Path(task_dir) / "task_desc.txt"
|
||||
self.screenshot_before_path = Path(task_dir)/"raw_screenshots"
|
||||
self.screenshot_after_path = Path(task_dir)/"labeled_screenshots"
|
||||
|
||||
with open(record_path, "r") as record_file:
|
||||
with open(self.record_path, "r") as record_file:
|
||||
record_step_count = len(record_file.readlines()) - 1
|
||||
record_file.seek(0)
|
||||
for step in range(1, record_step_count + 1):
|
||||
img_before_base64 = encode_image(task_dir.joinpath(f"{task_dir}_{step}_labeled.png"))
|
||||
img_after_base64 = encode_image(task_dir.joinpath(f"{task_dir}_{step + 1}_labeled.png"))
|
||||
img_before_base64 = encode_image(self.screenshot_after_path.joinpath(f"{demo_name}_{step}_labeled.png"))
|
||||
img_after_base64 = encode_image(self.screenshot_after_path.joinpath(f"{demo_name}_{step + 1}_labeled.png"))
|
||||
rec = record_file.readline().strip()
|
||||
action, resource_id = rec.split(":::")
|
||||
action_type = action.split("(")[0]
|
||||
|
|
@ -115,13 +124,16 @@ class ParseRecord(Action):
|
|||
image_after=img_after_base64,
|
||||
response=node.content,
|
||||
)
|
||||
# TODO 修改 dumps 方式
|
||||
logfile.write(json.dumps(log_item) + "\n")
|
||||
logfile.write(json.dumps(log_item.model_dump()) + "\n")
|
||||
with open(doc_path, "w") as outfile:
|
||||
outfile.write(str(doc_content))
|
||||
doc_count += 1
|
||||
logger.info(f"Documentation generated and saved to {doc_path}")
|
||||
|
||||
time.sleep(config.get_other("request_interval"))
|
||||
# TODO MetaGPT 里面的Config 需要看一下
|
||||
# time.sleep(config.get_other("request_interval"))
|
||||
|
||||
logger.info(f"Documentation generation phase completed. {doc_count} docs generated.")
|
||||
|
||||
# TODO
|
||||
# 1. LOG中记录方式有问题,需要把IMG的部分拿出去丢掉
|
||||
|
|
@ -60,6 +60,9 @@ class SelfLearnAndReflect(Action):
|
|||
async def run(
|
||||
self, round_count: int, task_desc: str, last_act: str, task_dir: Path, docs_dir: Path, env: AndroidEnv
|
||||
) -> AndroidActionOutput:
|
||||
for path in [task_dir,docs_dir]:
|
||||
if not path.exists():
|
||||
path.mkdir(parents=True,exist_ok=True)
|
||||
resp = await self.run_self_learn(round_count, task_desc, last_act, task_dir, env)
|
||||
resp = await self.run_reflect(round_count, task_desc, last_act, task_dir, docs_dir, env)
|
||||
return resp
|
||||
|
|
@ -121,6 +124,8 @@ class SelfLearnAndReflect(Action):
|
|||
# Modify WindowsPath to Str
|
||||
OpLogItem(step=round_count, prompt=prompt, image=str(screenshot_before_labeled_path), response=node.content)
|
||||
op_param = screenshot_parse_extract(node.instruct_content.model_dump(), grid_on=False)
|
||||
# TODO Modify Op_param. When op_param.action is FINISH, how to solve this ?
|
||||
logger.info(op_param)
|
||||
if op_param.param_state == RunState.FINISH:
|
||||
return AndroidActionOutput(action_state=RunState.FINISH)
|
||||
if op_param.param_state == RunState.FAIL:
|
||||
|
|
@ -156,6 +161,7 @@ class SelfLearnAndReflect(Action):
|
|||
|
||||
self.elem_list = elem_list
|
||||
self.act_name = op_param.act_name
|
||||
print("探索阶段结束")
|
||||
return AndroidActionOutput()
|
||||
|
||||
async def run_reflect(
|
||||
|
|
@ -233,5 +239,8 @@ class SelfLearnAndReflect(Action):
|
|||
doc_content = DocContent()
|
||||
setattr(doc_content, self.act_name, doc)
|
||||
doc_path.write_text(str(doc_content))
|
||||
|
||||
print("反思阶段结束")
|
||||
return AndroidActionOutput(data={"last_act": last_act})
|
||||
|
||||
# TODO 如何处理 FINISH 状态,这一点应该需要与role 联动才能解决
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
tap='The UI element opens the app drawer, displaying a list of all the apps available on the device.' text='' v_swipe='' h_swipe='' long_press=''
|
||||
|
|
@ -0,0 +1 @@
|
|||
{'tap': '[CONTENT]\n{\n "Observation": "The first image shows a mobile phone\'s home screen with various app icons, while the second image displays an app drawer with a grid of app icons and a search bar at the top. The transition from the home screen to the app drawer occurred after interacting with the UI element.",\n "Thought": "The action taken on the UI element results in revealing a comprehensive list of apps installed on the device, suggesting that the UI element serves as a gateway to access all applications.",\n "Description": "Tapping this UI element opens the app drawer, displaying a list of all installed applications."\n}\n[/CONTENT]', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{'tap': '[CONTENT]\n{\n "Observation": "The first image shows an \'Edit contact\' screen with various fields and options for inputting and editing contact information. The UI element labeled with the number 2 is a checkmark icon located in the top right corner. The second image displays a confirmation of the contact details with the same checkmark icon at the top right, suggesting that the contact has been saved or updated.",\n "Thought": "Given the context and the common design patterns in user interfaces, tapping the checkmark icon typically indicates an action to save or confirm the changes made to the information on the screen.",\n "Description": "Tapping this UI element saves or confirms the changes made to the contact information."\n}\n[/CONTENT]', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{'tap': '[CONTENT]\n{\n "Observation": "The first image shows a mobile screen with a dialog box asking if the user wants to switch to editing the selected contact, with the information entered so far to be copied. The second image displays the contact details filled in, indicating that the user has chosen to edit the contact with the information that was previously entered.",\n "Thought": "Tapping the UI element prompts the app to transition from a confirmation dialog to the contact editing interface, carrying over any previously entered information.",\n "Description": "Tapping this UI element confirms the user\'s intent to edit the selected contact with the information already entered."\n}\n[/CONTENT]', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{'tap': '[CONTENT]\n{\n "Observation": "The first image shows a mobile app interface for adding a new contact with various fields such as Name, Phone, and Email. The second image shows the same interface, but with a keyboard displayed, indicating that a text field has been selected for input.",\n "Thought": "The appearance of the keyboard in the second image suggests that the UI element in question is a text input field, which when tapped, allows the user to enter information.",\n "Description": "Tapping this UI element allows the user to enter text into the Name field of a new contact."\n}\n[/CONTENT]', 'text': '[CONTENT]\n{\n "Observation": "The first image shows a mobile app interface for adding a new contact with an empty input field for the contact\'s name. The second image displays the same interface after the user has entered text into the name input field, as well as additional information in other fields.",\n "Thought": "The user has interacted with the UI element by inputting text, which indicates that the UI element is designed to accept and display user-entered information.",\n "Description": "The UI element is a text input field used for entering the name of a new contact."\n}\n[/CONTENT]', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{'tap': '[CONTENT]\n{\n "Observation": "The first image shows a mobile app\'s contact list screen with a blue floating action button at the bottom right corner. The second image displays an \'Add new contact\' screen with various fields for entering contact information such as name, phone, email, and more.",\n "Thought": "Tapping the UI element in the first image triggers the transition to the \'Add new contact\' screen as seen in the second image.",\n "Description": "Tapping this area will open the interface for adding a new contact to the user\'s address book."\n}\n[/CONTENT]', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{'tap': '[CONTENT]\n{\n "Observation": "The first image shows a grid of app icons on a mobile device, each with a numeric tag. The second image displays a contact list on the same device, indicating that the UI element previously tagged with the number 9 has been activated.",\n "Thought": "Tapping the UI element labeled with the number 9 in the first image has resulted in the opening of the contacts application, which is evident from the transition to the contact list in the second image.",\n "Description": "Tapping this UI element opens the contacts application, displaying the user\'s contact list."\n}\n[/CONTENT]', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -0,0 +1 @@
|
|||
stop
|
||||
|
|
@ -0,0 +1 @@
|
|||
Create a contact in Contacts App named zjy with a phone number +86 18831933368
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1 @@
|
|||
Create a contact in Contacts App named zjy with a phone number +86 18831933368
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1 @@
|
|||
Create a contact in Contacts App named zjy with a phone number +86 18831933368
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -1,10 +1,9 @@
|
|||
tap(9):::android.view.ViewGroup_1067_236_android.widget.TextView_183_204_Apps_2
|
||||
tap(9):::com.android.launcher3.id_apps_list_view_com.android.launcher3.id_icon_Contacts_7
|
||||
tap(6):::com.android.contacts.id_floating_action_button_container_com.android.contacts.id_floating_action_button_addnewcontact_0
|
||||
text(4:sep:"zr"):::com.android.contacts.id_editors_android.widget.EditText_775_142_0
|
||||
tap(4):::com.android.contacts.id_editors_android.widget.EditText_775_142_0
|
||||
text(4:sep:"zr"):::com.android.contacts.id_editors_android.widget.EditText_775_142_0
|
||||
text(4:sep:"zjy"):::com.android.contacts.id_editors_android.widget.EditText_775_142_0
|
||||
tap(6):::com.android.contacts.id_editors_android.widget.EditText_775_142_0
|
||||
text(6:sep:"+86 15231955333"):::com.android.contacts.id_editors_android.widget.EditText_775_142_0
|
||||
tap(2):::android.widget.LinearLayout_126_147_com.android.contacts.id_menu_save_Save_0
|
||||
tap(2):::android.widget.LinearLayout_756_164_android.id_button1_1
|
||||
tap(2):::android.widget.LinearLayout_231_147_com.android.contacts.id_menu_save_Save_0
|
||||
stop
|
||||
|
|
@ -0,0 +1 @@
|
|||
Create a contact in Contacts App named zjy with a phone number +86 18831933368
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1 @@
|
|||
<?xml version='1.0' encoding='UTF-8' standalone='yes' ?><hierarchy rotation="0"><node index="0" text="" resource-id="" class="android.widget.FrameLayout" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[120,943][960,1393]"><node index="0" text="" resource-id="" class="android.widget.FrameLayout" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,985][918,1351]"><node index="0" text="" resource-id="android:id/content" class="android.widget.FrameLayout" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,985][918,1351]"><node index="0" text="" resource-id="android:id/parentPanel" class="android.widget.LinearLayout" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,985][918,1351]"><node index="0" text="" resource-id="android:id/contentPanel" class="android.widget.FrameLayout" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,985][918,1187]"><node index="0" text="" resource-id="android:id/scrollView" class="android.widget.ScrollView" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,985][918,1187]"><node index="0" text="" resource-id="" class="android.widget.LinearLayout" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,985][918,1187]"><node index="0" text="" resource-id="android:id/textSpacerNoTitle" class="android.view.View" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,985][918,1032]" /><node index="1" text="Switch to editing the selected contact? Information you entered so far will be copied." resource-id="android:id/message" class="android.widget.TextView" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,1032][918,1187]" /></node></node></node><node index="1" text="" resource-id="android:id/buttonPanel" class="android.widget.ScrollView" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="true" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,1187][918,1351]"><node index="0" text="" resource-id="" class="android.widget.LinearLayout" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[162,1187][918,1351]"><node index="0" text="CANCEL" resource-id="android:id/button2" class="android.widget.Button" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="true" enabled="true" focusable="true" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[516,1198][718,1340]" /><node index="1" text="OK" resource-id="android:id/button1" class="android.widget.Button" package="com.android.contacts" content-desc="" checkable="false" checked="false" clickable="true" enabled="true" focusable="true" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" bounds="[718,1198][886,1340]" /></node></node></node></node></node></node></hierarchy>
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -1,5 +0,0 @@
|
|||
{"step": 1, "prompt": "I will give you the screenshot of a mobile app before and after tapping the UI element labeled \nwith the number 9 on the screen. The numeric tag of each element is located at the center of the element. \nTapping this UI element is a necessary part of proceeding with a larger task, which is to create a new contact in Contacts app named zr ,with a phone number +86 15231955333. Your task is to \ndescribe the functionality of the UI element concisely in one or two sentences. Notice that your description of the UI \nelement should focus on the general function. For example, if the UI element is used to navigate to the chat window \nwith John, your description should not include the name of the specific person. Just say: \"Tapping this area will \nnavigate the user to the chat window\". Never include the numeric tag of the UI element in your description. You can use \npronouns such as \"the UI element\" to refer to the element.", "image_before": "demo_Contacts_2024-01-30_21-50-19_1.png", "image_after": "demo_Contacts_2024-01-30_21-50-19_2.png", "response": {"id": "chatcmpl-8miqk5n21ZtIdridhvSQyTZUzVel9", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "Tapping this UI element opens the app drawer, displaying a list of all the apps installed on the device.", "role": "assistant", "function_call": null, "tool_calls": null}}], "created": 1706622838, "model": "gpt-4-1106-vision-preview", "object": "chat.completion", "system_fingerprint": null, "usage": {"completion_tokens": 22, "prompt_tokens": 3101, "total_tokens": 3123}}}
|
||||
{"step": 2, "prompt": "I will give you the screenshot of a mobile app before and after tapping the UI element labeled \nwith the number 9 on the screen. The numeric tag of each element is located at the center of the element. \nTapping this UI element is a necessary part of proceeding with a larger task, which is to create a new contact in Contacts app named zr ,with a phone number +86 15231955333. Your task is to \ndescribe the functionality of the UI element concisely in one or two sentences. Notice that your description of the UI \nelement should focus on the general function. For example, if the UI element is used to navigate to the chat window \nwith John, your description should not include the name of the specific person. Just say: \"Tapping this area will \nnavigate the user to the chat window\". Never include the numeric tag of the UI element in your description. You can use \npronouns such as \"the UI element\" to refer to the element.", "image_before": "demo_Contacts_2024-01-30_21-50-19_2.png", "image_after": "demo_Contacts_2024-01-30_21-50-19_3.png", "response": {"id": "chatcmpl-8mirFP7if9MJFST6hNhwTAwS3fSrz", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "Tapping this UI element will open the Contacts application.", "role": "assistant", "function_call": null, "tool_calls": null}}], "created": 1706622869, "model": "gpt-4-1106-vision-preview", "object": "chat.completion", "system_fingerprint": null, "usage": {"completion_tokens": 11, "prompt_tokens": 3101, "total_tokens": 3112}}}
|
||||
{"step": 3, "prompt": "I will give you the screenshot of a mobile app before and after tapping the UI element labeled \nwith the number 6 on the screen. The numeric tag of each element is located at the center of the element. \nTapping this UI element is a necessary part of proceeding with a larger task, which is to create a new contact in Contacts app named zr ,with a phone number +86 15231955333. Your task is to \ndescribe the functionality of the UI element concisely in one or two sentences. Notice that your description of the UI \nelement should focus on the general function. For example, if the UI element is used to navigate to the chat window \nwith John, your description should not include the name of the specific person. Just say: \"Tapping this area will \nnavigate the user to the chat window\". Never include the numeric tag of the UI element in your description. You can use \npronouns such as \"the UI element\" to refer to the element.", "image_before": "demo_Contacts_2024-01-30_21-50-19_3.png", "image_after": "demo_Contacts_2024-01-30_21-50-19_4.png", "response": {"id": "chatcmpl-8mirf3RakbtpZK0zfvJjdXJ48rYNJ", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "Tapping this UI element allows the user to add a new contact to their contact list.", "role": "assistant", "function_call": null, "tool_calls": null}}], "created": 1706622895, "model": "gpt-4-1106-vision-preview", "object": "chat.completion", "system_fingerprint": null, "usage": {"completion_tokens": 18, "prompt_tokens": 3101, "total_tokens": 3119}}}
|
||||
{"step": 4, "prompt": "I will give you the screenshot of a mobile app before and after typing in the input area labeled\nwith the number 4 on the screen. The numeric tag of each element is located at the center of the element. \nTyping in this UI element is a necessary part of proceeding with a larger task, which is to create a new contact in Contacts app named zr ,with a phone number +86 15231955333. Your task is \nto describe the functionality of the UI element concisely in one or two sentences. Notice that your description of the \nUI element should focus on the general function. For example, if the change of the screenshot shows that the user typed \n\"How are you?\" in the chat box, you do not need to mention the actual text. Just say: \"This input area is used for the \nuser to type a message to send to the chat window.\". Never include the numeric tag of the UI element in your \ndescription. You can use pronouns such as \"the UI element\" to refer to the element.", "image_before": "demo_Contacts_2024-01-30_21-50-19_4.png", "image_after": "demo_Contacts_2024-01-30_21-50-19_5.png", "response": {"id": "chatcmpl-8mis5yw6Dt9iqFvUBfyKyThUpUBIR", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "The UI element is used for the user to enter the name of a new contact in the Contacts app.", "role": "assistant", "function_call": null, "tool_calls": null}}], "created": 1706622921, "model": "gpt-4-1106-vision-preview", "object": "chat.completion", "system_fingerprint": null, "usage": {"completion_tokens": 21, "prompt_tokens": 3112, "total_tokens": 3133}}}
|
||||
{"step": 5, "prompt": "I will give you the screenshot of a mobile app before and after tapping the UI element labeled \nwith the number 4 on the screen. The numeric tag of each element is located at the center of the element. \nTapping this UI element is a necessary part of proceeding with a larger task, which is to create a new contact in Contacts app named zr ,with a phone number +86 15231955333. Your task is to \ndescribe the functionality of the UI element concisely in one or two sentences. Notice that your description of the UI \nelement should focus on the general function. For example, if the UI element is used to navigate to the chat window \nwith John, your description should not include the name of the specific person. Just say: \"Tapping this area will \nnavigate the user to the chat window\". Never include the numeric tag of the UI element in your description. You can use \npronouns such as \"the UI element\" to refer to the element.", "image_before": "demo_Contacts_2024-01-30_21-50-19_5.png", "image_after": "demo_Contacts_2024-01-30_21-50-19_6.png", "response": {"id": "chatcmpl-8misV60JHJEblfhdkseEPxtj5sqqi", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "Tapping this UI element allows the user to enter a name for the new contact.", "role": "assistant", "function_call": null, "tool_calls": null}}], "created": 1706622947, "model": "gpt-4-1106-vision-preview", "object": "chat.completion", "system_fingerprint": null, "usage": {"completion_tokens": 17, "prompt_tokens": 3101, "total_tokens": 3118}}}
|
||||
|
|
@ -1 +0,0 @@
|
|||
create a new contact in Contacts app named zr ,with a phone number +86 15231955333
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -1 +0,0 @@
|
|||
{'tap': 'Tapping this UI element opens the app drawer, displaying a list of all the apps installed on the device.', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -1 +0,0 @@
|
|||
{'tap': 'Tapping this UI element saves the new contact information that has been entered into the Contacts app.', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -1 +0,0 @@
|
|||
{'tap': 'Tapping this UI element allows the user to enter a name for the new contact.', 'text': 'The UI element is used for the user to enter the name of a new contact in the Contacts app.', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -1 +0,0 @@
|
|||
{'tap': 'Tapping this UI element allows the user to add a new contact to their contact list.', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -1 +0,0 @@
|
|||
{'tap': 'Tapping this UI element will open the Contacts application.', 'text': '', 'v_swipe': '', 'h_swipe': '', 'long_press': ''}
|
||||
|
|
@ -10,9 +10,10 @@ from actions.self_learn_and_reflect import SelfLearnAndReflect
|
|||
from metagpt.environment.android_env.android_env import AndroidEnv
|
||||
|
||||
TASK_PATH = Path("apps/Contacts")
|
||||
DOC_PATH = TASK_PATH.joinpath("docs")
|
||||
DEMO_NAME = str(time.time())
|
||||
# TODO Test for Self Learning、
|
||||
SELF_EXPLORE_DOC_PATH = TASK_PATH.joinpath("autodocs")
|
||||
PARSE_RECORD_DOC_PATH = TASK_PATH.joinpath("demodocs")
|
||||
|
||||
test_env_self_learn_android = AndroidEnv(
|
||||
device_id="emulator-5554",
|
||||
xml_dir=Path("/sdcard"),
|
||||
|
|
@ -20,7 +21,6 @@ test_env_self_learn_android = AndroidEnv(
|
|||
)
|
||||
test_self_learning = SelfLearnAndReflect()
|
||||
|
||||
# TODO Test for Manual Learning
|
||||
test_env_manual_learn_android = AndroidEnv(
|
||||
device_id="emulator-5554",
|
||||
xml_dir=Path("/sdcard"),
|
||||
|
|
@ -34,33 +34,37 @@ test_manual_parse = ParseRecord()
|
|||
|
||||
if __name__ == "__main__":
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
# loop.run_until_complete(
|
||||
loop.run_until_complete(
|
||||
test_manual_parse.run(
|
||||
app_name="Contacts",
|
||||
demo_name="1708753998.5757847",
|
||||
task_dir=TASK_PATH / "demos" / f"manual_record_1708753998.5757847", # 修要修改
|
||||
docs_dir=PARSE_RECORD_DOC_PATH, # 需要修改
|
||||
env=test_env_manual_learn_android
|
||||
))
|
||||
# test_action_list = [
|
||||
# # test_self_learning.run(
|
||||
# # round_count=20,
|
||||
# # task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
# # last_act="",
|
||||
# # task_dir= TASK_PATH / "demos" / f"self_learning_{DEMO_NAME}",
|
||||
# # docs_dir=DOC_PATH,
|
||||
# # env=test_env_self_learn_android
|
||||
# # ),
|
||||
# test_manual_record.run(
|
||||
# demo_name=DEMO_NAME,
|
||||
# task_dir=TASK_PATH / "demos" / f"manual_record_{DEMO_NAME}",
|
||||
# task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
# task_dir=TASK_PATH,
|
||||
# env=test_env_manual_learn_android
|
||||
# ),
|
||||
# test_manual_parse.run(
|
||||
# app_name="Contacts",
|
||||
# demo_name=DEMO_NAME,
|
||||
# task_dir=TASK_PATH / "demos" / f"manual_record_{DEMO_NAME}", # 修要修改
|
||||
# docs_dir=PARSE_RECORD_DOC_PATH, # 需要修改
|
||||
# env=test_env_manual_learn_android
|
||||
# )
|
||||
# )
|
||||
|
||||
test_action_list = [
|
||||
test_self_learning.run(
|
||||
round_count=20,
|
||||
task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
last_act="",
|
||||
task_dir=TASK_PATH,
|
||||
docs_dir=DOC_PATH,
|
||||
env=test_env_self_learn_android
|
||||
),
|
||||
test_manual_record.run(
|
||||
demo_name=DEMO_NAME,
|
||||
task_dir=TASK_PATH,
|
||||
task_desc="Create a contact in Contacts App named zjy with a phone number +86 18831933368 ",
|
||||
env=test_env_manual_learn_android
|
||||
)
|
||||
]
|
||||
|
||||
# ]
|
||||
# test_action_list = [
|
||||
# test_self_learning.run(
|
||||
# round_count=20,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue