From 5300bca6ca56f086190dd671dc28ed70ae01b422 Mon Sep 17 00:00:00 2001 From: better629 Date: Tue, 2 Apr 2024 15:42:07 +0800 Subject: [PATCH] fix missing last_act value --- .../ext/android_assistant/actions/screenshot_parse.py | 9 +++++---- .../ext/android_assistant/prompts/assistant_prompt.py | 4 ++-- metagpt/ext/android_assistant/roles/android_assistant.py | 5 +++-- metagpt/ext/android_assistant/utils/schema.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/metagpt/ext/android_assistant/actions/screenshot_parse.py b/metagpt/ext/android_assistant/actions/screenshot_parse.py index e55d56f0a..2004aa85f 100644 --- a/metagpt/ext/android_assistant/actions/screenshot_parse.py +++ b/metagpt/ext/android_assistant/actions/screenshot_parse.py @@ -56,9 +56,9 @@ class ScreenshotParse(Action): return "" ui_doc = """ - You also have access to the following documentations that describes the functionalities of UI - elements you can interact on the screen. These docs are crucial for you to determine the target of your - next action. You should always prioritize these documented elements for interaction:""" +You also have access to the following documentations that describes the functionalities of UI +elements you can interact on the screen. These docs are crucial for you to determine the target of your +next action. You should always prioritize these documented elements for interaction: """ for i, elem in enumerate(elem_list): doc_path = docs_idr.joinpath(f"{elem.uid}.txt") if not doc_path.exists(): @@ -157,6 +157,7 @@ class ScreenshotParse(Action): if op_param.param_state == RunState.FAIL: return AndroidActionOutput(action_state=RunState.FAIL) + last_act = op_param.last_act if isinstance(op_param, TapOpParam): x, y = elem_bbox_to_xy(elem_list[op_param.area - 1].bbox) action = EnvAction(action_type=EnvActionType.SYSTEM_TAP, coord=(x, y)) @@ -199,4 +200,4 @@ class ScreenshotParse(Action): if op_param.act_name != "grid": grid_on = False - return AndroidActionOutput(data={"grid_on": grid_on}) + return AndroidActionOutput(data={"grid_on": grid_on, "last_act": last_act}) diff --git a/metagpt/ext/android_assistant/prompts/assistant_prompt.py b/metagpt/ext/android_assistant/prompts/assistant_prompt.py index c0dc6f22f..34baf5841 100644 --- a/metagpt/ext/android_assistant/prompts/assistant_prompt.py +++ b/metagpt/ext/android_assistant/prompts/assistant_prompt.py @@ -39,7 +39,7 @@ other elements with numeric tags cannot help with the task. The function will br smartphone screen into small areas and this will give you more freedom to choose any part of the screen to tap, long press, or swipe. {ui_document} -The task you need to complete is to {task_description}. Your past actions to proceed with this task are summarized as +The task you need to complete is to: {task_description}. Your past actions to proceed with this task are summarized as follows: {last_act} Now, given the documentation and the following labeled screenshot, you need to think and call the function needed to proceed with the task. Your output should include three parts in the given format: @@ -78,7 +78,7 @@ bottom, and bottom-right. A simple use case can be swipe(21, "center", 25, "right"), which performs a swipe starting from the center of grid area 21 to the right part of grid area 25. -The task you need to complete is to {task_description}. Your past actions to proceed with this task are summarized as +The task you need to complete is to: {task_description}. Your past actions to proceed with this task are summarized as follows: {last_act} Now, given the following labeled screenshot, you need to think and call the function needed to proceed with the task. Your output should include three parts in the given format: diff --git a/metagpt/ext/android_assistant/roles/android_assistant.py b/metagpt/ext/android_assistant/roles/android_assistant.py index 531c82261..8abdbab59 100644 --- a/metagpt/ext/android_assistant/roles/android_assistant.py +++ b/metagpt/ext/android_assistant/roles/android_assistant.py @@ -30,7 +30,7 @@ class AndroidAssistant(Role): task_desc: str = "" round_count: int = 0 - last_act: str = "" + last_act: str = "None" output_root_dir: Optional[Path] = Field(default=None) task_dir: Optional[Path] = Field(default=None) docs_dir: Optional[Path] = Field(default=None) @@ -131,7 +131,8 @@ class AndroidAssistant(Role): ) if resp.action_state == RunState.SUCCESS: logger.info(f"grid_on: {resp.data.get('grid_on')}") - self.grid_on = resp.data.get("grid_on") + self.grid_on = resp.data.get("grid_on", False) + self.last_act = resp.data.get("last_act", "None") msg = Message( content=f"RoundCount: {self.round_count}, action_state: {resp.action_state}", role=self.profile, diff --git a/metagpt/ext/android_assistant/utils/schema.py b/metagpt/ext/android_assistant/utils/schema.py index 661f4abf4..c066f98b6 100644 --- a/metagpt/ext/android_assistant/utils/schema.py +++ b/metagpt/ext/android_assistant/utils/schema.py @@ -92,7 +92,7 @@ class RunState(Enum): class BaseOpParam(BaseModel): act_name: str = Field(default="", validate_default=True) - last_act: str = Field(default="") + last_act: str = Field(default="None") param_state: RunState = Field(default=RunState.SUCCESS, description="return state when extract params")