Fix missing last_act value: return last_act in the ScreenshotParse output data and store it on AndroidAssistant

This commit is contained in:
better629 2024-04-02 15:42:07 +08:00
parent 0ea6d7cc2d
commit 5300bca6ca
4 changed files with 11 additions and 9 deletions

View file

@ -56,9 +56,9 @@ class ScreenshotParse(Action):
return ""
ui_doc = """
You also have access to the following documentations that describes the functionalities of UI
elements you can interact on the screen. These docs are crucial for you to determine the target of your
next action. You should always prioritize these documented elements for interaction:"""
You also have access to the following documentations that describes the functionalities of UI
elements you can interact on the screen. These docs are crucial for you to determine the target of your
next action. You should always prioritize these documented elements for interaction: """
for i, elem in enumerate(elem_list):
doc_path = docs_idr.joinpath(f"{elem.uid}.txt")
if not doc_path.exists():
@ -157,6 +157,7 @@ class ScreenshotParse(Action):
if op_param.param_state == RunState.FAIL:
return AndroidActionOutput(action_state=RunState.FAIL)
last_act = op_param.last_act
if isinstance(op_param, TapOpParam):
x, y = elem_bbox_to_xy(elem_list[op_param.area - 1].bbox)
action = EnvAction(action_type=EnvActionType.SYSTEM_TAP, coord=(x, y))
@ -199,4 +200,4 @@ class ScreenshotParse(Action):
if op_param.act_name != "grid":
grid_on = False
return AndroidActionOutput(data={"grid_on": grid_on})
return AndroidActionOutput(data={"grid_on": grid_on, "last_act": last_act})

View file

@ -39,7 +39,7 @@ other elements with numeric tags cannot help with the task. The function will br
smartphone screen into small areas and this will give you more freedom to choose any part of the screen to tap, long
press, or swipe.
{ui_document}
The task you need to complete is to {task_description}. Your past actions to proceed with this task are summarized as
The task you need to complete is to: {task_description}. Your past actions to proceed with this task are summarized as
follows: {last_act}
Now, given the documentation and the following labeled screenshot, you need to think and call the function needed to
proceed with the task. Your output should include three parts in the given format:
@ -78,7 +78,7 @@ bottom, and bottom-right.
A simple use case can be swipe(21, "center", 25, "right"), which performs a swipe starting from the center of grid area
21 to the right part of grid area 25.
The task you need to complete is to {task_description}. Your past actions to proceed with this task are summarized as
The task you need to complete is to: {task_description}. Your past actions to proceed with this task are summarized as
follows: {last_act}
Now, given the following labeled screenshot, you need to think and call the function needed to proceed with the task.
Your output should include three parts in the given format:

View file

@ -30,7 +30,7 @@ class AndroidAssistant(Role):
task_desc: str = ""
round_count: int = 0
last_act: str = ""
last_act: str = "None"
output_root_dir: Optional[Path] = Field(default=None)
task_dir: Optional[Path] = Field(default=None)
docs_dir: Optional[Path] = Field(default=None)
@ -131,7 +131,8 @@ class AndroidAssistant(Role):
)
if resp.action_state == RunState.SUCCESS:
logger.info(f"grid_on: {resp.data.get('grid_on')}")
self.grid_on = resp.data.get("grid_on")
self.grid_on = resp.data.get("grid_on", False)
self.last_act = resp.data.get("last_act", "None")
msg = Message(
content=f"RoundCount: {self.round_count}, action_state: {resp.action_state}",
role=self.profile,

View file

@ -92,7 +92,7 @@ class RunState(Enum):
class BaseOpParam(BaseModel):
act_name: str = Field(default="", validate_default=True)
last_act: str = Field(default="")
last_act: str = Field(default="None")
param_state: RunState = Field(default=RunState.SUCCESS, description="return state when extract params")