From 9a7b4df20340f0e9fdb4fdf9709d8632609a0c79 Mon Sep 17 00:00:00 2001 From: lidanyang Date: Tue, 16 Jul 2024 17:43:31 +0800 Subject: [PATCH 1/8] add experience examples for scraping task without using browser --- metagpt/strategy/experience_retriever.py | 75 ++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 6 deletions(-) diff --git a/metagpt/strategy/experience_retriever.py b/metagpt/strategy/experience_retriever.py index 04807ebec..f50a90e1c 100644 --- a/metagpt/strategy/experience_retriever.py +++ b/metagpt/strategy/experience_retriever.py @@ -638,8 +638,10 @@ class KeywordExpRetriever(ExpRetriever): return DEPLOY_EXAMPLE elif "issue" in context.lower(): return FIX_ISSUE_EXAMPLE - elif "https:" or "http:" in context.lower(): - return WEB_SCRAPING_EXAMPLE + elif "https:" in context.lower() or "http:" in context.lower(): + if "search" in context.lower() or "click" in context.lower(): + return WEB_SCRAPING_EXAMPLE + return WEB_SCRAPING_EXAMPLE_SIMPLE elif exp_type == "task": if "diagnose" in context.lower(): return SEARCH_SYMBOL_EXAMPLE @@ -916,7 +918,7 @@ Explanation: The requirement is to scrape data from a website and extract inform "task_id": "1", "dependent_task_ids": [], "instruction": "Navigate to the yelp website.", - "assignee": "Browser" + "assignee": "David" } }, { @@ -925,7 +927,7 @@ Explanation: The requirement is to scrape data from a website and extract inform "task_id": "2", "dependent_task_ids": ["1"], "instruction": "Search for restaurants with the keyword 'beef'.", - "assignee": "Browser" + "assignee": "David" } }, { @@ -934,7 +936,7 @@ Explanation: The requirement is to scrape data from a website and extract inform "task_id": "3", "dependent_task_ids": ["2"], "instruction": "View the html content of the search result page before scrap data to understand the structure.", - "assignee": "DataAnalyst" + "assignee": "David" } }, { @@ -943,7 +945,7 @@ Explanation: The requirement is to scrape data from a website and extract inform "task_id": 
"4", "dependent_task_ids": ["3"], "instruction": "Parse the html content to scrape the restaurant names and print it.", - "assignee": "DataAnalyst" + "assignee": "David" } } ] @@ -1020,3 +1022,64 @@ Here is the command to finish the current task and parse the html content: ... """ + + +WEB_SCRAPING_EXAMPLE_SIMPLE = """ +## action 1 +User Requirement: List the restaurant names on the website https://www.yelp.com/search?find_desc=beef&find_loc=New+York%2C+NY. +Explanation: The requirement is to scrape data from a website and extract information about restaurants. The process involves retrieving and presenting the data in a structured format. + +```json +[ + { + "command_name": "Plan.append_task", + "args": { + "task_id": "1", + "dependent_task_ids": [], + "instruction": "View the html content of the page before scrap data to understand the structure.", + "assignee": "David" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "2", + "dependent_task_ids": ["3"], + "instruction": "Parse the html content to scrape the restaurant names and print it.", + "assignee": "David" + } + } +] +``` + +## action 2 +Explanation: To scrap data from the website, I will first view the html content of the page. +Here is the command to view the html content: + +```json +[ + { + "command_name": "DataAnalyst.write_and_exec_code", + "args": {} + } +] +``` + +## action 3 +Explanation: Since the DataAnalyst has successfully viewed the html content of the page, I will finish the current task and then write code to parse the html content and extract the restaurant names. +Here is the command to finish the current task and parse the html content: + +```json +[ + { + "command_name": "Plan.finish_current_task", + "args": {} + }, + { + "command_name": "DataAnalyst.write_and_exec_code", + "args": {} + } +] +``` +... 
+""" From 5f1bb59f1e177d18fba9137af8e8ffd947658477 Mon Sep 17 00:00:00 2001 From: lidanyang Date: Tue, 16 Jul 2024 19:02:00 +0800 Subject: [PATCH 2/8] keep len of web view content --- metagpt/actions/di/execute_nb_code.py | 2 ++ metagpt/tools/libs/browser.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/metagpt/actions/di/execute_nb_code.py b/metagpt/actions/di/execute_nb_code.py index f3dfd1601..b3bf4fcb8 100644 --- a/metagpt/actions/di/execute_nb_code.py +++ b/metagpt/actions/di/execute_nb_code.py @@ -193,6 +193,8 @@ class ExecuteNbCode(Action): # the useful information of normal output is at the begining. if '<!DOCTYPE html>' not in output_text: output_text = output_text[:keep_len] if is_success else output_text[-keep_len:] + else: + output_text = output_text[:20000] if is_success else output_text[-keep_len:] parsed_output.append(output_text) return is_success, ",".join(parsed_output) diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index 864996e8c..8736ab98c 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -161,7 +161,7 @@ class Browser: await self._wait_until_page_idle(page) self.accessibility_tree = await get_accessibility_tree(page) await self.reporter.async_report(page, "page") - return f"SUCCESS, URL: {page.url}" + return f"SUCCESS, URL: {page.url} have been loaded." 
def _register_page_event(self, page: Page): page.last_busy_time = time.time() @@ -196,9 +196,9 @@ class Browser: async def _on_frame_change(self, frame: Frame): await self._update_page_last_busy_time(frame.page) - async def view(self): + async def view(self, keep_len: int = 10000): observation = parse_accessibility_tree(self.accessibility_tree) - return f"Current Browser Viewer\n URL: {self.page.url}\nOBSERVATION:\n{observation[0]}\n" + return f"Current Browser Viewer\n URL: {self.page.url}\nOBSERVATION:\n{observation[0][:keep_len]}\n" async def __aenter__(self): await self.start() From 55d89f740c3c83ab0b5292e9432cd1caf900a283 Mon Sep 17 00:00:00 2001 From: lidanyang Date: Tue, 16 Jul 2024 19:02:36 +0800 Subject: [PATCH 3/8] refine prompt --- metagpt/prompts/di/role_zero.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index b642df9c7..2e32e9325 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -7,6 +7,8 @@ Note: 1. If you keeping encountering errors, unexpected situation, or you are not sure of proceeding, use RoleZero.ask_human to ask for help. 2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task by Plan.finish_current_task explicitly. 3. Each time you finish a task, use RoleZero.reply_to_human to report your progress. +4. Don't forget to append task first when all existing tasks are finished and new tasks are required. +5. Don't repeat the same command if it has been executed successfully. 
""" CMD_PROMPT = """ From 91997aba5a5134acca9b9990824424c4cf51c39b Mon Sep 17 00:00:00 2001 From: lidanyang Date: Tue, 16 Jul 2024 19:03:57 +0800 Subject: [PATCH 4/8] default for task_type --- metagpt/schema.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/metagpt/schema.py b/metagpt/schema.py index 94e64d7fa..18fd7b013 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -669,7 +669,14 @@ class Plan(BaseModel): """ return [task for task in self.tasks if task.is_finished] - def append_task(self, task_id: str, dependent_task_ids: list[str], instruction: str, assignee: str, task_type: str): + def append_task( + self, + task_id: str, + dependent_task_ids: list[str], + instruction: str, + assignee: str, + task_type: str = "" + ): """Append a new task with task_id (number) to the end of existing task sequences. If dependent_task_ids is not empty, the task will depend on the tasks with the ids in the list.""" new_task = Task( task_id=task_id, From 5a309005c480c81db958404eb5b1e9e9075f76bf Mon Sep 17 00:00:00 2001 From: lidanyang Date: Wed, 17 Jul 2024 15:16:24 +0800 Subject: [PATCH 5/8] refine prompt --- metagpt/roles/di/data_analyst.py | 2 ++ metagpt/strategy/experience_retriever.py | 2 +- metagpt/strategy/planner.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index 2b1bb10b1..c52127527 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -105,5 +105,7 @@ class DataAnalyst(RoleZero): status = 'Success' if success else 'Failed' output = CODE_STATUS.format(code=code, status=status, result=result) + if success: + output += 'The code written has been executed successfully.' 
self.rc.working_memory.clear() return output diff --git a/metagpt/strategy/experience_retriever.py b/metagpt/strategy/experience_retriever.py index 5f1c443c9..f667f03a5 100644 --- a/metagpt/strategy/experience_retriever.py +++ b/metagpt/strategy/experience_retriever.py @@ -1035,7 +1035,7 @@ Explanation: The requirement is to scrape data from a website and extract inform "command_name": "Plan.append_task", "args": { "task_id": "2", - "dependent_task_ids": ["3"], + "dependent_task_ids": ["1"], "instruction": "Parse the html content to scrape the restaurant names and print it.", "assignee": "David" } diff --git a/metagpt/strategy/planner.py b/metagpt/strategy/planner.py index 95ad1f5cc..d195cc03b 100644 --- a/metagpt/strategy/planner.py +++ b/metagpt/strategy/planner.py @@ -47,7 +47,7 @@ PLAN_STATUS = """ {current_task_result} ## Task Guidance -Write code for the incomplete sections of 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc. +Write code for the incomplete sections of 'Current Task'. And avoid duplicating code from 'Finished Tasks' and 'Finished Section of Current Task', such as repeated import of packages, reading data, etc. 
Specifically, {guidance} """ From 08975c3a0838cfda45fe69afe8bd804c9891e889 Mon Sep 17 00:00:00 2001 From: lidanyang Date: Wed, 17 Jul 2024 15:16:56 +0800 Subject: [PATCH 6/8] keep len to 20000 --- metagpt/actions/di/execute_nb_code.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/metagpt/actions/di/execute_nb_code.py b/metagpt/actions/di/execute_nb_code.py index b3bf4fcb8..dfeea7e67 100644 --- a/metagpt/actions/di/execute_nb_code.py +++ b/metagpt/actions/di/execute_nb_code.py @@ -158,7 +158,7 @@ class ExecuteNbCode(Action): else: cell["outputs"].append(new_output(output_type="stream", name="stdout", text=str(output))) - def parse_outputs(self, outputs: list[str], keep_len: int = 5000) -> Tuple[bool, str]: + def parse_outputs(self, outputs: list[str], keep_len: int = 20000) -> Tuple[bool, str]: """Parses the outputs received from notebook execution.""" assert isinstance(outputs, list) parsed_output, is_success = [], True @@ -191,10 +191,8 @@ class ExecuteNbCode(Action): output_text = remove_log_and_warning_lines(output_text) # The useful information of the exception is at the end, # the useful information of normal output is at the begining. 
- if '<!DOCTYPE html>' not in output_text: - output_text = output_text[:keep_len] if is_success else output_text[-keep_len:] - else: - output_text = output_text[:20000] if is_success else output_text[-keep_len:] + # if '<!DOCTYPE html>' not in output_text: + output_text = output_text[:keep_len] if is_success else output_text[-keep_len:] parsed_output.append(output_text) return is_success, ",".join(parsed_output) From 08c4af29b5cc913627fb71c96f5172e6ca98b070 Mon Sep 17 00:00:00 2001 From: lidanyang Date: Thu, 18 Jul 2024 17:06:42 +0800 Subject: [PATCH 7/8] recover code --- metagpt/actions/di/execute_nb_code.py | 6 +++--- metagpt/tools/libs/browser.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/metagpt/actions/di/execute_nb_code.py b/metagpt/actions/di/execute_nb_code.py index dfeea7e67..f3dfd1601 100644 --- a/metagpt/actions/di/execute_nb_code.py +++ b/metagpt/actions/di/execute_nb_code.py @@ -158,7 +158,7 @@ class ExecuteNbCode(Action): else: cell["outputs"].append(new_output(output_type="stream", name="stdout", text=str(output))) - def parse_outputs(self, outputs: list[str], keep_len: int = 20000) -> Tuple[bool, str]: + def parse_outputs(self, outputs: list[str], keep_len: int = 5000) -> Tuple[bool, str]: """Parses the outputs received from notebook execution.""" assert isinstance(outputs, list) parsed_output, is_success = [], True @@ -191,8 +191,8 @@ class ExecuteNbCode(Action): output_text = remove_log_and_warning_lines(output_text) # The useful information of the exception is at the end, # the useful information of normal output is at the begining. 
- # if '<!DOCTYPE html>' not in output_text: - output_text = output_text[:keep_len] if is_success else output_text[-keep_len:] + if '<!DOCTYPE html>' not in output_text: + output_text = output_text[:keep_len] if is_success else output_text[-keep_len:] parsed_output.append(output_text) return is_success, ",".join(parsed_output) diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index 4fd6127b3..a458109e6 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -197,9 +197,9 @@ class Browser: async def _on_frame_change(self, frame: Frame): await self._update_page_last_busy_time(frame.page) - async def view(self, keep_len: int = 10000): + async def view(self): observation = parse_accessibility_tree(self.accessibility_tree) - return f"Current Browser Viewer\n URL: {self.page.url}\nOBSERVATION:\n{observation[0][:keep_len]}\n" + return f"Current Browser Viewer\n URL: {self.page.url}\nOBSERVATION:\n{observation[0]}\n" async def __aenter__(self): await self.start() From 5c758facc509b400525dc50163433e8293301c14 Mon Sep 17 00:00:00 2001 From: lidanyang Date: Thu, 18 Jul 2024 17:22:18 +0800 Subject: [PATCH 8/8] refine prompt --- metagpt/prompts/di/role_zero.py | 4 ++-- metagpt/prompts/task_type.py | 2 +- metagpt/strategy/experience_retriever.py | 10 ++-------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 5f5178aee..0037d9384 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -8,7 +8,7 @@ Note: 2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task by Plan.finish_current_task explicitly. 3. Each time you finish a task, use RoleZero.reply_to_human to report your progress. 4. Don't forget to append task first when all existing tasks are finished and new tasks are required. -5. 
Don't repeat the same command if it has been executed successfully. +5. Avoid repeating tasks you have already completed. And end loop when all requirements are met. """ # To ensure compatibility with hard-coded experience, do not add any other content between "# Example" and "# Available Commands". CMD_PROMPT = """ @@ -44,7 +44,7 @@ You may use any of the available commands to create a plan or update the plan. Y If you finish current task, you will automatically take the next task in the existing plan, use Plan.finish_task, DON'T append a new task. # Your commands in a json array, in the following output format with correct command_name and args. If there is nothing to do, use the pass or end command: -Some text indicating your thoughts, such as how you should update the plan status, respond to inquiry, or seek for help. Then a json array of commands. You must output ONE and ONLY ONE json array. DON'T output multiple json arrays with thoughts between them. +Some text indicating your thoughts before JSON is required, such as what tasks have been completed, what tasks are next, how you should update the plan status, respond to inquiry, or seek for help. Then a json array of commands. You must output ONE and ONLY ONE json array. DON'T output multiple json arrays with thoughts between them. ```json [ {{ diff --git a/metagpt/prompts/task_type.py b/metagpt/prompts/task_type.py index 312421c21..2e4af4c1d 100644 --- a/metagpt/prompts/task_type.py +++ b/metagpt/prompts/task_type.py @@ -56,6 +56,6 @@ The current task is about converting image into webpage code. please note the fo # Prompt for taking on "web_scraping" tasks WEB_SCRAPING_PROMPT = """ -- Remember to view and print the necessary HTML content in a separate task to understand the structure first before scraping data. +- Remember to view and print the necessary HTML content in a separate task to understand the structure first before scraping data. 
Such as `html_content = await view_page_element_to_scrape(...)\nprint(html_content)`. - Since the data required by user may not correspond directly to the actual HTML element names, you should thoroughly analyze the HTML structure and meanings of all elements in the executing result first. Ensure the `class_` in your code should derived from the actual HTML structure directly, not based on your knowledge. To ensure it, analyse the most suitable location of the 'class_' in the actual HTML content before code. """ diff --git a/metagpt/strategy/experience_retriever.py b/metagpt/strategy/experience_retriever.py index f667f03a5..e9adb6f10 100644 --- a/metagpt/strategy/experience_retriever.py +++ b/metagpt/strategy/experience_retriever.py @@ -944,7 +944,6 @@ Explanation: The requirement is to scrape data from a website and extract inform ## action 2 Explanation: To search for restaurants, I will now go to the website https://www.yelp.com/ first. -Here is the command to navigate to the website: ```json [ @@ -959,7 +958,6 @@ Here is the command to navigate to the website: ## action 3 Explanation: Since the Browser has successfully navigated to the website, and I find that the element id of the search box is 53. I will finish the current task and then use the Browser tool to type the keyword `beef` in the search box and press enter. -Here is the command to finish the current task and type the keyword: ```json [ @@ -980,7 +978,6 @@ Here is the command to finish the current task and type the keyword: ## action 4 Explanation: Since the Browser has successfully search the keyword `beef`, I will finish the current task and then write code to view the html content of the page. 
-Here is the command to finish the current task and view the html content: ```json [ @@ -996,8 +993,7 @@ Here is the command to finish the current task and view the html content: ``` ## action 5 -Explanation: Since the DataAnalyst has successfully viewed the html content of the page, I will finish the current task and then write code to parse the html content and extract the restaurant names. -Here is the command to finish the current task and parse the html content: +Explanation: Since I has successfully viewed the html content in the context, I will first finish the current task and then write code to parse the html content and extract the restaurant names. ```json [ @@ -1045,7 +1041,6 @@ Explanation: The requirement is to scrape data from a website and extract inform ## action 2 Explanation: To scrap data from the website, I will first view the html content of the page. -Here is the command to view the html content: ```json [ @@ -1057,8 +1052,7 @@ Here is the command to view the html content: ``` ## action 3 -Explanation: Since the DataAnalyst has successfully viewed the html content of the page, I will finish the current task and then write code to parse the html content and extract the restaurant names. -Here is the command to finish the current task and parse the html content: +Explanation: Since I has successfully viewed the html content in the context, I will first finish the current task and then write code to parse the html content and extract the restaurant names. ```json [