diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index f3586a6f5..647196433 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -5,8 +5,11 @@ from pydantic import Field, model_validator from metagpt.actions.di.execute_nb_code import ExecuteNbCode from metagpt.actions.di.write_analysis_code import WriteAnalysisCode from metagpt.logs import logger +from metagpt.prompts.di.data_analyst import BROWSER_INSTRUCTION +from metagpt.prompts.di.role_zero import ROLE_INSTRUCTION from metagpt.roles.di.role_zero import RoleZero from metagpt.schema import TaskResult, Message +from metagpt.strategy.experience_retriever import ExpRetriever, WebExpRetriever from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender from metagpt.tools.tool_registry import register_tool @@ -16,10 +19,12 @@ class DataAnalyst(RoleZero): name: str = "David" profile: str = "DataAnalyst" goal: str = "Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc." 
+ instruction: str = ROLE_INSTRUCTION + BROWSER_INSTRUCTION - tools: list[str] = ["Plan", "DataAnalyst", "RoleZero"] + tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser"] custom_tools: list[str] = ["machine learning", "web scraping", "Terminal"] custom_tool_recommender: ToolRecommender = None + experience_retriever: ExpRetriever = WebExpRetriever() use_reflection: bool = True write_code: WriteAnalysisCode = Field(default_factory=WriteAnalysisCode, exclude=True) @@ -63,6 +68,7 @@ class DataAnalyst(RoleZero): tool_info=tool_info, working_memory=self.rc.working_memory.get() if use_reflection else None, use_reflection=use_reflection, + browser_memory=self.browser_memory ) self.rc.working_memory.add(Message(content=code, role="assistant", cause_by=WriteAnalysisCode)) diff --git a/metagpt/strategy/experience_retriever.py b/metagpt/strategy/experience_retriever.py index 5e85b056a..6356a0faf 100644 --- a/metagpt/strategy/experience_retriever.py +++ b/metagpt/strategy/experience_retriever.py @@ -448,3 +448,129 @@ Explanation: to review the code, call ReviewAndRewriteCode.run. ] ``` """ + + +WEB_SCRAPING_EXAMPLE = """ +## action 1 +User Requirement: Scrape and list the restaurant names on the first page by searching for the keyword `beef` on the website https://www.yelp.com/. +Explanation: The requirement is to scrape data from a website and extract information about restaurants. The process involves searching for restaurants with a specific keyword, retrieving and presenting the data in a structured format. 
+ +```json +[ + { + "command_name": "Plan.append_task", + "args": { + "task_id": "1", + "dependent_task_ids": [], + "instruction": "Navigate to the yelp website.", + "assignee": "Browser" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "2", + "dependent_task_ids": ["1"], + "instruction": "Search for restaurants with the keyword 'beef'.", + "assignee": "Browser" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "3", + "dependent_task_ids": ["2"], + "instruction": "View the html content of the search result page before scraping data to understand the structure.", + "assignee": "DataAnalyst" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "4", + "dependent_task_ids": ["3"], + "instruction": "Parse the html content to scrape the restaurant names and print it.", + "assignee": "DataAnalyst" + } + } +] +``` + +## action 2 +Explanation: To search for restaurants, I will now go to the website https://www.yelp.com/ first. +Here is the command to navigate to the website: + +```json +[ + { + "command_name": "Browser.goto", + "args": { + "url": "https://www.yelp.com/" + } + } +] +``` + +## action 3 +Explanation: Since the Browser has successfully navigated to the website and I find that the element id of the search box is 53, I will finish the current task and then use the Browser tool to type the keyword `beef` in the search box and press enter. +Here is the command to finish the current task and type the keyword: + +```json +[ + { + "command_name": "Plan.finish_current_task", + "args": {} + }, + { + "command_name": "Browser.type", + "args": { + "element_id": 53, + "content": "beef", + "press_enter_after": true + } + } +] +``` + +## action 4 +Explanation: Since the Browser has successfully searched for the keyword `beef`, I will finish the current task and then write code to view the html content of the page. 
+Here is the command to finish the current task and view the html content: + +```json +[ + { + "command_name": "Plan.finish_current_task", + "args": {} + }, + { + "command_name": "DataAnalyst.write_and_exec_code", + "args": {} + } +] +``` + +## action 5 +Explanation: Since the DataAnalyst has successfully viewed the html content of the page, I will finish the current task and then write code to parse the html content and extract the restaurant names. +Here is the command to finish the current task and parse the html content: + +```json +[ + { + "command_name": "Plan.finish_current_task", + "args": {} + }, + { + "command_name": "DataAnalyst.write_and_exec_code", + "args": {} + } +] +``` +... +""" + + +class WebExpRetriever(ExpRetriever): + """A simple experience retriever that returns manually crafted examples.""" + + def retrieve(self, context: str = "") -> str: + return WEB_SCRAPING_EXAMPLE