add experience examples for scraping task

This commit is contained in:
lidanyang 2024-07-09 17:01:07 +08:00
parent 0b7d7bdf55
commit 32fc276245
2 changed files with 133 additions and 1 deletions

View file

@ -5,8 +5,11 @@ from pydantic import Field, model_validator
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
from metagpt.actions.di.write_analysis_code import WriteAnalysisCode
from metagpt.logs import logger
from metagpt.prompts.di.data_analyst import BROWSER_INSTRUCTION
from metagpt.prompts.di.role_zero import ROLE_INSTRUCTION
from metagpt.roles.di.role_zero import RoleZero
from metagpt.schema import TaskResult, Message
from metagpt.strategy.experience_retriever import ExpRetriever, WebExpRetriever
from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
from metagpt.tools.tool_registry import register_tool
@ -16,10 +19,12 @@ class DataAnalyst(RoleZero):
name: str = "David"
profile: str = "DataAnalyst"
goal: str = "Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc."
instruction: str = ROLE_INSTRUCTION + BROWSER_INSTRUCTION
tools: list[str] = ["Plan", "DataAnalyst", "RoleZero"]
tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser"]
custom_tools: list[str] = ["machine learning", "web scraping", "Terminal"]
custom_tool_recommender: ToolRecommender = None
experience_retriever: ExpRetriever = WebExpRetriever()
use_reflection: bool = True
write_code: WriteAnalysisCode = Field(default_factory=WriteAnalysisCode, exclude=True)
@ -63,6 +68,7 @@ class DataAnalyst(RoleZero):
tool_info=tool_info,
working_memory=self.rc.working_memory.get() if use_reflection else None,
use_reflection=use_reflection,
browser_memory=self.browser_memory
)
self.rc.working_memory.add(Message(content=code, role="assistant", cause_by=WriteAnalysisCode))

View file

@ -448,3 +448,129 @@ Explanation: to review the code, call ReviewAndRewriteCode.run.
]
```
"""
# Hand-written few-shot transcript for a web-scraping request; it is the value
# returned verbatim by WebExpRetriever.retrieve.
# The transcript walks through five actions:
#   1. build a four-task plan with Plan.append_task (two tasks assigned to
#      "Browser", two to "DataAnalyst"),
#   2. open the site with Browser.goto,
#   3. finish the task and submit the search keyword with Browser.type,
#   4. finish the task and call DataAnalyst.write_and_exec_code to inspect the html,
#   5. finish the task and call DataAnalyst.write_and_exec_code to parse it.
# NOTE(review): the ```json fence opened in "action 5" is not closed before the
# trailing "..." -- every earlier action closes its fence. Confirm this is
# intentional before relying on the example being well-formed markdown.
WEB_SCRAPING_EXAMPLE = """
## action 1
User Requirement: Scrap and list the restaurant names of first page by searching for the keyword `beef` on the website https://www.yelp.com/.
Explanation: The requirement is to scrape data from a website and extract information about restaurants. The process involves searching for restaurants with a specific keyword, retrieving and presenting the data in a structured format.
```json
[
{
"command_name": "Plan.append_task",
"args": {
"task_id": "1",
"dependent_task_ids": [],
"instruction": "Navigate to the yelp website.",
"assignee": "Browser"
}
},
{
"command_name": "Plan.append_task",
"args": {
"task_id": "2",
"dependent_task_ids": ["1"],
"instruction": "Search for restaurants with the keyword 'beef'.",
"assignee": "Browser"
}
},
{
"command_name": "Plan.append_task",
"args": {
"task_id": "3",
"dependent_task_ids": ["2"],
"instruction": "View the html content of the search result page before scrap data to understand the structure.",
"assignee": "DataAnalyst"
}
},
{
"command_name": "Plan.append_task",
"args": {
"task_id": "4",
"dependent_task_ids": ["3"],
"instruction": "Parse the html content to scrape the restaurant names and print it.",
"assignee": "DataAnalyst"
}
}
]
```
## action 2
Explanation: To search for restaurants, I will now go to the website https://www.yelp.com/ first.
Here is the command to navigate to the website:
```json
[
{
"command_name": "Browser.goto",
"args": {
"url": "https://www.yelp.com/"
}
}
]
```
## action 3
Explanation: Since the Browser has successfully navigated to the website, and I find that the element id of the search box is 53. I will finish the current task and then use the Browser tool to type the keyword `beef` in the search box and press enter.
Here is the command to finish the current task and type the keyword:
```json
[
{
"command_name": "Plan.finish_current_task",
"args": {}
},
{
"command_name": "Browser.type",
"args": {
"element_id": 53,
"content": "beef",
"press_enter_after": true
}
}
]
```
## action 4
Explanation: Since the Browser has successfully search the keyword `beef`, I will finish the current task and then write code to view the html content of the page.
Here is the command to finish the current task and view the html content:
```json
[
{
"command_name": "Plan.finish_current_task",
"args": {}
},
{
"command_name": "DataAnalyst.write_and_exec_code",
"args": {}
}
]
```
## action 5
Explanation: Since the DataAnalyst has successfully viewed the html content of the page, I will finish the current task and then write code to parse the html content and extract the restaurant names.
Here is the command to finish the current task and parse the html content:
```json
[
{
"command_name": "Plan.finish_current_task",
"args": {}
},
{
"command_name": "DataAnalyst.write_and_exec_code",
"args": {}
}
]
...
"""
class WebExpRetriever(ExpRetriever):
    """Experience retriever backed by a single hand-written web-scraping example."""

    def retrieve(self, context: str = "") -> str:
        """Return the canned web-scraping example.

        Args:
            context: Accepted for interface compatibility; this retriever
                does not read it.

        Returns:
            The module-level ``WEB_SCRAPING_EXAMPLE`` text, unchanged.
        """
        example = WEB_SCRAPING_EXAMPLE
        return example