mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-11 15:15:18 +02:00
add experience examples for scraping task
This commit is contained in:
parent
0b7d7bdf55
commit
32fc276245
2 changed files with 133 additions and 1 deletions
|
|
@ -5,8 +5,11 @@ from pydantic import Field, model_validator
|
|||
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
|
||||
from metagpt.actions.di.write_analysis_code import WriteAnalysisCode
|
||||
from metagpt.logs import logger
|
||||
from metagpt.prompts.di.data_analyst import BROWSER_INSTRUCTION
|
||||
from metagpt.prompts.di.role_zero import ROLE_INSTRUCTION
|
||||
from metagpt.roles.di.role_zero import RoleZero
|
||||
from metagpt.schema import TaskResult, Message
|
||||
from metagpt.strategy.experience_retriever import ExpRetriever, WebExpRetriever
|
||||
from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
|
||||
|
|
@ -16,10 +19,12 @@ class DataAnalyst(RoleZero):
|
|||
name: str = "David"
|
||||
profile: str = "DataAnalyst"
|
||||
goal: str = "Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc."
|
||||
instruction: str = ROLE_INSTRUCTION + BROWSER_INSTRUCTION
|
||||
|
||||
tools: list[str] = ["Plan", "DataAnalyst", "RoleZero"]
|
||||
tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser"]
|
||||
custom_tools: list[str] = ["machine learning", "web scraping", "Terminal"]
|
||||
custom_tool_recommender: ToolRecommender = None
|
||||
experience_retriever: ExpRetriever = WebExpRetriever()
|
||||
|
||||
use_reflection: bool = True
|
||||
write_code: WriteAnalysisCode = Field(default_factory=WriteAnalysisCode, exclude=True)
|
||||
|
|
@ -63,6 +68,7 @@ class DataAnalyst(RoleZero):
|
|||
tool_info=tool_info,
|
||||
working_memory=self.rc.working_memory.get() if use_reflection else None,
|
||||
use_reflection=use_reflection,
|
||||
browser_memory=self.browser_memory
|
||||
)
|
||||
self.rc.working_memory.add(Message(content=code, role="assistant", cause_by=WriteAnalysisCode))
|
||||
|
||||
|
|
|
|||
|
|
@ -448,3 +448,129 @@ Explanation: to review the code, call ReviewAndRewriteCode.run.
|
|||
]
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
# Hand-written few-shot walkthrough for a web-scraping requirement.
# It shows five consecutive "actions": planning the tasks, navigating with the
# Browser tool, typing the search keyword, and two code-writing steps.
# Every action carries its commands in a fenced ```json block; each fence must
# be closed so the example itself demonstrates a well-formed response.
WEB_SCRAPING_EXAMPLE = """
## action 1
User Requirement: Scrap and list the restaurant names of first page by searching for the keyword `beef` on the website https://www.yelp.com/.
Explanation: The requirement is to scrape data from a website and extract information about restaurants. The process involves searching for restaurants with a specific keyword, retrieving and presenting the data in a structured format.

```json
[
    {
        "command_name": "Plan.append_task",
        "args": {
            "task_id": "1",
            "dependent_task_ids": [],
            "instruction": "Navigate to the yelp website.",
            "assignee": "Browser"
        }
    },
    {
        "command_name": "Plan.append_task",
        "args": {
            "task_id": "2",
            "dependent_task_ids": ["1"],
            "instruction": "Search for restaurants with the keyword 'beef'.",
            "assignee": "Browser"
        }
    },
    {
        "command_name": "Plan.append_task",
        "args": {
            "task_id": "3",
            "dependent_task_ids": ["2"],
            "instruction": "View the html content of the search result page before scrap data to understand the structure.",
            "assignee": "DataAnalyst"
        }
    },
    {
        "command_name": "Plan.append_task",
        "args": {
            "task_id": "4",
            "dependent_task_ids": ["3"],
            "instruction": "Parse the html content to scrape the restaurant names and print it.",
            "assignee": "DataAnalyst"
        }
    }
]
```

## action 2
Explanation: To search for restaurants, I will now go to the website https://www.yelp.com/ first.
Here is the command to navigate to the website:

```json
[
    {
        "command_name": "Browser.goto",
        "args": {
            "url": "https://www.yelp.com/"
        }
    }
]
```

## action 3
Explanation: Since the Browser has successfully navigated to the website, and I find that the element id of the search box is 53. I will finish the current task and then use the Browser tool to type the keyword `beef` in the search box and press enter.
Here is the command to finish the current task and type the keyword:

```json
[
    {
        "command_name": "Plan.finish_current_task",
        "args": {}
    },
    {
        "command_name": "Browser.type",
        "args": {
            "element_id": 53,
            "content": "beef",
            "press_enter_after": true
        }
    }
]
```

## action 4
Explanation: Since the Browser has successfully search the keyword `beef`, I will finish the current task and then write code to view the html content of the page.
Here is the command to finish the current task and view the html content:

```json
[
    {
        "command_name": "Plan.finish_current_task",
        "args": {}
    },
    {
        "command_name": "DataAnalyst.write_and_exec_code",
        "args": {}
    }
]
```

## action 5
Explanation: Since the DataAnalyst has successfully viewed the html content of the page, I will finish the current task and then write code to parse the html content and extract the restaurant names.
Here is the command to finish the current task and parse the html content:

```json
[
    {
        "command_name": "Plan.finish_current_task",
        "args": {}
    },
    {
        "command_name": "DataAnalyst.write_and_exec_code",
        "args": {}
    }
]
```

...
"""
|
||||
|
||||
|
||||
class WebExpRetriever(ExpRetriever):
    """Experience retriever backed by a single hand-written web-scraping walkthrough."""

    def retrieve(self, context: str = "") -> str:
        """Return the canned web-scraping example.

        Args:
            context: Accepted but not consulted; the same text is returned
                for every value.

        Returns:
            The module-level ``WEB_SCRAPING_EXAMPLE`` string.
        """
        example = WEB_SCRAPING_EXAMPLE
        return example
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue