Merge branch 'data_analyst_ldy' into 'mgx_ops'

Data analyst ldy

See merge request pub/MetaGPT!189
This commit is contained in:
林义章 2024-07-15 06:55:52 +00:00
commit e2ce006d15
17 changed files with 376 additions and 198 deletions

View file

@ -629,6 +629,8 @@ class KeywordExpRetriever(ExpRetriever):
return DEPLOY_EXAMPLE
elif "issue" in context.lower():
return FIX_ISSUE_EXAMPLE
elif "https:" or "http:" in context.lower():
return WEB_SCRAPING_EXAMPLE
elif exp_type == "task":
if "diagnose" in context.lower():
return SEARCH_SYMBOL_EXAMPLE
@ -890,3 +892,122 @@ Explanation: to review the code, call ReviewAndRewriteCode.run.
]
```
"""
# Few-shot example for a web-scraping requirement: action 1 appends four plan
# tasks (two assigned to Browser, two to DataAnalyst); actions 2-5 walk through
# them with Browser.goto / Browser.type and DataAnalyst.write_and_exec_code.
# Every ```json block is closed so the example stays well-formed markdown; the
# trailing "..." marks that the example is truncated.
WEB_SCRAPING_EXAMPLE = """
## action 1
User Requirement: Scrap and list the restaurant names of first page by searching for the keyword `beef` on the website https://www.yelp.com/.
Explanation: The requirement is to scrape data from a website and extract information about restaurants. The process involves searching for restaurants with a specific keyword, retrieving and presenting the data in a structured format.
```json
[
{
"command_name": "Plan.append_task",
"args": {
"task_id": "1",
"dependent_task_ids": [],
"instruction": "Navigate to the yelp website.",
"assignee": "Browser"
}
},
{
"command_name": "Plan.append_task",
"args": {
"task_id": "2",
"dependent_task_ids": ["1"],
"instruction": "Search for restaurants with the keyword 'beef'.",
"assignee": "Browser"
}
},
{
"command_name": "Plan.append_task",
"args": {
"task_id": "3",
"dependent_task_ids": ["2"],
"instruction": "View the html content of the search result page before scrap data to understand the structure.",
"assignee": "DataAnalyst"
}
},
{
"command_name": "Plan.append_task",
"args": {
"task_id": "4",
"dependent_task_ids": ["3"],
"instruction": "Parse the html content to scrape the restaurant names and print it.",
"assignee": "DataAnalyst"
}
}
]
```
## action 2
Explanation: To search for restaurants, I will now go to the website https://www.yelp.com/ first.
Here is the command to navigate to the website:
```json
[
{
"command_name": "Browser.goto",
"args": {
"url": "https://www.yelp.com/"
}
}
]
```
## action 3
Explanation: Since the Browser has successfully navigated to the website, and I find that the element id of the search box is 53. I will finish the current task and then use the Browser tool to type the keyword `beef` in the search box and press enter.
Here is the command to finish the current task and type the keyword:
```json
[
{
"command_name": "Plan.finish_current_task",
"args": {}
},
{
"command_name": "Browser.type",
"args": {
"element_id": 53,
"content": "beef",
"press_enter_after": true
}
}
]
```
## action 4
Explanation: Since the Browser has successfully search the keyword `beef`, I will finish the current task and then write code to view the html content of the page.
Here is the command to finish the current task and view the html content:
```json
[
{
"command_name": "Plan.finish_current_task",
"args": {}
},
{
"command_name": "DataAnalyst.write_and_exec_code",
"args": {}
}
]
```
## action 5
Explanation: Since the DataAnalyst has successfully viewed the html content of the page, I will finish the current task and then write code to parse the html content and extract the restaurant names.
Here is the command to finish the current task and parse the html content:
```json
[
{
"command_name": "Plan.finish_current_task",
"args": {}
},
{
"command_name": "DataAnalyst.write_and_exec_code",
"args": {}
}
]
```
...
"""

View file

@ -40,8 +40,14 @@ PLAN_STATUS = """
## Current Task
{current_task}
## Finished Section of Current Task
### code
{current_task_code}
### execution result
{current_task_result}
## Task Guidance
Write complete code for 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc.
Write code for the incomplete sections of 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc.
Specifically, {guidance}
"""
@ -173,6 +179,8 @@ class Planner(BaseModel):
code_written=code_written,
task_results=task_results,
current_task=self.current_task.instruction,
current_task_code=self.current_task.code if self.current_task.code else "",
current_task_result=self.current_task.result if self.current_task.result else "",
guidance=guidance,
)

View file

@ -8,7 +8,7 @@ from metagpt.prompts.task_type import (
FEATURE_ENGINEERING_PROMPT,
IMAGE2WEBPAGE_PROMPT,
MODEL_EVALUATE_PROMPT,
MODEL_TRAIN_PROMPT,
MODEL_TRAIN_PROMPT, WEB_SCRAPING_PROMPT,
)
@ -62,6 +62,7 @@ class TaskType(Enum):
WEBSCRAPING = TaskTypeDef(
name="web scraping",
desc="For scraping data from web pages.",
guidance=WEB_SCRAPING_PROMPT,
)
EMAIL_LOGIN = TaskTypeDef(
name="email login",