mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-14 15:25:17 +02:00
add crawler tools
This commit is contained in:
parent
d1d44e9cea
commit
4f43b905a2
5 changed files with 83 additions and 11 deletions
|
|
@ -6,16 +6,19 @@
|
|||
"""
|
||||
|
||||
from metagpt.roles.di.data_interpreter import DataInterpreter
|
||||
from metagpt.tools.libs.browser import Browser as _
|
||||
|
||||
|
||||
PAPER_LIST_REQ = """"
|
||||
Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
|
||||
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*
|
||||
and save it to a csv file. paper title must include `multiagent` or `large language model`.
|
||||
**Notice: view the page element before writing scraping code**
|
||||
"""
|
||||
|
||||
ECOMMERCE_REQ = """
|
||||
Get products data from website https://scrapeme.live/shop/ and save it as a csv file.
|
||||
**Notice: Firstly parse the web page encoding and the text HTML structure;
|
||||
The first page product name, price, product URL, and image URL must be saved in the csv;**
|
||||
The first page product name, price, product URL, and image URL must be saved in the csv.
|
||||
**Notice: view the page element before writing scraping code**
|
||||
"""
|
||||
|
||||
NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**;
|
||||
|
|
@ -25,11 +28,12 @@ NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash
|
|||
3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题、链接、时间;
|
||||
4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个。
|
||||
5. 将全部结果存在本地csv中
|
||||
**Notice: view the page element before writing scraping code**
|
||||
"""
|
||||
|
||||
|
||||
async def main():
|
||||
di = DataInterpreter(tools=["scrape_web_playwright"])
|
||||
di = DataInterpreter(tools=["Browser"])
|
||||
|
||||
await di.run(ECOMMERCE_REQ)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue