add crawler tools

This commit is contained in:
shenchucheng 2024-05-30 20:04:02 +08:00
parent d1d44e9cea
commit 4f43b905a2
5 changed files with 83 additions and 11 deletions

View file

@ -6,16 +6,19 @@
"""
from metagpt.roles.di.data_interpreter import DataInterpreter
from metagpt.tools.libs.browser import Browser as _
PAPER_LIST_REQ = """"
Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*
and save it to a csv file. paper title must include `multiagent` or `large language model`.
**Notice: view the page element before writing scraping code**
"""
ECOMMERCE_REQ = """
Get products data from website https://scrapeme.live/shop/ and save it as a csv file.
**Notice: Firstly parse the web page encoding and the text HTML structure;
The first page product name, price, product URL, and image URL must be saved in the csv;**
The first page product name, price, product URL, and image URL must be saved in the csv.
**Notice: view the page element before writing scraping code**
"""
NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**;
@ -25,11 +28,12 @@ NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash
3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题链接时间;
4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个
5. 将全部结果存在本地csv中
**Notice: view the page element before writing scraping code**
"""
async def main():
di = DataInterpreter(tools=["scrape_web_playwright"])
di = DataInterpreter(tools=["Browser"])
await di.run(ECOMMERCE_REQ)