From 00f7f93234d0c19286aca3d16233367be2d5fd2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?=
Date: Mon, 15 Jan 2024 18:09:56 +0800
Subject: [PATCH] add scrape_web.

---
Note: registers a "scrape_web" entry in TOOL_TYPE_MAPPINGS and adds the
YAML schema file for that tool.
NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; confirm the context lines against
metagpt/tools/__init__.py before applying.

 metagpt/tools/__init__.py                     |  6 ++++++
 .../tools/functions/schemas/scrape_web.yml    | 21 +++++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 metagpt/tools/functions/schemas/scrape_web.yml

diff --git a/metagpt/tools/__init__.py b/metagpt/tools/__init__.py
index 41c8708b2..c24dc6fce 100644
--- a/metagpt/tools/__init__.py
+++ b/metagpt/tools/__init__.py
@@ -76,6 +76,12 @@ TOOL_TYPE_MAPPINGS = {
         desc="Related to text2image, image2image using stable diffusion model.",
         usage_prompt="",
     ),
+    "scrape_web": ToolType(
+        name="scrape_web",
+        module="metagpt.tools.scrape_web",
+        desc="Scrape data from web page.",
+        usage_prompt="",
+    ),
     "other": ToolType(
         name="other",
         module="",
diff --git a/metagpt/tools/functions/schemas/scrape_web.yml b/metagpt/tools/functions/schemas/scrape_web.yml
new file mode 100644
index 000000000..ecca3fbed
--- /dev/null
+++ b/metagpt/tools/functions/schemas/scrape_web.yml
@@ -0,0 +1,21 @@
+scrape_web:
+  type: async function
+  description: "Scrape and save the HTML structure and inner text content of a web page using Playwright."
+  parameters:
+    properties:
+      url:
+        type: str
+        description: "web url"
+      \*url:
+        type: Non-Keyword Arguments
+        description: "other web urls, you can assign sub url link to it."
+    required:
+      - url
+  returns:
+    inner_text:
+      type: str
+      description: The inner text content of the web page.
+    html:
+      type: str
+      description: The html structure of the web page.
+