diff --git a/metagpt/tools/__init__.py b/metagpt/tools/__init__.py index f18d1d276..bb87f1b62 100644 --- a/metagpt/tools/__init__.py +++ b/metagpt/tools/__init__.py @@ -11,7 +11,7 @@ from metagpt.tools import tool_types # this registers all tool types from metagpt.tools import libs # this registers all tools from metagpt.tools.tool_registry import TOOL_REGISTRY -_, _, _ = tool_types, libs, TOOL_REGISTRY # Avoid pre-commit error +_ = tool_types, libs, TOOL_REGISTRY # Avoid pre-commit error class SearchEngineType(Enum): diff --git a/metagpt/tools/functions/libs/scrape_web/__init__.py b/metagpt/tools/functions/libs/scrape_web/__init__.py deleted file mode 100644 index d5cd1524b..000000000 --- a/metagpt/tools/functions/libs/scrape_web/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from metagpt.tools.functions.libs.scrape_web.scrape_web import scrape_web diff --git a/metagpt/tools/libs/__init__.py b/metagpt/tools/libs/__init__.py index b576997c9..442f57149 100644 --- a/metagpt/tools/libs/__init__.py +++ b/metagpt/tools/libs/__init__.py @@ -9,6 +9,7 @@ from metagpt.tools.libs import ( feature_engineering, sd_engine, gpt_v_generator, + web_scrapping, ) -_, _, _, _ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator # Avoid pre-commit error +_ = data_preprocess, feature_engineering, sd_engine, gpt_v_generator, web_scrapping # Avoid pre-commit error diff --git a/metagpt/tools/functions/libs/scrape_web/scrape_web.py b/metagpt/tools/libs/web_scrapping.py similarity index 76% rename from metagpt/tools/functions/libs/scrape_web/scrape_web.py rename to metagpt/tools/libs/web_scrapping.py index e68ce0e64..e8e73f123 100644 --- a/metagpt/tools/functions/libs/scrape_web/scrape_web.py +++ b/metagpt/tools/libs/web_scrapping.py @@ -1,9 +1,10 @@ -import asyncio - +from metagpt.tools.tool_data_type import ToolTypeEnum +from metagpt.tools.tool_registry import register_tool from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper -async def scrape_web(url, *urls): +@register_tool(tool_type=ToolTypeEnum.WEBSCRAPING.value) +async def scrape_web_playwright(url, *urls): """ Scrape and save the HTML structure and inner text content of a web page using Playwright. @@ -19,5 +20,3 @@ async def scrape_web(url, *urls): # Return the inner text content of the web page return {"inner_text": web.inner_text, "html": web.html} - -# 需要改三个地方: yaml, 对应路径下init, MetaGPT/metagpt/prompts/ml_engineer.py中ML_MODULE_MAP diff --git a/metagpt/tools/functions/schemas/scrape_web.yml b/metagpt/tools/schemas/web_scrapping/scrape_web_playwright.yml similarity index 96% rename from metagpt/tools/functions/schemas/scrape_web.yml rename to metagpt/tools/schemas/web_scrapping/scrape_web_playwright.yml index ecca3fbed..a6ff7d6c7 100644 --- a/metagpt/tools/functions/schemas/scrape_web.yml +++ b/metagpt/tools/schemas/web_scrapping/scrape_web_playwright.yml @@ -1,4 +1,4 @@ -scrape_web: +scrape_web_playwright: type: async funciton description: "Scrape and save the HTML structure and inner text content of a web page using Playwright." parameters: diff --git a/metagpt/tools/tool_data_type.py b/metagpt/tools/tool_data_type.py index 45fb539a6..0c4eea4cc 100644 --- a/metagpt/tools/tool_data_type.py +++ b/metagpt/tools/tool_data_type.py @@ -11,6 +11,7 @@ class ToolTypeEnum(Enum): MODEL_EVALUATE = "model_evaluate" STABLE_DIFFUSION = "stable_diffusion" IMAGE2WEBPAGE = "image2webpage" + WEBSCRAPING = "web_scraping" OTHER = "other" def __missing__(self, key): diff --git a/metagpt/tools/tool_types.py b/metagpt/tools/tool_types.py index b5b233d53..35c0772b1 100644 --- a/metagpt/tools/tool_types.py +++ b/metagpt/tools/tool_types.py @@ -12,7 +12,7 @@ from metagpt.tools.tool_registry import register_tool_type @register_tool_type class EDA(ToolType): name: str = ToolTypeEnum.EDA.value - desc: str = "Useful for performing exploratory data analysis" + desc: str = "For performing exploratory data analysis" @register_tool_type @@ -56,6 +56,12 @@ class Image2Webpage(ToolType): usage_prompt: str = IMAGE2WEBPAGE_PROMPT +@register_tool_type +class WebScraping(ToolType): + name: str = ToolTypeEnum.WEBSCRAPING.value + desc: str = "For scraping data from web pages." + + @register_tool_type class Other(ToolType): name: str = ToolTypeEnum.OTHER.value diff --git a/metagpt/tools/web_browser_engine_playwright.py b/metagpt/tools/web_browser_engine_playwright.py index a45f6a12e..15c8a78d7 100644 --- a/metagpt/tools/web_browser_engine_playwright.py +++ b/metagpt/tools/web_browser_engine_playwright.py @@ -12,7 +12,6 @@ from typing import Literal from playwright.async_api import async_playwright -from metagpt.config import CONFIG from metagpt.logs import logger from metagpt.utils.parse_html import WebPage @@ -32,6 +31,8 @@ class PlaywrightWrapper: launch_kwargs: dict | None = None, **kwargs, ) -> None: + from metagpt.config import CONFIG + if browser_type is None: browser_type = CONFIG.playwright_browser_type self.browser_type = browser_type