diff --git a/examples/crawl_webpage.py b/examples/crawl_webpage.py
index 7dcbf7993..2db9e407b 100644
--- a/examples/crawl_webpage.py
+++ b/examples/crawl_webpage.py
@@ -10,7 +10,7 @@ from metagpt.roles.ci.code_interpreter import CodeInterpreter
 
 async def main():
     prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
-    and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key data*"""
+    and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
     ci = CodeInterpreter(goal=prompt, use_tools=True)
 
     await ci.run(prompt)
diff --git a/metagpt/tools/libs/web_scraping.py b/metagpt/tools/libs/web_scraping.py
index 6fd3b9435..d01e69d09 100644
--- a/metagpt/tools/libs/web_scraping.py
+++ b/metagpt/tools/libs/web_scraping.py
@@ -4,19 +4,18 @@ from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper
 
 
 @register_tool(tool_type=ToolType.WEBSCRAPING.type_name)
-async def scrape_web_playwright(url, *urls):
+async def scrape_web_playwright(url):
     """
-    Scrape and save the HTML structure and inner text content of a web page using Playwright.
+    Asynchronously Scrape and save the HTML structure and inner text content of a web page using Playwright.
 
     Args:
         url (str): The main URL to fetch inner text from.
-        *urls (str): Additional URLs to fetch inner text from.
 
     Returns:
-        (dict): The inner text content and html structure of the web page, key are : 'inner_text', 'html'.
+        dict: The inner text content and html structure of the web page, keys are 'inner_text', 'html'.
     """
     # Create a PlaywrightWrapper instance for the Chromium browser
-    web = await PlaywrightWrapper().run(url, *urls)
+    web = await PlaywrightWrapper().run(url)
 
     # Return the inner text content of the web page
     return {"inner_text": web.inner_text.strip(), "html": web.html.strip()}