mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-25 00:36:55 +02:00
update webscraping tool
This commit is contained in:
parent
23c27627ce
commit
9b72370cbe
2 changed files with 5 additions and 6 deletions
|
|
@ -10,7 +10,7 @@ from metagpt.roles.ci.code_interpreter import CodeInterpreter
|
|||
|
||||
async def main():
|
||||
prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
|
||||
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key data*"""
|
||||
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
|
||||
ci = CodeInterpreter(goal=prompt, use_tools=True)
|
||||
|
||||
await ci.run(prompt)
|
||||
|
|
|
|||
|
|
@ -4,19 +4,18 @@ from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper
|
|||
|
||||
|
||||
@register_tool(tool_type=ToolType.WEBSCRAPING.type_name)
|
||||
async def scrape_web_playwright(url, *urls):
|
||||
async def scrape_web_playwright(url):
|
||||
"""
|
||||
Scrape and save the HTML structure and inner text content of a web page using Playwright.
|
||||
Asynchronously scrape and save the HTML structure and inner text content of a web page using Playwright.
|
||||
|
||||
Args:
|
||||
url (str): The main URL to fetch inner text from.
|
||||
*urls (str): Additional URLs to fetch inner text from.
|
||||
|
||||
Returns:
|
||||
(dict): The inner text content and html structure of the web page, key are : 'inner_text', 'html'.
|
||||
dict: The inner text content and html structure of the web page, keys are 'inner_text', 'html'.
|
||||
"""
|
||||
# Create a PlaywrightWrapper instance for the Chromium browser
|
||||
web = await PlaywrightWrapper().run(url, *urls)
|
||||
web = await PlaywrightWrapper().run(url)
|
||||
|
||||
# Return the inner text content and HTML structure of the web page
|
||||
return {"inner_text": web.inner_text.strip(), "html": web.html.strip()}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue