Merge pull request #69 from shenchucheng/main

Add web page scraping feature implemented by Playwright/Selenium
This commit is contained in:
geekan 2023-07-25 11:28:34 +08:00 committed by GitHub
commit 007c8c0457
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 479 additions and 30 deletions

View file

@ -0,0 +1,25 @@
import pytest
from metagpt.config import Config
from metagpt.tools import web_browser_engine, WebBrowserEngineType
@pytest.mark.asyncio
@pytest.mark.parametrize(
"browser_type, url, urls",
[
(WebBrowserEngineType.PLAYWRIGHT, "https://fuzhi.ai", ("https://fuzhi.ai",)),
(WebBrowserEngineType.SELENIUM, "https://fuzhi.ai", ("https://fuzhi.ai",)),
],
ids=["playwright", "selenium"],
)
async def test_scrape_web_page(browser_type, url, urls):
browser = web_browser_engine.WebBrowserEngine(browser_type)
result = await browser.run(url)
assert isinstance(result, str)
assert "深度赋智" in result
if urls:
results = await browser.run(url, *urls)
assert isinstance(results, list)
assert len(results) == len(urls) + 1
assert all(("深度赋智" in i) for i in results)

View file

@ -0,0 +1,34 @@
import pytest
from metagpt.config import CONFIG
from metagpt.tools import web_browser_engine_playwright
@pytest.mark.asyncio
@pytest.mark.parametrize(
"browser_type, use_proxy, kwagrs, url, urls",
[
("chromium", {"proxy": True}, {}, "https://fuzhi.ai", ("https://fuzhi.ai",)),
("firefox", {}, {"ignore_https_errors": True}, "https://fuzhi.ai", ("https://fuzhi.ai",)),
("webkit", {}, {"ignore_https_errors": True}, "https://fuzhi.ai", ("https://fuzhi.ai",)),
],
ids=["chromium-normal", "firefox-normal", "webkit-normal"],
)
async def test_scrape_web_page(browser_type, use_proxy, kwagrs, url, urls, proxy, capfd):
try:
global_proxy = CONFIG.global_proxy
if use_proxy:
CONFIG.global_proxy = proxy
browser = web_browser_engine_playwright.PlaywrightWrapper(browser_type, **kwagrs)
result = await browser.run(url)
assert isinstance(result, str)
assert "Deepwisdom" in result
if urls:
results = await browser.run(url, *urls)
assert isinstance(results, list)
assert len(results) == len(urls) + 1
assert all(("Deepwisdom" in i) for i in results)
if use_proxy:
assert "Proxy:" in capfd.readouterr().out
finally:
CONFIG.global_proxy = global_proxy

View file

@ -0,0 +1,34 @@
import pytest
from metagpt.config import CONFIG
from metagpt.tools import web_browser_engine_selenium
@pytest.mark.asyncio
@pytest.mark.parametrize(
"browser_type, use_proxy, url, urls",
[
("chrome", True, "https://fuzhi.ai", ("https://fuzhi.ai",)),
("firefox", False, "https://fuzhi.ai", ("https://fuzhi.ai",)),
("edge", False, "https://fuzhi.ai", ("https://fuzhi.ai",)),
],
ids=["chrome-normal", "firefox-normal", "edge-normal"],
)
async def test_scrape_web_page(browser_type, use_proxy, url, urls, proxy, capfd):
try:
global_proxy = CONFIG.global_proxy
if use_proxy:
CONFIG.global_proxy = proxy
browser = web_browser_engine_selenium.SeleniumWrapper(browser_type)
result = await browser.run(url)
assert isinstance(result, str)
assert "Deepwisdom" in result
if urls:
results = await browser.run(url, *urls)
assert isinstance(results, list)
assert len(results) == len(urls) + 1
assert all(("Deepwisdom" in i) for i in results)
if use_proxy:
assert "Proxy:" in capfd.readouterr().out
finally:
CONFIG.global_proxy = global_proxy