Mirror of https://github.com/FoundationAgents/MetaGPT.git, synced 2026-04-26 09:16:21 +02:00
Merge pull request #69 from shenchucheng/main

Add web page scraping feature implemented by Playwright/Selenium

Commit 007c8c0457
13 changed files with 479 additions and 30 deletions
tests/metagpt/tools/test_web_browser_engine.py (new file, 25 additions)

@@ -0,0 +1,25 @@
import pytest
from metagpt.config import Config
from metagpt.tools import web_browser_engine, WebBrowserEngineType


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "browser_type, url, urls",
    [
        (WebBrowserEngineType.PLAYWRIGHT, "https://fuzhi.ai", ("https://fuzhi.ai",)),
        (WebBrowserEngineType.SELENIUM, "https://fuzhi.ai", ("https://fuzhi.ai",)),
    ],
    ids=["playwright", "selenium"],
)
async def test_scrape_web_page(browser_type, url, urls):
    browser = web_browser_engine.WebBrowserEngine(browser_type)
    result = await browser.run(url)
    assert isinstance(result, str)
    assert "深度赋智" in result

    if urls:
        results = await browser.run(url, *urls)
        assert isinstance(results, list)
        assert len(results) == len(urls) + 1
        assert all(("深度赋智" in i) for i in results)
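The test above suggests the new engine exposes a single async run entry point: passing one URL returns the scraped page content as a string, while passing extra URLs returns a list with one entry per page ("深度赋智" is DeepWisdom's Chinese name, so the assertions simply confirm that fuzhi.ai was really fetched). A minimal standalone sketch of that usage, assuming the constructor and run signature are exactly as this test exercises them and that the chosen browser backend is installed:

import asyncio

from metagpt.tools import web_browser_engine, WebBrowserEngineType


async def main():
    # Pick an engine type explicitly; the test parametrizes over both backends.
    browser = web_browser_engine.WebBrowserEngine(WebBrowserEngineType.PLAYWRIGHT)

    # One URL -> the scraped page content as a single string.
    page = await browser.run("https://fuzhi.ai")
    print(page[:200])

    # First URL plus extra URLs -> a list with one entry per URL.
    pages = await browser.run("https://fuzhi.ai", "https://fuzhi.ai")
    print(len(pages))  # len(extra urls) + 1, per the test's assertion


if __name__ == "__main__":
    asyncio.run(main())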
tests/metagpt/tools/test_web_browser_engine_playwright.py (new file, 34 additions)

@@ -0,0 +1,34 @@
import pytest
from metagpt.config import CONFIG
from metagpt.tools import web_browser_engine_playwright


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "browser_type, use_proxy, kwargs, url, urls",
    [
        ("chromium", {"proxy": True}, {}, "https://fuzhi.ai", ("https://fuzhi.ai",)),
        ("firefox", {}, {"ignore_https_errors": True}, "https://fuzhi.ai", ("https://fuzhi.ai",)),
        ("webkit", {}, {"ignore_https_errors": True}, "https://fuzhi.ai", ("https://fuzhi.ai",)),
    ],
    ids=["chromium-normal", "firefox-normal", "webkit-normal"],
)
async def test_scrape_web_page(browser_type, use_proxy, kwargs, url, urls, proxy, capfd):
    try:
        global_proxy = CONFIG.global_proxy
        if use_proxy:
            CONFIG.global_proxy = proxy
        browser = web_browser_engine_playwright.PlaywrightWrapper(browser_type, **kwargs)
        result = await browser.run(url)
        assert isinstance(result, str)
        assert "Deepwisdom" in result

        if urls:
            results = await browser.run(url, *urls)
            assert isinstance(results, list)
            assert len(results) == len(urls) + 1
            assert all(("Deepwisdom" in i) for i in results)
        if use_proxy:
            assert "Proxy:" in capfd.readouterr().out
    finally:
        CONFIG.global_proxy = global_proxy
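This test and the Selenium one below both depend on a proxy fixture and on the string "Proxy:" appearing on stdout; the fixture itself lives in the test suite's conftest and is not part of this diff, so its exact shape is unknown. Purely as an illustration, a hypothetical minimal version could be a CONNECT-only forwarding proxy that prints its address. Everything here, including the fixture body and the printed prefix, is inferred from the assertions above, not from code shown in this commit:

import asyncio

import pytest_asyncio


async def _pipe(reader, writer):
    # Copy bytes one way until EOF, then close the destination side.
    try:
        while data := await reader.read(65536):
            writer.write(data)
            await writer.drain()
    finally:
        writer.close()


async def _handle(client_reader, client_writer):
    # Expect an HTTPS CONNECT request, open the tunnel, then relay both ways.
    request_line = (await client_reader.readline()).decode()
    target = request_line.split()[1]  # "CONNECT host:port HTTP/1.1" -> "host:port"
    while await client_reader.readline() not in (b"\r\n", b""):
        pass  # discard the remaining request headers
    host, port = target.rsplit(":", 1)
    remote_reader, remote_writer = await asyncio.open_connection(host, int(port))
    client_writer.write(b"HTTP/1.1 200 Connection Established\r\n\r\n")
    await client_writer.drain()
    await asyncio.gather(
        _pipe(client_reader, remote_writer),
        _pipe(remote_reader, client_writer),
        return_exceptions=True,
    )


@pytest_asyncio.fixture
async def proxy():
    # Hypothetical stand-in for the real conftest fixture (not in this diff).
    server = await asyncio.start_server(_handle, "127.0.0.1", 0)
    port = server.sockets[0].getsockname()[1]
    proxy_url = f"http://127.0.0.1:{port}"
    print(f"Proxy: {proxy_url}")  # the tests assert "Proxy:" shows up via capfd
    yield proxy_url
    server.close()
    await server.wait_closed()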
tests/metagpt/tools/test_web_browser_engine_selenium.py (new file, 34 additions)

@@ -0,0 +1,34 @@
import pytest
from metagpt.config import CONFIG
from metagpt.tools import web_browser_engine_selenium


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "browser_type, use_proxy, url, urls",
    [
        ("chrome", True, "https://fuzhi.ai", ("https://fuzhi.ai",)),
        ("firefox", False, "https://fuzhi.ai", ("https://fuzhi.ai",)),
        ("edge", False, "https://fuzhi.ai", ("https://fuzhi.ai",)),
    ],
    ids=["chrome-normal", "firefox-normal", "edge-normal"],
)
async def test_scrape_web_page(browser_type, use_proxy, url, urls, proxy, capfd):
    try:
        global_proxy = CONFIG.global_proxy
        if use_proxy:
            CONFIG.global_proxy = proxy
        browser = web_browser_engine_selenium.SeleniumWrapper(browser_type)
        result = await browser.run(url)
        assert isinstance(result, str)
        assert "Deepwisdom" in result

        if urls:
            results = await browser.run(url, *urls)
            assert isinstance(results, list)
            assert len(results) == len(urls) + 1
            assert all(("Deepwisdom" in i) for i in results)
        if use_proxy:
            assert "Proxy:" in capfd.readouterr().out
    finally:
        CONFIG.global_proxy = global_proxy
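In both proxy-enabled cases the proxy address flows through CONFIG.global_proxy: the tests stash the old value, point it at the fixture's proxy, and restore it in finally. A short sketch of the same pattern outside pytest, where the proxy URL is only a placeholder and SeleniumWrapper("chrome") assumes a locally available Chrome driver, as in the first parametrized case:

import asyncio

from metagpt.config import CONFIG
from metagpt.tools import web_browser_engine_selenium


async def scrape_through_proxy(url: str, proxy_url: str) -> str:
    # Mirror the test's setup: save the global proxy, override it, restore it.
    old_proxy = CONFIG.global_proxy
    CONFIG.global_proxy = proxy_url
    try:
        browser = web_browser_engine_selenium.SeleniumWrapper("chrome")
        return await browser.run(url)
    finally:
        CONFIG.global_proxy = old_proxy


if __name__ == "__main__":
    # "http://127.0.0.1:8888" is a placeholder for a locally running proxy.
    content = asyncio.run(scrape_through_proxy("https://fuzhi.ai", "http://127.0.0.1:8888"))
    print(content[:200])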