Merge pull request #69 from shenchucheng/main

Add web page scraping feature implemented by Playwright/Selenium
2026-06-14 15:25:17 +02:00 · 2023-07-25 11:28:34 +08:00 · 2023-07-25 11:28:34 +08:00 · 007c8c0457
commit 007c8c0457
parent a538f9a3ca e44410b3ad
13 changed files with 479 additions and 30 deletions
--- a/tests/metagpt/tools/test_web_browser_engine.py
+++ b/tests/metagpt/tools/test_web_browser_engine.py
@@ -0,0 +1,25 @@
+import pytest
+from metagpt.config import Config
+from metagpt.tools import web_browser_engine, WebBrowserEngineType
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "browser_type, url, urls",
+    [
+        (WebBrowserEngineType.PLAYWRIGHT, "https://fuzhi.ai", ("https://fuzhi.ai",)),
+        (WebBrowserEngineType.SELENIUM, "https://fuzhi.ai", ("https://fuzhi.ai",)),
+    ],
+    ids=["playwright", "selenium"],
+)
+async def test_scrape_web_page(browser_type, url, urls):
+    browser = web_browser_engine.WebBrowserEngine(browser_type)
+    result = await browser.run(url)
+    assert isinstance(result, str)
+    assert "深度赋智" in result
+
+    if urls:
+        results = await browser.run(url, *urls)
+        assert isinstance(results, list)
+        assert len(results) == len(urls) + 1
+        assert all(("深度赋智" in i) for i in results)
--- a/tests/metagpt/tools/test_web_browser_engine_playwright.py
+++ b/tests/metagpt/tools/test_web_browser_engine_playwright.py
@@ -0,0 +1,34 @@
+import pytest
+from metagpt.config import CONFIG
+from metagpt.tools import web_browser_engine_playwright
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "browser_type, use_proxy, kwagrs, url, urls",
+    [
+        ("chromium", {"proxy": True}, {}, "https://fuzhi.ai", ("https://fuzhi.ai",)),
+        ("firefox", {}, {"ignore_https_errors": True}, "https://fuzhi.ai", ("https://fuzhi.ai",)),
+        ("webkit", {}, {"ignore_https_errors": True}, "https://fuzhi.ai", ("https://fuzhi.ai",)),
+    ],
+    ids=["chromium-normal", "firefox-normal", "webkit-normal"],
+)
+async def test_scrape_web_page(browser_type, use_proxy, kwagrs, url, urls, proxy, capfd):
+    try:
+        global_proxy = CONFIG.global_proxy
+        if use_proxy:
+            CONFIG.global_proxy = proxy
+        browser = web_browser_engine_playwright.PlaywrightWrapper(browser_type, **kwagrs)
+        result = await browser.run(url)
+        assert isinstance(result, str)
+        assert "Deepwisdom" in result
+
+        if urls:
+            results = await browser.run(url, *urls)
+            assert isinstance(results, list)
+            assert len(results) == len(urls) + 1
+            assert all(("Deepwisdom" in i) for i in results)
+        if use_proxy:
+            assert "Proxy:" in capfd.readouterr().out
+    finally:
+        CONFIG.global_proxy = global_proxy
--- a/tests/metagpt/tools/test_web_browser_engine_selenium.py
+++ b/tests/metagpt/tools/test_web_browser_engine_selenium.py
@@ -0,0 +1,34 @@
+import pytest
+from metagpt.config import CONFIG
+from metagpt.tools import web_browser_engine_selenium
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "browser_type, use_proxy, url, urls",
+    [
+        ("chrome", True, "https://fuzhi.ai", ("https://fuzhi.ai",)),
+        ("firefox", False, "https://fuzhi.ai", ("https://fuzhi.ai",)),
+        ("edge", False, "https://fuzhi.ai", ("https://fuzhi.ai",)),
+    ],
+    ids=["chrome-normal", "firefox-normal", "edge-normal"],
+)
+async def test_scrape_web_page(browser_type, use_proxy, url, urls, proxy, capfd):
+    try:
+        global_proxy = CONFIG.global_proxy
+        if use_proxy:
+            CONFIG.global_proxy = proxy
+        browser = web_browser_engine_selenium.SeleniumWrapper(browser_type)
+        result = await browser.run(url)
+        assert isinstance(result, str)
+        assert "Deepwisdom" in result
+
+        if urls:
+            results = await browser.run(url, *urls)
+            assert isinstance(results, list)
+            assert len(results) == len(urls) + 1
+            assert all(("Deepwisdom" in i) for i in results)
+        if use_proxy:
+            assert "Proxy:" in capfd.readouterr().out
+    finally:
+        CONFIG.global_proxy = global_proxy