Replace *.deepwisdom.ai with a local HTTP server in web-scraping tests

This commit is contained in:
shenchucheng 2024-02-21 15:40:23 +08:00
parent ac755e7668
commit d53cfd39f7
6 changed files with 50 additions and 35 deletions

View file

@@ -4,8 +4,8 @@ from metagpt.tools.libs.web_scraping import scrape_web_playwright
@pytest.mark.asyncio
async def test_scrape_web_playwright():
test_url = "https://www.deepwisdom.ai"
async def test_scrape_web_playwright(http_server):
server, test_url = await http_server()
result = await scrape_web_playwright(test_url)
@@ -21,3 +21,4 @@ async def test_scrape_web_playwright():
assert not result["inner_text"].endswith(" ")
assert not result["html"].startswith(" ")
assert not result["html"].endswith(" ")
await server.stop()

View file

@@ -9,14 +9,16 @@ from metagpt.utils.parse_html import WebPage
@pytest.mark.asyncio
@pytest.mark.parametrize(
"browser_type, url, urls",
"browser_type",
[
(WebBrowserEngineType.PLAYWRIGHT, "https://deepwisdom.ai", ("https://deepwisdom.ai",)),
(WebBrowserEngineType.SELENIUM, "https://deepwisdom.ai", ("https://deepwisdom.ai",)),
WebBrowserEngineType.PLAYWRIGHT,
WebBrowserEngineType.SELENIUM,
],
ids=["playwright", "selenium"],
)
async def test_scrape_web_page(browser_type, url, urls):
async def test_scrape_web_page(browser_type, http_server):
server, url = await http_server()
urls = [url, url, url]
browser = web_browser_engine.WebBrowserEngine(engine=browser_type)
result = await browser.run(url)
assert isinstance(result, WebPage)
@@ -27,6 +29,7 @@ async def test_scrape_web_page(browser_type, url, urls):
assert isinstance(results, list)
assert len(results) == len(urls) + 1
assert all(("MetaGPT" in i.inner_text) for i in results)
await server.stop()
if __name__ == "__main__":

View file

@@ -9,18 +9,28 @@ from metagpt.utils.parse_html import WebPage
@pytest.mark.asyncio
@pytest.mark.parametrize(
"browser_type, use_proxy, kwagrs, url, urls",
"browser_type, use_proxy, kwagrs,",
[
("chromium", {"proxy": True}, {}, "https://www.deepwisdom.ai", ("https://www.deepwisdom.ai",)),
("firefox", {}, {"ignore_https_errors": True}, "https://www.deepwisdom.ai", ("https://www.deepwisdom.ai",)),
("webkit", {}, {"ignore_https_errors": True}, "https://www.deepwisdom.ai", ("https://www.deepwisdom.ai",)),
("chromium", {"proxy": True}, {}),
(
"firefox",
{},
{"ignore_https_errors": True},
),
(
"webkit",
{},
{"ignore_https_errors": True},
),
],
ids=["chromium-normal", "firefox-normal", "webkit-normal"],
)
async def test_scrape_web_page(browser_type, use_proxy, kwagrs, url, urls, proxy, capfd):
async def test_scrape_web_page(browser_type, use_proxy, kwagrs, proxy, capfd, http_server):
server, url = await http_server()
urls = [url, url, url]
proxy_url = None
if use_proxy:
server, proxy_url = await proxy()
proxy_server, proxy_url = await proxy()
browser = web_browser_engine_playwright.PlaywrightWrapper(browser_type=browser_type, proxy=proxy_url, **kwagrs)
result = await browser.run(url)
assert isinstance(result, WebPage)
@@ -32,8 +42,10 @@ async def test_scrape_web_page(browser_type, use_proxy, kwagrs, url, urls, proxy
assert len(results) == len(urls) + 1
assert all(("MetaGPT" in i.inner_text) for i in results)
if use_proxy:
server.close()
proxy_server.close()
await proxy_server.wait_closed()
assert "Proxy:" in capfd.readouterr().out
await server.stop()
if __name__ == "__main__":

View file

@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import browsers
import pytest
@@ -10,51 +11,48 @@ from metagpt.utils.parse_html import WebPage
@pytest.mark.asyncio
@pytest.mark.parametrize(
"browser_type, use_proxy, url, urls",
"browser_type, use_proxy,",
[
pytest.param(
"chrome",
True,
"https://deepwisdom.ai",
("https://deepwisdom.ai",),
False,
marks=pytest.mark.skipif(not browsers.get("chrome"), reason="chrome browser not found"),
),
pytest.param(
"firefox",
False,
"https://deepwisdom.ai",
("https://deepwisdom.ai",),
marks=pytest.mark.skipif(not browsers.get("firefox"), reason="firefox browser not found"),
),
pytest.param(
"edge",
False,
"https://deepwisdom.ai",
("https://deepwisdom.ai",),
marks=pytest.mark.skipif(not browsers.get("msedge"), reason="edge browser not found"),
),
],
ids=["chrome-normal", "firefox-normal", "edge-normal"],
)
async def test_scrape_web_page(browser_type, use_proxy, url, urls, proxy, capfd):
async def test_scrape_web_page(browser_type, use_proxy, proxy, capfd, http_server):
# Prerequisites
# firefox, chrome, Microsoft Edge
server, url = await http_server()
urls = [url, url, url]
proxy_url = None
if use_proxy:
server, proxy_url = await proxy()
proxy_server, proxy_url = await proxy()
browser = web_browser_engine_selenium.SeleniumWrapper(browser_type=browser_type, proxy=proxy_url)
result = await browser.run(url)
assert isinstance(result, WebPage)
assert "MetaGPT" in result.inner_text
if urls:
results = await browser.run(url, *urls)
assert isinstance(results, list)
assert len(results) == len(urls) + 1
assert all(("MetaGPT" in i.inner_text) for i in results)
results = await browser.run(url, *urls)
assert isinstance(results, list)
assert len(results) == len(urls) + 1
assert all(("MetaGPT" in i.inner_text) for i in results)
if use_proxy:
server.close()
assert "Proxy:" in capfd.readouterr().out
proxy_server.close()
await proxy_server.wait_closed()
assert "Proxy: localhost" in capfd.readouterr().out
await server.stop()
if __name__ == "__main__":