mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-30 11:26:23 +02:00
Replace *.deepwisdom.ai URLs in the web-scraping tests with a local HTTP server
This commit is contained in:
parent
ac755e7668
commit
d53cfd39f7
6 changed files with 50 additions and 35 deletions
|
|
@ -4,8 +4,8 @@ from metagpt.tools.libs.web_scraping import scrape_web_playwright
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scrape_web_playwright():
|
||||
test_url = "https://www.deepwisdom.ai"
|
||||
async def test_scrape_web_playwright(http_server):
|
||||
server, test_url = await http_server()
|
||||
|
||||
result = await scrape_web_playwright(test_url)
|
||||
|
||||
|
|
@ -21,3 +21,4 @@ async def test_scrape_web_playwright():
|
|||
assert not result["inner_text"].endswith(" ")
|
||||
assert not result["html"].startswith(" ")
|
||||
assert not result["html"].endswith(" ")
|
||||
await server.stop()
|
||||
|
|
|
|||
|
|
@ -9,14 +9,16 @@ from metagpt.utils.parse_html import WebPage
|
|||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"browser_type, url, urls",
|
||||
"browser_type",
|
||||
[
|
||||
(WebBrowserEngineType.PLAYWRIGHT, "https://deepwisdom.ai", ("https://deepwisdom.ai",)),
|
||||
(WebBrowserEngineType.SELENIUM, "https://deepwisdom.ai", ("https://deepwisdom.ai",)),
|
||||
WebBrowserEngineType.PLAYWRIGHT,
|
||||
WebBrowserEngineType.SELENIUM,
|
||||
],
|
||||
ids=["playwright", "selenium"],
|
||||
)
|
||||
async def test_scrape_web_page(browser_type, url, urls):
|
||||
async def test_scrape_web_page(browser_type, http_server):
|
||||
server, url = await http_server()
|
||||
urls = [url, url, url]
|
||||
browser = web_browser_engine.WebBrowserEngine(engine=browser_type)
|
||||
result = await browser.run(url)
|
||||
assert isinstance(result, WebPage)
|
||||
|
|
@ -27,6 +29,7 @@ async def test_scrape_web_page(browser_type, url, urls):
|
|||
assert isinstance(results, list)
|
||||
assert len(results) == len(urls) + 1
|
||||
assert all(("MetaGPT" in i.inner_text) for i in results)
|
||||
await server.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -9,18 +9,28 @@ from metagpt.utils.parse_html import WebPage
|
|||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"browser_type, use_proxy, kwagrs, url, urls",
|
||||
"browser_type, use_proxy, kwagrs,",
|
||||
[
|
||||
("chromium", {"proxy": True}, {}, "https://www.deepwisdom.ai", ("https://www.deepwisdom.ai",)),
|
||||
("firefox", {}, {"ignore_https_errors": True}, "https://www.deepwisdom.ai", ("https://www.deepwisdom.ai",)),
|
||||
("webkit", {}, {"ignore_https_errors": True}, "https://www.deepwisdom.ai", ("https://www.deepwisdom.ai",)),
|
||||
("chromium", {"proxy": True}, {}),
|
||||
(
|
||||
"firefox",
|
||||
{},
|
||||
{"ignore_https_errors": True},
|
||||
),
|
||||
(
|
||||
"webkit",
|
||||
{},
|
||||
{"ignore_https_errors": True},
|
||||
),
|
||||
],
|
||||
ids=["chromium-normal", "firefox-normal", "webkit-normal"],
|
||||
)
|
||||
async def test_scrape_web_page(browser_type, use_proxy, kwagrs, url, urls, proxy, capfd):
|
||||
async def test_scrape_web_page(browser_type, use_proxy, kwagrs, proxy, capfd, http_server):
|
||||
server, url = await http_server()
|
||||
urls = [url, url, url]
|
||||
proxy_url = None
|
||||
if use_proxy:
|
||||
server, proxy_url = await proxy()
|
||||
proxy_server, proxy_url = await proxy()
|
||||
browser = web_browser_engine_playwright.PlaywrightWrapper(browser_type=browser_type, proxy=proxy_url, **kwagrs)
|
||||
result = await browser.run(url)
|
||||
assert isinstance(result, WebPage)
|
||||
|
|
@ -32,8 +42,10 @@ async def test_scrape_web_page(browser_type, use_proxy, kwagrs, url, urls, proxy
|
|||
assert len(results) == len(urls) + 1
|
||||
assert all(("MetaGPT" in i.inner_text) for i in results)
|
||||
if use_proxy:
|
||||
server.close()
|
||||
proxy_server.close()
|
||||
await proxy_server.wait_closed()
|
||||
assert "Proxy:" in capfd.readouterr().out
|
||||
await server.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import browsers
|
||||
import pytest
|
||||
|
||||
|
|
@ -10,51 +11,48 @@ from metagpt.utils.parse_html import WebPage
|
|||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
"browser_type, use_proxy, url, urls",
|
||||
"browser_type, use_proxy,",
|
||||
[
|
||||
pytest.param(
|
||||
"chrome",
|
||||
True,
|
||||
"https://deepwisdom.ai",
|
||||
("https://deepwisdom.ai",),
|
||||
False,
|
||||
marks=pytest.mark.skipif(not browsers.get("chrome"), reason="chrome browser not found"),
|
||||
),
|
||||
pytest.param(
|
||||
"firefox",
|
||||
False,
|
||||
"https://deepwisdom.ai",
|
||||
("https://deepwisdom.ai",),
|
||||
marks=pytest.mark.skipif(not browsers.get("firefox"), reason="firefox browser not found"),
|
||||
),
|
||||
pytest.param(
|
||||
"edge",
|
||||
False,
|
||||
"https://deepwisdom.ai",
|
||||
("https://deepwisdom.ai",),
|
||||
marks=pytest.mark.skipif(not browsers.get("msedge"), reason="edge browser not found"),
|
||||
),
|
||||
],
|
||||
ids=["chrome-normal", "firefox-normal", "edge-normal"],
|
||||
)
|
||||
async def test_scrape_web_page(browser_type, use_proxy, url, urls, proxy, capfd):
|
||||
async def test_scrape_web_page(browser_type, use_proxy, proxy, capfd, http_server):
|
||||
# Prerequisites
|
||||
# firefox, chrome, Microsoft Edge
|
||||
server, url = await http_server()
|
||||
urls = [url, url, url]
|
||||
proxy_url = None
|
||||
if use_proxy:
|
||||
server, proxy_url = await proxy()
|
||||
proxy_server, proxy_url = await proxy()
|
||||
browser = web_browser_engine_selenium.SeleniumWrapper(browser_type=browser_type, proxy=proxy_url)
|
||||
result = await browser.run(url)
|
||||
assert isinstance(result, WebPage)
|
||||
assert "MetaGPT" in result.inner_text
|
||||
|
||||
if urls:
|
||||
results = await browser.run(url, *urls)
|
||||
assert isinstance(results, list)
|
||||
assert len(results) == len(urls) + 1
|
||||
assert all(("MetaGPT" in i.inner_text) for i in results)
|
||||
results = await browser.run(url, *urls)
|
||||
assert isinstance(results, list)
|
||||
assert len(results) == len(urls) + 1
|
||||
assert all(("MetaGPT" in i.inner_text) for i in results)
|
||||
if use_proxy:
|
||||
server.close()
|
||||
assert "Proxy:" in capfd.readouterr().out
|
||||
proxy_server.close()
|
||||
await proxy_server.wait_closed()
|
||||
assert "Proxy: localhost" in capfd.readouterr().out
|
||||
await server.stop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue