From 0700cd2e62477227034017ea3fd8bc65f98446cb Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Tue, 25 Jul 2023 00:32:02 +0800 Subject: [PATCH] add parse_func for WebBrowserEngine to support custom parsing --- metagpt/tools/web_browser_engine.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metagpt/tools/web_browser_engine.py b/metagpt/tools/web_browser_engine.py index 629ada195..90449d8e1 100644 --- a/metagpt/tools/web_browser_engine.py +++ b/metagpt/tools/web_browser_engine.py @@ -16,6 +16,7 @@ class WebBrowserEngine: self, engine: WebBrowserEngineType | None = None, run_func: Callable[..., Coroutine[Any, Any, str | list[str]]] | None = None, + parse_func: Callable[[str], str] | None = None, ): engine = engine or CONFIG.web_browser_engine @@ -29,6 +30,7 @@ class WebBrowserEngine: run_func = run_func else: raise NotImplementedError + self.parse_func = parse_func or get_page_content self.run_func = run_func self.engine = engine @@ -43,8 +45,8 @@ class WebBrowserEngine: async def run(self, url: str, *urls: str) -> str | list[str]: page = await self.run_func(url, *urls) if isinstance(page, str): - return get_page_content(page) - return [get_page_content(i) for i in page] + return self.parse_func(page) + return [self.parse_func(i) for i in page] def get_page_content(page: str):