Merge branch 'feat-qa-search' into 'mgx_ops'

Feat qa search

See merge request pub/MetaGPT!294
This commit is contained in:
王金淋 2024-08-09 10:34:10 +00:00
commit f68cb033b3
3 changed files with 23 additions and 7 deletions

View file

@ -180,7 +180,13 @@ class CollectLinks(Action):
results = self.rank_func(results)
return [i["link"] for i in results[:num_results]]
async def _search_urls(self, query: str, max_results: int) -> list[str]:
async def _search_urls(self, query: str, max_results: int) -> list[dict[str, str]]:
"""Use search_engine to get urls.
Returns:
e.g. [{"title": "...", "link": "...", "snippet": "..."}]
"""
return await self.search_engine.run(query, max_results=max_results, as_string=False)

View file

@ -74,6 +74,14 @@ class SearchEnhancedQA(Action):
java_script_enabled: bool = Field(
default=False, description="Whether or not to enable JavaScript in the web browser context. Defaults to False."
)
user_agent: str = Field(
default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.81",
description="Specific user agent to use in browser.",
)
extra_http_headers: dict = Field(
default={"sec-ch-ua": 'Chromium";v="125", "Not.A/Brand";v="24'},
description="An object containing additional HTTP headers to be sent with every request.",
)
max_chars_per_webpage_summary: int = Field(
default=4000, description="Maximum summary length for each web page content."
)
@ -86,7 +94,11 @@ class SearchEnhancedQA(Action):
def initialize(self):
if self.web_browse_and_summarize_action is None:
self.web_browser_engine = WebBrowserEngine.from_browser_config(
self.config.browser, proxy=self.config.proxy, java_script_enabled=self.java_script_enabled
self.config.browser,
proxy=self.config.proxy,
java_script_enabled=self.java_script_enabled,
extra_http_headers=self.extra_http_headers,
user_agent=self.user_agent,
)
self.web_browse_and_summarize_action = WebBrowseAndSummarize(web_browser_engine=self.web_browser_engine)

View file

@ -39,11 +39,9 @@ class PlaywrightWrapper(BaseModel):
if not any(str.startswith(i, "--proxy-server=") for i in args):
launch_kwargs["proxy"] = {"server": self.proxy}
if "ignore_https_errors" in kwargs:
self.context_kwargs["ignore_https_errors"] = kwargs["ignore_https_errors"]
if "java_script_enabled" in kwargs:
self.context_kwargs["java_script_enabled"] = kwargs["java_script_enabled"]
for key in ["ignore_https_errors", "java_script_enabled", "extra_http_headers", "user_agent"]:
if key in kwargs:
self.context_kwargs[key] = kwargs[key]
async def run(self, url: str, *urls: str, per_page_timeout: float = None) -> WebPage | list[WebPage]:
async with async_playwright() as ap: