mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-05 14:55:18 +02:00
Merge branch 'feat-qa-search' into 'mgx_ops'
Feat qa search. See merge request pub/MetaGPT!294
This commit is contained in:
commit
f68cb033b3
3 changed files with 23 additions and 7 deletions
|
|
@@ -180,7 +180,13 @@ class CollectLinks(Action):
|
|||
results = self.rank_func(results)
|
||||
return [i["link"] for i in results[:num_results]]
|
||||
|
||||
async def _search_urls(self, query: str, max_results: int) -> list[str]:
|
||||
async def _search_urls(self, query: str, max_results: int) -> list[dict[str, str]]:
|
||||
"""Use search_engine to get urls.
|
||||
|
||||
Returns:
|
||||
e.g. [{"title": "...", "link": "...", "snippet": "..."}]
|
||||
"""
|
||||
|
||||
return await self.search_engine.run(query, max_results=max_results, as_string=False)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@@ -74,6 +74,14 @@ class SearchEnhancedQA(Action):
|
|||
java_script_enabled: bool = Field(
|
||||
default=False, description="Whether or not to enable JavaScript in the web browser context. Defaults to False."
|
||||
)
|
||||
user_agent: str = Field(
|
||||
default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.81",
|
||||
description="Specific user agent to use in browser.",
|
||||
)
|
||||
extra_http_headers: dict = Field(
|
||||
default={"sec-ch-ua": 'Chromium";v="125", "Not.A/Brand";v="24'},
|
||||
description="An object containing additional HTTP headers to be sent with every request.",
|
||||
)
|
||||
max_chars_per_webpage_summary: int = Field(
|
||||
default=4000, description="Maximum summary length for each web page content."
|
||||
)
|
||||
|
|
@@ -86,7 +94,11 @@ class SearchEnhancedQA(Action):
|
|||
def initialize(self):
|
||||
if self.web_browse_and_summarize_action is None:
|
||||
self.web_browser_engine = WebBrowserEngine.from_browser_config(
|
||||
self.config.browser, proxy=self.config.proxy, java_script_enabled=self.java_script_enabled
|
||||
self.config.browser,
|
||||
proxy=self.config.proxy,
|
||||
java_script_enabled=self.java_script_enabled,
|
||||
extra_http_headers=self.extra_http_headers,
|
||||
user_agent=self.user_agent,
|
||||
)
|
||||
|
||||
self.web_browse_and_summarize_action = WebBrowseAndSummarize(web_browser_engine=self.web_browser_engine)
|
||||
|
|
|
|||
|
|
@@ -39,11 +39,9 @@ class PlaywrightWrapper(BaseModel):
|
|||
if not any(str.startswith(i, "--proxy-server=") for i in args):
|
||||
launch_kwargs["proxy"] = {"server": self.proxy}
|
||||
|
||||
if "ignore_https_errors" in kwargs:
|
||||
self.context_kwargs["ignore_https_errors"] = kwargs["ignore_https_errors"]
|
||||
|
||||
if "java_script_enabled" in kwargs:
|
||||
self.context_kwargs["java_script_enabled"] = kwargs["java_script_enabled"]
|
||||
for key in ["ignore_https_errors", "java_script_enabled", "extra_http_headers", "user_agent"]:
|
||||
if key in kwargs:
|
||||
self.context_kwargs[key] = kwargs[key]
|
||||
|
||||
async def run(self, url: str, *urls: str, per_page_timeout: float = None) -> WebPage | list[WebPage]:
|
||||
async with async_playwright() as ap:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue