From 9deb9f35127925aeca9dab6b8e3fc6cea3b11859 Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Mon, 12 Aug 2024 16:40:27 +0800 Subject: [PATCH 1/3] add url params for SERPAPI/SERPER search engine --- metagpt/tools/search_engine_serpapi.py | 19 ++++++++----------- metagpt/tools/search_engine_serper.py | 15 ++++++--------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/metagpt/tools/search_engine_serpapi.py b/metagpt/tools/search_engine_serpapi.py index 5744b1b62..b3ccb0649 100644 --- a/metagpt/tools/search_engine_serpapi.py +++ b/metagpt/tools/search_engine_serpapi.py @@ -6,7 +6,7 @@ @File : search_engine_serpapi.py """ import warnings -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import aiohttp from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -24,6 +24,7 @@ class SerpAPIWrapper(BaseModel): "hl": "en", } ) + url: str = "https://serpapi.com/search" aiosession: Optional[aiohttp.ClientSession] = None proxy: Optional[str] = None @@ -49,22 +50,18 @@ class SerpAPIWrapper(BaseModel): async def results(self, query: str, max_results: int) -> dict: """Use aiohttp to run query through SerpAPI and return the results async.""" - def construct_url_and_params() -> Tuple[str, Dict[str, str]]: - params = self.get_params(query) - params["source"] = "python" - params["num"] = max_results - params["output"] = "json" - url = "https://serpapi.com/search" - return url, params + params = self.get_params(query) + params["source"] = "python" + params["num"] = max_results + params["output"] = "json" - url, params = construct_url_and_params() if not self.aiosession: async with aiohttp.ClientSession() as session: - async with session.get(url, params=params, proxy=self.proxy) as response: + async with session.get(self.url, params=params, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() else: - async with self.aiosession.get(url, params=params, proxy=self.proxy) as response: + async with self.aiosession.get(self.url, params=params, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() diff --git a/metagpt/tools/search_engine_serper.py b/metagpt/tools/search_engine_serper.py index ba2fb4f93..bcb959ed3 100644 --- a/metagpt/tools/search_engine_serper.py +++ b/metagpt/tools/search_engine_serper.py @@ -7,7 +7,7 @@ """ import json import warnings -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import aiohttp from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -17,6 +17,7 @@ class SerperWrapper(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) api_key: str + url: str = "https://google.serper.dev/search" payload: dict = Field(default_factory=lambda: {"page": 1, "num": 10}) aiosession: Optional[aiohttp.ClientSession] = None proxy: Optional[str] = None @@ -46,20 +47,16 @@ class SerperWrapper(BaseModel): async def results(self, queries: list[str], max_results: int = 8) -> dict: """Use aiohttp to run query through Serper and return the results async.""" - def construct_url_and_payload_and_headers() -> Tuple[str, Dict[str, str]]: - payloads = self.get_payloads(queries, max_results) - url = "https://google.serper.dev/search" - headers = self.get_headers() - return url, payloads, headers + payloads = self.get_payloads(queries, max_results) + headers = self.get_headers() - url, payloads, headers = construct_url_and_payload_and_headers() if not self.aiosession: async with aiohttp.ClientSession() as session: - async with session.post(url, data=payloads, headers=headers, proxy=self.proxy) as response: + async with session.post(self.url, data=payloads, headers=headers, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() else: - async with self.aiosession.get.post(url, data=payloads, headers=headers, proxy=self.proxy) as response: + async with self.aiosession.get.post(self.url, data=payloads, headers=headers, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() From f1d102e76c97eeca7f33f984414a990b74c8317b Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Mon, 12 Aug 2024 20:22:33 +0800 Subject: [PATCH 2/3] add url params for SERPAPI/SERPER search engine --- metagpt/configs/search_config.py | 5 +++-- metagpt/tools/search_engine_serpapi.py | 19 ++++++++----------- metagpt/tools/search_engine_serper.py | 16 +++++++--------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/metagpt/configs/search_config.py b/metagpt/configs/search_config.py index 5f7f2d9a3..7b50fb6d3 100644 --- a/metagpt/configs/search_config.py +++ b/metagpt/configs/search_config.py @@ -7,7 +7,7 @@ """ from typing import Callable, Optional -from pydantic import Field +from pydantic import ConfigDict, Field from metagpt.tools import SearchEngineType from metagpt.utils.yaml_model import YamlModel @@ -16,10 +16,11 @@ from metagpt.utils.yaml_model import YamlModel class SearchConfig(YamlModel): """Config for Search""" + model_config = ConfigDict(extra="allow") + api_type: SearchEngineType = SearchEngineType.DUCK_DUCK_GO api_key: str = "" cse_id: str = "" # for google - discovery_service_url: str = "" # for google search_func: Optional[Callable] = None params: dict = Field( default_factory=lambda: { diff --git a/metagpt/tools/search_engine_serpapi.py b/metagpt/tools/search_engine_serpapi.py index 5744b1b62..b3ccb0649 100644 --- a/metagpt/tools/search_engine_serpapi.py +++ b/metagpt/tools/search_engine_serpapi.py @@ -6,7 +6,7 @@ @File : search_engine_serpapi.py """ import warnings -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import aiohttp from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -24,6 +24,7 @@ class SerpAPIWrapper(BaseModel): "hl": "en", } ) + url: str = "https://serpapi.com/search" aiosession: Optional[aiohttp.ClientSession] = None proxy: Optional[str] = None @@ -49,22 +50,18 @@ class SerpAPIWrapper(BaseModel): async def results(self, query: str, max_results: int) -> dict: """Use aiohttp to run query through SerpAPI and return the results async.""" - def construct_url_and_params() -> Tuple[str, Dict[str, str]]: - params = self.get_params(query) - params["source"] = "python" - params["num"] = max_results - params["output"] = "json" - url = "https://serpapi.com/search" - return url, params + params = self.get_params(query) + params["source"] = "python" + params["num"] = max_results + params["output"] = "json" - url, params = construct_url_and_params() if not self.aiosession: async with aiohttp.ClientSession() as session: - async with session.get(url, params=params, proxy=self.proxy) as response: + async with session.get(self.url, params=params, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() else: - async with self.aiosession.get(url, params=params, proxy=self.proxy) as response: + async with self.aiosession.get(self.url, params=params, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() diff --git a/metagpt/tools/search_engine_serper.py b/metagpt/tools/search_engine_serper.py index ba2fb4f93..932f2eb44 100644 --- a/metagpt/tools/search_engine_serper.py +++ b/metagpt/tools/search_engine_serper.py @@ -7,7 +7,7 @@ """ import json import warnings -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import aiohttp from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -17,6 +17,7 @@ class SerperWrapper(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) api_key: str + url: str = "https://google.serper.dev/search" payload: dict = Field(default_factory=lambda: {"page": 1, "num": 10}) aiosession: Optional[aiohttp.ClientSession] = None proxy: Optional[str] = None @@ -33,6 +34,7 @@ class SerperWrapper(BaseModel): "To use serper search engine, make sure you provide the `api_key` when constructing an object. You can obtain " "an API key from https://serper.dev/." ) + return values async def run(self, query: str, max_results: int = 8, as_string: bool = True, **kwargs: Any) -> str: @@ -46,20 +48,16 @@ class SerperWrapper(BaseModel): async def results(self, queries: list[str], max_results: int = 8) -> dict: """Use aiohttp to run query through Serper and return the results async.""" - def construct_url_and_payload_and_headers() -> Tuple[str, Dict[str, str]]: - payloads = self.get_payloads(queries, max_results) - url = "https://google.serper.dev/search" - headers = self.get_headers() - return url, payloads, headers + payloads = self.get_payloads(queries, max_results) + headers = self.get_headers() - url, payloads, headers = construct_url_and_payload_and_headers() if not self.aiosession: async with aiohttp.ClientSession() as session: - async with session.post(url, data=payloads, headers=headers, proxy=self.proxy) as response: + async with session.post(self.url, data=payloads, headers=headers, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() else: - async with self.aiosession.get.post(url, data=payloads, headers=headers, proxy=self.proxy) as response: + async with self.aiosession.post(self.url, data=payloads, headers=headers, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() From 2a4e3730e10cd6eb96d718b71a4eee80610e6508 Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Tue, 13 Aug 2024 15:12:24 +0800 Subject: [PATCH 3/3] add search report --- metagpt/actions/search_enhanced_qa.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/metagpt/actions/search_enhanced_qa.py b/metagpt/actions/search_enhanced_qa.py index 1d7944d61..152e615b6 100644 --- a/metagpt/actions/search_enhanced_qa.py +++ b/metagpt/actions/search_enhanced_qa.py @@ -4,7 +4,7 @@ from __future__ import annotations import json -from pydantic import Field, model_validator +from pydantic import Field, PrivateAttr, model_validator from metagpt.actions import Action from metagpt.actions.research import CollectLinks, WebBrowseAndSummarize @@ -90,6 +90,8 @@ class SearchEnhancedQA(Action): description="Maximum number of search results (links) to collect using the collect_links_action. This controls the number of potential sources for answering the question.", ) + _reporter: ThoughtReporter = PrivateAttr(ThoughtReporter()) + @model_validator(mode="after") def initialize(self): if self.web_browse_and_summarize_action is None: @@ -118,13 +120,14 @@ class SearchEnhancedQA(Action): Raises: ValueError: If the query is invalid. """ + async with self._reporter: + await self._reporter.async_report({"type": "search", "stage": "init"}) + self._validate_query(query) - self._validate_query(query) + processed_query = await self._process_query(query, rewrite_query) + context = await self._build_context(processed_query) - processed_query = await self._process_query(query, rewrite_query) - context = await self._build_context(processed_query) - - return await self._generate_answer(processed_query, context) + return await self._generate_answer(processed_query, context) def _validate_query(self, query: str) -> None: """Validate the input query. @@ -203,6 +206,7 @@ class SearchEnhancedQA(Action): """ relevant_urls = await self._collect_relevant_links(query) + await self._reporter.async_report({"type": "search", "stage": "searching", "urls": relevant_urls}) if not relevant_urls: logger.warning(f"No relevant URLs found for query: {query}") return [] @@ -245,10 +249,12 @@ class SearchEnhancedQA(Action): contents = await self._fetch_web_contents(urls) summaries = {} + await self._reporter.async_report( + {"type": "search", "stage": "browsing", "pages": [i.model_dump() for i in contents]} + ) for content in contents: url = content.url inner_text = content.inner_text.replace("\n", "") - if self.web_browse_and_summarize_action._is_content_invalid(inner_text): logger.warning(f"Invalid content detected for URL {url}: {inner_text[:10]}...") continue @@ -276,8 +282,7 @@ class SearchEnhancedQA(Action): system_prompt = SEARCH_ENHANCED_QA_SYSTEM_PROMPT.format(context=context) - async with ThoughtReporter(enable_llm_stream=True) as reporter: - await reporter.async_report({"type": "quick"}) + async with ThoughtReporter(uuid=self._reporter.uuid, enable_llm_stream=True) as reporter: + await reporter.async_report({"type": "search", "stage": "answer"}) rsp = await self._aask(query, [system_prompt]) - return rsp