From 43ffa3558b89134c7e0c05445b5e55f160546b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 13:52:32 +0800 Subject: [PATCH 01/12] add repair_escape_error function to parse commands --- metagpt/prompts/di/role_zero.py | 3 +++ metagpt/roles/di/role_zero.py | 36 ++++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 8443a7960..25ca4637a 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -100,6 +100,9 @@ JSON_REPAIR_PROMPT = """ ## json data {json_data} +## json decode error +{json_decode_error} + ## Output Format ```json diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 773124dcc..e483f03cc 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -311,10 +311,20 @@ class RoleZero(Role): if commands.endswith("]") and not commands.startswith("["): commands = "[" + commands commands = json.loads(repair_llm_raw_output(output=commands, req_keys=[None], repair_type=RepairType.JSON)) - except json.JSONDecodeError: + except json.JSONDecodeError as e: logger.warning(f"Failed to parse JSON for: {self.command_rsp}. Trying to repair...") - commands = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp)) - commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands)) + commands = await self.llm.aask( + msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp, json_decode_error=str(e)) + ) + try: + commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands)) + except json.JSONDecodeError: + # repair escape error of code and math + commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) + new_command = self.repair_escape_error(commands) + commands = json.loads( + repair_llm_raw_output(output=new_command, req_keys=[None], repair_type=RepairType.JSON) + ) except Exception as e: tb = traceback.format_exc() print(tb) @@ -327,6 +337,26 @@ class RoleZero(Role): commands = commands["commands"] if "commands" in commands else [commands] return commands, True + def repair_escape_error(self, commands): + """Repaires escape errors in command responses""" + escape_repair_map = { + "\a": "\\\\a", + "\b": "\\\\b", + "\f": "\\\\f", + "\r": "\\\\r", + "\t": "\\\\t", + "\v": "\\\\v", + } + new_command = "" + for index, ch in enumerate(commands): + if ch == "\\" and index + 1 < len(commands): + if commands[index + 1] not in ["n", '"', " "]: + new_command += "\\" + elif ch in escape_repair_map: + ch = escape_repair_map[ch] + new_command += ch + return commands + async def _run_commands(self, commands) -> str: outputs = [] for cmd in commands: From 9deb9f35127925aeca9dab6b8e3fc6cea3b11859 Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Mon, 12 Aug 2024 16:40:27 +0800 Subject: [PATCH 02/12] add url params for SERPAPI/SERPER search engine --- metagpt/tools/search_engine_serpapi.py | 19 ++++++++----------- metagpt/tools/search_engine_serper.py | 15 ++++++--------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/metagpt/tools/search_engine_serpapi.py b/metagpt/tools/search_engine_serpapi.py index 5744b1b62..b3ccb0649 100644 --- a/metagpt/tools/search_engine_serpapi.py +++ b/metagpt/tools/search_engine_serpapi.py @@ -6,7 +6,7 @@ @File : search_engine_serpapi.py """ import warnings -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import aiohttp from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -24,6 +24,7 @@ class SerpAPIWrapper(BaseModel): "hl": "en", } ) + url: str = "https://serpapi.com/search" aiosession: Optional[aiohttp.ClientSession] = None proxy: Optional[str] = None @@ -49,22 +50,18 @@ class SerpAPIWrapper(BaseModel): async def results(self, query: str, max_results: int) -> dict: """Use aiohttp to run query through SerpAPI and return the results async.""" - def construct_url_and_params() -> Tuple[str, Dict[str, str]]: - params = self.get_params(query) - params["source"] = "python" - params["num"] = max_results - params["output"] = "json" - url = "https://serpapi.com/search" - return url, params + params = self.get_params(query) + params["source"] = "python" + params["num"] = max_results + params["output"] = "json" - url, params = construct_url_and_params() if not self.aiosession: async with aiohttp.ClientSession() as session: - async with session.get(url, params=params, proxy=self.proxy) as response: + async with session.get(self.url, params=params, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() else: - async with self.aiosession.get(url, params=params, proxy=self.proxy) as response: + async with self.aiosession.get(self.url, params=params, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() diff --git a/metagpt/tools/search_engine_serper.py b/metagpt/tools/search_engine_serper.py index ba2fb4f93..bcb959ed3 100644 --- a/metagpt/tools/search_engine_serper.py +++ b/metagpt/tools/search_engine_serper.py @@ -7,7 +7,7 @@ """ import json import warnings -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import aiohttp from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -17,6 +17,7 @@ class SerperWrapper(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) api_key: str + url: str = "https://google.serper.dev/search" payload: dict = Field(default_factory=lambda: {"page": 1, "num": 10}) aiosession: Optional[aiohttp.ClientSession] = None proxy: Optional[str] = None @@ -46,20 +47,16 @@ class SerperWrapper(BaseModel): async def results(self, queries: list[str], max_results: int = 8) -> dict: """Use aiohttp to run query through Serper and return the results async.""" - def construct_url_and_payload_and_headers() -> Tuple[str, Dict[str, str]]: - payloads = self.get_payloads(queries, max_results) - url = "https://google.serper.dev/search" - headers = self.get_headers() - return url, payloads, headers + payloads = self.get_payloads(queries, max_results) + headers = self.get_headers() - url, payloads, headers = construct_url_and_payload_and_headers() if not self.aiosession: async with aiohttp.ClientSession() as session: - async with session.post(url, data=payloads, headers=headers, proxy=self.proxy) as response: + async with session.post(self.url, data=payloads, headers=headers, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() else: - async with self.aiosession.get.post(url, data=payloads, headers=headers, proxy=self.proxy) as response: + async with self.aiosession.get.post(self.url, data=payloads, headers=headers, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() From be6c3b445554bf4e8278392ab9935cc4cd127afd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 16:53:48 +0800 Subject: [PATCH 03/12] =?UTF-8?q?=E8=B0=83=E6=95=B4repair=5Fescape=5Ferror?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E4=BD=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/roles/di/role_zero.py | 28 ++++++-------------------- metagpt/utils/repair_llm_raw_output.py | 24 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 032bf8101..960dfa805 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -35,7 +35,11 @@ from metagpt.tools.libs.editor import Editor from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender from metagpt.tools.tool_registry import register_tool from metagpt.utils.common import CodeParser, any_to_str -from metagpt.utils.repair_llm_raw_output import RepairType, repair_llm_raw_output +from metagpt.utils.repair_llm_raw_output import ( + RepairType, + repair_escape_error, + repair_llm_raw_output, +) from metagpt.utils.report import ThoughtReporter @@ -326,7 +330,7 @@ class RoleZero(Role): except json.JSONDecodeError: # repair escape error of code and math commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) - new_command = self.repair_escape_error(commands) + new_command = repair_escape_error(commands) commands = json.loads( repair_llm_raw_output(output=new_command, req_keys=[None], repair_type=RepairType.JSON) ) @@ -342,26 +346,6 @@ class RoleZero(Role): commands = commands["commands"] if "commands" in commands else [commands] return commands, True - def repair_escape_error(self, commands): - """Repaires escape errors in command responses""" - escape_repair_map = { - "\a": "\\\\a", - "\b": "\\\\b", - "\f": "\\\\f", - "\r": "\\\\r", - "\t": "\\\\t", - "\v": "\\\\v", - } - new_command = "" - for index, ch in enumerate(commands): - if ch == "\\" and index + 1 < len(commands): - if commands[index + 1] not in ["n", '"', " "]: - new_command += "\\" - elif ch in escape_repair_map: - ch = escape_repair_map[ch] - new_command += ch - return commands - async def _run_commands(self, commands) -> str: outputs = [] for cmd in commands: diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 17e095c5f..fc27448eb 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -347,3 +347,27 @@ def extract_state_value_from_output(content: str) -> str: matches = list(set(matches)) state = matches[0] if len(matches) > 0 else "-1" return state + + +def repair_escape_error(commands): + """ + Repaires escape errors in command responses. + When role-zero parses a command, the command may contain unknown escape characters. + """ + escape_repair_map = { + "\a": "\\\\a", + "\b": "\\\\b", + "\f": "\\\\f", + "\r": "\\\\r", + "\t": "\\\\t", + "\v": "\\\\v", + } + new_command = "" + for index, ch in enumerate(commands): + if ch == "\\" and index + 1 < len(commands): + if commands[index + 1] not in ["n", '"', " "]: + new_command += "\\" + elif ch in escape_repair_map: + ch = escape_repair_map[ch] + new_command += ch + return new_command From 90e1e53bb69a79afd8dafa87cefd6e75e61ef5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 20:07:35 +0800 Subject: [PATCH 04/12] add annotations --- metagpt/utils/repair_llm_raw_output.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index fc27448eb..f1607255e 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -353,6 +353,23 @@ def repair_escape_error(commands): """ Repaires escape errors in command responses. When role-zero parses a command, the command may contain unknown escape characters. + + This function has two steps: + 1. Transform unescaped substrings like "\d" and "\(" to "\\\\d" and "\\\\(". + 2. Transform escaped characters like '\f' to substrings like "\\\\f". + + Example: + When the original JSON string is " {"content":"\\\\( \\\\frac{1}{2} \\\\)"} ", + The "content" will be parsed correctly to "\( \frac{1}{2} \)". + + When there is a wrong JSON string like: " {"content":"\( \frac{1}{2} \)"}", + It will cause a parsing error. + + To repair the wrong JSON string, the following transformations will be used: + "\(" ---> "\\\\(" + '\f' ---> "\\\\f" + "\)" ---> "\\\\)" + """ escape_repair_map = { "\a": "\\\\a", From f1d102e76c97eeca7f33f984414a990b74c8317b Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Mon, 12 Aug 2024 20:22:33 +0800 Subject: [PATCH 05/12] add url params for SERPAPI/SERPER search engine --- metagpt/configs/search_config.py | 5 +++-- metagpt/tools/search_engine_serpapi.py | 19 ++++++++----------- metagpt/tools/search_engine_serper.py | 16 +++++++--------- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/metagpt/configs/search_config.py b/metagpt/configs/search_config.py index 5f7f2d9a3..7b50fb6d3 100644 --- a/metagpt/configs/search_config.py +++ b/metagpt/configs/search_config.py @@ -7,7 +7,7 @@ """ from typing import Callable, Optional -from pydantic import Field +from pydantic import ConfigDict, Field from metagpt.tools import SearchEngineType from metagpt.utils.yaml_model import YamlModel @@ -16,10 +16,11 @@ from metagpt.utils.yaml_model import YamlModel class SearchConfig(YamlModel): """Config for Search""" + model_config = ConfigDict(extra="allow") + api_type: SearchEngineType = SearchEngineType.DUCK_DUCK_GO api_key: str = "" cse_id: str = "" # for google - discovery_service_url: str = "" # for google search_func: Optional[Callable] = None params: dict = Field( default_factory=lambda: { diff --git a/metagpt/tools/search_engine_serpapi.py b/metagpt/tools/search_engine_serpapi.py index 5744b1b62..b3ccb0649 100644 --- a/metagpt/tools/search_engine_serpapi.py +++ b/metagpt/tools/search_engine_serpapi.py @@ -6,7 +6,7 @@ @File : search_engine_serpapi.py """ import warnings -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import aiohttp from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -24,6 +24,7 @@ class SerpAPIWrapper(BaseModel): "hl": "en", } ) + url: str = "https://serpapi.com/search" aiosession: Optional[aiohttp.ClientSession] = None proxy: Optional[str] = None @@ -49,22 +50,18 @@ class SerpAPIWrapper(BaseModel): async def results(self, query: str, max_results: int) -> dict: """Use aiohttp to run query through SerpAPI and return the results async.""" - def construct_url_and_params() -> Tuple[str, Dict[str, str]]: - params = self.get_params(query) - params["source"] = "python" - params["num"] = max_results - params["output"] = "json" - url = "https://serpapi.com/search" - return url, params + params = self.get_params(query) + params["source"] = "python" + params["num"] = max_results + params["output"] = "json" - url, params = construct_url_and_params() if not self.aiosession: async with aiohttp.ClientSession() as session: - async with session.get(url, params=params, proxy=self.proxy) as response: + async with session.get(self.url, params=params, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() else: - async with self.aiosession.get(url, params=params, proxy=self.proxy) as response: + async with self.aiosession.get(self.url, params=params, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() diff --git a/metagpt/tools/search_engine_serper.py b/metagpt/tools/search_engine_serper.py index ba2fb4f93..932f2eb44 100644 --- a/metagpt/tools/search_engine_serper.py +++ b/metagpt/tools/search_engine_serper.py @@ -7,7 +7,7 @@ """ import json import warnings -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional import aiohttp from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -17,6 +17,7 @@ class SerperWrapper(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) api_key: str + url: str = "https://google.serper.dev/search" payload: dict = Field(default_factory=lambda: {"page": 1, "num": 10}) aiosession: Optional[aiohttp.ClientSession] = None proxy: Optional[str] = None @@ -33,6 +34,7 @@ class SerperWrapper(BaseModel): "To use serper search engine, make sure you provide the `api_key` when constructing an object. You can obtain " "an API key from https://serper.dev/." ) + return values async def run(self, query: str, max_results: int = 8, as_string: bool = True, **kwargs: Any) -> str: @@ -46,20 +48,16 @@ class SerperWrapper(BaseModel): async def results(self, queries: list[str], max_results: int = 8) -> dict: """Use aiohttp to run query through Serper and return the results async.""" - def construct_url_and_payload_and_headers() -> Tuple[str, Dict[str, str]]: - payloads = self.get_payloads(queries, max_results) - url = "https://google.serper.dev/search" - headers = self.get_headers() - return url, payloads, headers + payloads = self.get_payloads(queries, max_results) + headers = self.get_headers() - url, payloads, headers = construct_url_and_payload_and_headers() if not self.aiosession: async with aiohttp.ClientSession() as session: - async with session.post(url, data=payloads, headers=headers, proxy=self.proxy) as response: + async with session.post(self.url, data=payloads, headers=headers, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() else: - async with self.aiosession.get.post(url, data=payloads, headers=headers, proxy=self.proxy) as response: + async with self.aiosession.post(self.url, data=payloads, headers=headers, proxy=self.proxy) as response: response.raise_for_status() res = await response.json() From 42da3b1fe8e2200338b5ad476ca4193e2b1d3e12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 20:28:50 +0800 Subject: [PATCH 06/12] update annotations --- metagpt/utils/repair_llm_raw_output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index f1607255e..2015b2ed7 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -352,7 +352,7 @@ def extract_state_value_from_output(content: str) -> str: def repair_escape_error(commands): """ Repaires escape errors in command responses. - When role-zero parses a command, the command may contain unknown escape characters. + When RoleZero parses a command, the command may contain unknown escape characters. This function has two steps: 1. Transform unescaped substrings like "\d" and "\(" to "\\\\d" and "\\\\(". @@ -362,7 +362,7 @@ def repair_escape_error(commands): When the original JSON string is " {"content":"\\\\( \\\\frac{1}{2} \\\\)"} ", The "content" will be parsed correctly to "\( \frac{1}{2} \)". - When there is a wrong JSON string like: " {"content":"\( \frac{1}{2} \)"}", + However, if the orginal JSON string is " {"content":"\( \frac{1}{2} \)"}" directly. It will cause a parsing error. To repair the wrong JSON string, the following transformations will be used: From ae4b65fdfde1e0a6a112c77d0722d74ad9a89b00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 20:30:46 +0800 Subject: [PATCH 07/12] update annotations --- metagpt/utils/repair_llm_raw_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 2015b2ed7..68fa73108 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -362,7 +362,7 @@ def repair_escape_error(commands): When the original JSON string is " {"content":"\\\\( \\\\frac{1}{2} \\\\)"} ", The "content" will be parsed correctly to "\( \frac{1}{2} \)". - However, if the orginal JSON string is " {"content":"\( \frac{1}{2} \)"}" directly. + However, if the original JSON string is " {"content":"\( \frac{1}{2} \)"}" directly. It will cause a parsing error. To repair the wrong JSON string, the following transformations will be used: From 46bce295cab987dc5e73b9dadcf21a1fdda9f1bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Tue, 13 Aug 2024 13:43:46 +0800 Subject: [PATCH 08/12] =?UTF-8?q?fixbug:=20linux=E7=8E=AF=E5=A2=83?= =?UTF-8?q?=E5=8F=98=E9=87=8Fkey=E4=B8=8D=E8=83=BD=E4=BD=BF=E7=94=A8-?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/prompts/di/role_zero.py | 4 ++-- metagpt/tools/libs/env.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 49ad7a05b..b6faa8cb3 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -152,7 +152,7 @@ Respond with a concise thought, then provide the appropriate response category: """ -QUICK_THINK_EXAMPLES =""" +QUICK_THINK_EXAMPLES = """ # Example 1. Request: "How do I design an online document editing platform that supports real-time collaboration?" @@ -190,4 +190,4 @@ Response Category: AMBIGUOUS. # Instruction """ -QUICK_THINK_PROMPT = QUICK_THINK_PROMPT.format(examples=QUICK_THINK_EXAMPLES) \ No newline at end of file +QUICK_THINK_PROMPT = QUICK_THINK_PROMPT.format(examples=QUICK_THINK_EXAMPLES) diff --git a/metagpt/tools/libs/env.py b/metagpt/tools/libs/env.py index 1fa265d07..c1757c5f9 100644 --- a/metagpt/tools/libs/env.py +++ b/metagpt/tools/libs/env.py @@ -31,6 +31,10 @@ async def default_get_env(key: str, app_name: str = None) -> str: if app_key in os.environ: return os.environ[app_key] + env_app_key = app_key.replace("-", "_") # "-" is not supported by linux environment variable + if env_app_key in os.environ: + return os.environ[env_app_key] + from metagpt.context import Context context = Context() From 2a4e3730e10cd6eb96d718b71a4eee80610e6508 Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Tue, 13 Aug 2024 15:12:24 +0800 Subject: [PATCH 09/12] add search report --- metagpt/actions/search_enhanced_qa.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/metagpt/actions/search_enhanced_qa.py b/metagpt/actions/search_enhanced_qa.py index 1d7944d61..152e615b6 100644 --- a/metagpt/actions/search_enhanced_qa.py +++ b/metagpt/actions/search_enhanced_qa.py @@ -4,7 +4,7 @@ from __future__ import annotations import json -from pydantic import Field, model_validator +from pydantic import Field, PrivateAttr, model_validator from metagpt.actions import Action from metagpt.actions.research import CollectLinks, WebBrowseAndSummarize @@ -90,6 +90,8 @@ class SearchEnhancedQA(Action): description="Maximum number of search results (links) to collect using the collect_links_action. This controls the number of potential sources for answering the question.", ) + _reporter: ThoughtReporter = PrivateAttr(ThoughtReporter()) + @model_validator(mode="after") def initialize(self): if self.web_browse_and_summarize_action is None: @@ -118,13 +120,14 @@ class SearchEnhancedQA(Action): Raises: ValueError: If the query is invalid. """ + async with self._reporter: + await self._reporter.async_report({"type": "search", "stage": "init"}) + self._validate_query(query) - self._validate_query(query) + processed_query = await self._process_query(query, rewrite_query) + context = await self._build_context(processed_query) - processed_query = await self._process_query(query, rewrite_query) - context = await self._build_context(processed_query) - - return await self._generate_answer(processed_query, context) + return await self._generate_answer(processed_query, context) def _validate_query(self, query: str) -> None: """Validate the input query. @@ -203,6 +206,7 @@ class SearchEnhancedQA(Action): """ relevant_urls = await self._collect_relevant_links(query) + await self._reporter.async_report({"type": "search", "stage": "searching", "urls": relevant_urls}) if not relevant_urls: logger.warning(f"No relevant URLs found for query: {query}") return [] @@ -245,10 +249,12 @@ class SearchEnhancedQA(Action): contents = await self._fetch_web_contents(urls) summaries = {} + await self._reporter.async_report( + {"type": "search", "stage": "browsing", "pages": [i.model_dump() for i in contents]} + ) for content in contents: url = content.url inner_text = content.inner_text.replace("\n", "") - if self.web_browse_and_summarize_action._is_content_invalid(inner_text): logger.warning(f"Invalid content detected for URL {url}: {inner_text[:10]}...") continue @@ -276,8 +282,7 @@ class SearchEnhancedQA(Action): system_prompt = SEARCH_ENHANCED_QA_SYSTEM_PROMPT.format(context=context) - async with ThoughtReporter(enable_llm_stream=True) as reporter: - await reporter.async_report({"type": "quick"}) + async with ThoughtReporter(uuid=self._reporter.uuid, enable_llm_stream=True) as reporter: + await reporter.async_report({"type": "search", "stage": "answer"}) rsp = await self._aask(query, [system_prompt]) - return rsp From 3e63ea6fa3f20a98b2c0b0f780063c24eea567ec Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Tue, 13 Aug 2024 16:49:35 +0800 Subject: [PATCH 10/12] update prompt --- metagpt/actions/search_enhanced_qa.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/metagpt/actions/search_enhanced_qa.py b/metagpt/actions/search_enhanced_qa.py index 1d7944d61..fd5e3897b 100644 --- a/metagpt/actions/search_enhanced_qa.py +++ b/metagpt/actions/search_enhanced_qa.py @@ -45,7 +45,9 @@ Follow **Instructions**, generate output and make sure it follows the **Constrai SEARCH_ENHANCED_QA_SYSTEM_PROMPT = """ You are a large language AI assistant built by MGX. You are given a user question, and please write clean, concise and accurate answer to the question. You will be given a set of related contexts to the question, each starting with a reference number like [[citation:x]], where x is a number. Please use the context. -Your answer must be correct, accurate and written by an expert using an unbiased and professional tone. Please limit to 1024 tokens. Do not give any information that is not related to the question, and do not repeat. Say "information is missing on" followed by the related topic, if the given context do not provide sufficient information. Do not include [citation] in your anwser. +Your answer must be correct, accurate and written by an expert using an unbiased and professional tone. Please limit to 1024 tokens. Do not give any information that is not related to the question, and do not repeat. Say "information is missing on" followed by the related topic, if the given context do not provide sufficient information. + +Do not include [citation:x] in your anwser, where x is a number. Other than code and specific names and citations, your answer must be written in the same language as the question. Here are the set of contexts: @@ -93,7 +95,7 @@ class SearchEnhancedQA(Action): @model_validator(mode="after") def initialize(self): if self.web_browse_and_summarize_action is None: - self.web_browser_engine = WebBrowserEngine.from_browser_config( + web_browser_engine = WebBrowserEngine.from_browser_config( self.config.browser, proxy=self.config.proxy, java_script_enabled=self.java_script_enabled, @@ -101,7 +103,7 @@ class SearchEnhancedQA(Action): user_agent=self.user_agent, ) - self.web_browse_and_summarize_action = WebBrowseAndSummarize(web_browser_engine=self.web_browser_engine) + self.web_browse_and_summarize_action = WebBrowseAndSummarize(web_browser_engine=web_browser_engine) return self From b36f34c1443ecad617b5c56eb585e091a2e6e819 Mon Sep 17 00:00:00 2001 From: zhanglei Date: Tue, 13 Aug 2024 16:50:09 +0800 Subject: [PATCH 11/12] fix: result process bug --- metagpt/ext/cr/actions/code_review.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metagpt/ext/cr/actions/code_review.py b/metagpt/ext/cr/actions/code_review.py index 473ea8018..ab86ec239 100644 --- a/metagpt/ext/cr/actions/code_review.py +++ b/metagpt/ext/cr/actions/code_review.py @@ -164,6 +164,7 @@ class CodeReview(Action): system_prompt = [CODE_REVIEW_COMFIRM_SYSTEM_PROMPT.format(code_language=code_language)] resp = await self.llm.aask(prompt, system_msgs=system_prompt) if "True" in resp or "true" in resp: + cmt["code"] = get_code_block_from_patch(patch, code_start_line, code_end_line) new_comments.append(cmt) except Exception: logger.info("False") From e6ae39da57d096111c3f32a1b18fbb26289d72a3 Mon Sep 17 00:00:00 2001 From: zhanglei Date: Tue, 13 Aug 2024 16:53:16 +0800 Subject: [PATCH 12/12] fix --- metagpt/ext/cr/actions/code_review.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metagpt/ext/cr/actions/code_review.py b/metagpt/ext/cr/actions/code_review.py index ab86ec239..e3e6e69f2 100644 --- a/metagpt/ext/cr/actions/code_review.py +++ b/metagpt/ext/cr/actions/code_review.py @@ -164,7 +164,6 @@ class CodeReview(Action): system_prompt = [CODE_REVIEW_COMFIRM_SYSTEM_PROMPT.format(code_language=code_language)] resp = await self.llm.aask(prompt, system_msgs=system_prompt) if "True" in resp or "true" in resp: - cmt["code"] = get_code_block_from_patch(patch, code_start_line, code_end_line) new_comments.append(cmt) except Exception: logger.info("False") @@ -209,6 +208,6 @@ class CodeReview(Action): comments = await self.confirm_comments(patch=patch, comments=comments, points=points) for comment in comments: if comment["code"]: - if not (comment["code"].startswith("-") or comment["code"].isspace()): + if not (comment["code"].isspace()): result.append(comment) return result