From 43ffa3558b89134c7e0c05445b5e55f160546b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 13:52:32 +0800 Subject: [PATCH 1/5] add repair_escape_error function to parse commands --- metagpt/prompts/di/role_zero.py | 3 +++ metagpt/roles/di/role_zero.py | 36 ++++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index 8443a7960..25ca4637a 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -100,6 +100,9 @@ JSON_REPAIR_PROMPT = """ ## json data {json_data} +## json decode error +{json_decode_error} + ## Output Format ```json diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 773124dcc..e483f03cc 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -311,10 +311,20 @@ class RoleZero(Role): if commands.endswith("]") and not commands.startswith("["): commands = "[" + commands commands = json.loads(repair_llm_raw_output(output=commands, req_keys=[None], repair_type=RepairType.JSON)) - except json.JSONDecodeError: + except json.JSONDecodeError as e: logger.warning(f"Failed to parse JSON for: {self.command_rsp}. Trying to repair...") - commands = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp)) - commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands)) + commands = await self.llm.aask( + msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp, json_decode_error=str(e)) + ) + try: + commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands)) + except json.JSONDecodeError: + # repair escape error of code and math + commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) + new_command = self.repair_escape_error(commands) + commands = json.loads( + repair_llm_raw_output(output=new_command, req_keys=[None], repair_type=RepairType.JSON) + ) except Exception as e: tb = traceback.format_exc() print(tb) @@ -327,6 +337,26 @@ class RoleZero(Role): commands = commands["commands"] if "commands" in commands else [commands] return commands, True + def repair_escape_error(self, commands): + """Repaires escape errors in command responses""" + escape_repair_map = { + "\a": "\\\\a", + "\b": "\\\\b", + "\f": "\\\\f", + "\r": "\\\\r", + "\t": "\\\\t", + "\v": "\\\\v", + } + new_command = "" + for index, ch in enumerate(commands): + if ch == "\\" and index + 1 < len(commands): + if commands[index + 1] not in ["n", '"', " "]: + new_command += "\\" + elif ch in escape_repair_map: + ch = escape_repair_map[ch] + new_command += ch + return commands + async def _run_commands(self, commands) -> str: outputs = [] for cmd in commands: From be6c3b445554bf4e8278392ab9935cc4cd127afd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 16:53:48 +0800 Subject: [PATCH 2/5] =?UTF-8?q?=E8=B0=83=E6=95=B4repair=5Fescape=5Ferror?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E4=BD=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/roles/di/role_zero.py | 28 ++++++-------------------- metagpt/utils/repair_llm_raw_output.py | 24 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 032bf8101..960dfa805 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -35,7 +35,11 @@ from metagpt.tools.libs.editor import Editor from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender from metagpt.tools.tool_registry import register_tool from metagpt.utils.common import CodeParser, any_to_str -from metagpt.utils.repair_llm_raw_output import RepairType, repair_llm_raw_output +from metagpt.utils.repair_llm_raw_output import ( + RepairType, + repair_escape_error, + repair_llm_raw_output, +) from metagpt.utils.report import ThoughtReporter @@ -326,7 +330,7 @@ class RoleZero(Role): except json.JSONDecodeError: # repair escape error of code and math commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp) - new_command = self.repair_escape_error(commands) + new_command = repair_escape_error(commands) commands = json.loads( repair_llm_raw_output(output=new_command, req_keys=[None], repair_type=RepairType.JSON) ) @@ -342,26 +346,6 @@ class RoleZero(Role): commands = commands["commands"] if "commands" in commands else [commands] return commands, True - def repair_escape_error(self, commands): - """Repaires escape errors in command responses""" - escape_repair_map = { - "\a": "\\\\a", - "\b": "\\\\b", - "\f": "\\\\f", - "\r": "\\\\r", - "\t": "\\\\t", - "\v": "\\\\v", - } - new_command = "" - for index, ch in enumerate(commands): - if ch == "\\" and index + 1 < len(commands): - if commands[index + 1] not in ["n", '"', " "]: - new_command += "\\" - elif ch in escape_repair_map: - ch = escape_repair_map[ch] - new_command += ch - return commands - async def _run_commands(self, commands) -> str: outputs = [] for cmd in commands: diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 17e095c5f..fc27448eb 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -347,3 +347,27 @@ def extract_state_value_from_output(content: str) -> str: matches = list(set(matches)) state = matches[0] if len(matches) > 0 else "-1" return state + + +def repair_escape_error(commands): + """ + Repaires escape errors in command responses. + When role-zero parses a command, the command may contain unknown escape characters. + """ + escape_repair_map = { + "\a": "\\\\a", + "\b": "\\\\b", + "\f": "\\\\f", + "\r": "\\\\r", + "\t": "\\\\t", + "\v": "\\\\v", + } + new_command = "" + for index, ch in enumerate(commands): + if ch == "\\" and index + 1 < len(commands): + if commands[index + 1] not in ["n", '"', " "]: + new_command += "\\" + elif ch in escape_repair_map: + ch = escape_repair_map[ch] + new_command += ch + return new_command From 90e1e53bb69a79afd8dafa87cefd6e75e61ef5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 20:07:35 +0800 Subject: [PATCH 3/5] add annotations --- metagpt/utils/repair_llm_raw_output.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index fc27448eb..f1607255e 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -353,6 +353,23 @@ def repair_escape_error(commands): """ Repaires escape errors in command responses. When role-zero parses a command, the command may contain unknown escape characters. + + This function has two steps: + 1. Transform unescaped substrings like "\d" and "\(" to "\\\\d" and "\\\\(". + 2. Transform escaped characters like '\f' to substrings like "\\\\f". + + Example: + When the original JSON string is " {"content":"\\\\( \\\\frac{1}{2} \\\\)"} ", + The "content" will be parsed correctly to "\( \frac{1}{2} \)". + + When there is a wrong JSON string like: " {"content":"\( \frac{1}{2} \)"}", + It will cause a parsing error. + + To repair the wrong JSON string, the following transformations will be used: + "\(" ---> "\\\\(" + '\f' ---> "\\\\f" + "\)" ---> "\\\\)" + """ escape_repair_map = { "\a": "\\\\a", From 42da3b1fe8e2200338b5ad476ca4193e2b1d3e12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 20:28:50 +0800 Subject: [PATCH 4/5] update annotations --- metagpt/utils/repair_llm_raw_output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index f1607255e..2015b2ed7 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -352,7 +352,7 @@ def extract_state_value_from_output(content: str) -> str: def repair_escape_error(commands): """ Repaires escape errors in command responses. - When role-zero parses a command, the command may contain unknown escape characters. + When RoleZero parses a command, the command may contain unknown escape characters. This function has two steps: 1. Transform unescaped substrings like "\d" and "\(" to "\\\\d" and "\\\\(". @@ -362,7 +362,7 @@ def repair_escape_error(commands): When the original JSON string is " {"content":"\\\\( \\\\frac{1}{2} \\\\)"} ", The "content" will be parsed correctly to "\( \frac{1}{2} \)". - When there is a wrong JSON string like: " {"content":"\( \frac{1}{2} \)"}", + However, if the orginal JSON string is " {"content":"\( \frac{1}{2} \)"}" directly. It will cause a parsing error. To repair the wrong JSON string, the following transformations will be used: From ae4b65fdfde1e0a6a112c77d0722d74ad9a89b00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Mon, 12 Aug 2024 20:30:46 +0800 Subject: [PATCH 5/5] update annotations --- metagpt/utils/repair_llm_raw_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 2015b2ed7..68fa73108 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -362,7 +362,7 @@ def repair_escape_error(commands): When the original JSON string is " {"content":"\\\\( \\\\frac{1}{2} \\\\)"} ", The "content" will be parsed correctly to "\( \frac{1}{2} \)". - However, if the orginal JSON string is " {"content":"\( \frac{1}{2} \)"}" directly. + However, if the original JSON string is " {"content":"\( \frac{1}{2} \)"}" directly. It will cause a parsing error. To repair the wrong JSON string, the following transformations will be used: