diff --git a/metagpt/utils/repair_llm_raw_output.py b/metagpt/utils/repair_llm_raw_output.py index 973cffb8a..6da974d96 100644 --- a/metagpt/utils/repair_llm_raw_output.py +++ b/metagpt/utils/repair_llm_raw_output.py @@ -105,23 +105,6 @@ def repair_required_key_pair_missing(output: str, req_key: str = "[/CONTENT]") - return output -def remove_comments_from_line(line): - """ - Remove comments from a single line of string. - Comments are assumed to start with '#' or '//' and are not inside string values. - """ - comment_index = -1 - for match in re.finditer(r"(\".*?\"|\'.*?\')|(#|//)", line): - if match.group(1): # if the string value - continue - if match.group(2): # if comments - comment_index = match.start(2) - break - if comment_index != -1: # if comments, then delete them - return line[:comment_index].rstrip() - return line - - def repair_json_format(output: str) -> str: """ fix extra `[` or `}` in the end @@ -136,13 +119,22 @@ def repair_json_format(output: str) -> str: logger.info(f"repair_json_format: {'}]'}") elif output.startswith("{") and output.endswith("]"): output = output[:-1] + "}" - - # remove comments in output json string + # remove comments in output json string, after json value content, maybe start with #, maybe start with // arr = output.split("\n") new_arr = [] - for line in arr: - new_line = remove_comments_from_line(line) - new_arr.append(new_line) + for json_line in arr: + # look for # or // comments and make sure they are not inside the string value + comment_index = -1 + for match in re.finditer(r"(\".*?\"|\'.*?\')|(#|//)", json_line): + if match.group(1): # if the string value + continue + if match.group(2): # if comments + comment_index = match.start(2) + break + # if comments, then delete them + if comment_index != -1: + json_line = json_line[:comment_index].rstrip() + new_arr.append(json_line) output = "\n".join(new_arr) return output