Merge pull request #799 from HuZixia/huzixia

Solve JSONDecodeError
This commit is contained in:
geekan 2024-01-28 10:27:34 +08:00 committed by GitHub
commit ee0801a116
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 40 additions and 7 deletions

View file

@ -119,15 +119,22 @@ def repair_json_format(output: str) -> str:
logger.info(f"repair_json_format: {'}]'}")
elif output.startswith("{") and output.endswith("]"):
output = output[:-1] + "}"
# remove `#` in output json str, usually appeared in `glm-4`
# remove comments in output json string, after json value content, maybe start with #, maybe start with //
arr = output.split("\n")
new_arr = []
for line in arr:
idx = line.find("#")
if idx >= 0:
line = line[:idx]
new_arr.append(line)
for json_line in arr:
# look for # or // comments and make sure they are not inside the string value
comment_index = -1
for match in re.finditer(r"(\".*?\"|\'.*?\')|(#|//)", json_line):
if match.group(1): # if the string value
continue
if match.group(2): # if comments
comment_index = match.start(2)
break
# if comments, then delete them
if comment_index != -1:
json_line = json_line[:comment_index].rstrip()
new_arr.append(json_line)
output = "\n".join(new_arr)
return output

View file

@ -141,6 +141,32 @@ def test_repair_json_format():
output = repair_llm_raw_output(output=raw_output, req_keys=[None], repair_type=RepairType.JSON)
assert output == target_output
raw_output = """
{
"Language": "en_us", // define language
"Programming Language": "Python" # define code language
}
"""
target_output = """{
"Language": "en_us",
"Programming Language": "Python"
}"""
output = repair_llm_raw_output(output=raw_output, req_keys=[None], repair_type=RepairType.JSON)
assert output == target_output
raw_output = """
{
"Language": "#en_us#", // define language
"Programming Language": "//Python # Code // Language//" # define code language
}
"""
target_output = """{
"Language": "#en_us#",
"Programming Language": "//Python # Code // Language//"
}"""
output = repair_llm_raw_output(output=raw_output, req_keys=[None], repair_type=RepairType.JSON)
assert output == target_output
def test_repair_invalid_json():
from metagpt.utils.repair_llm_raw_output import repair_invalid_json