update repair_llm_raw_output.py

This commit is contained in:
huzixia 2024-01-27 18:23:57 +08:00
parent 11f70ca9b1
commit c3b4c698d8

View file

@ -105,23 +105,6 @@ def repair_required_key_pair_missing(output: str, req_key: str = "[/CONTENT]") -
return output
def remove_comments_from_line(line: str) -> str:
    """
    Remove a trailing comment from a single line of text.

    A comment starts at the first '#' or '//' that is not inside a single- or
    double-quoted string value. A backslash-escaped quote inside a string value
    does not terminate that string, so comment markers that follow it within
    the same value are kept.

    Args:
        line: a single line of text (no newline handling is performed here).

    Returns:
        The line truncated just before the comment marker, with trailing
        whitespace stripped; or the line unchanged when no comment is found.
    """
    # Group 1 consumes whole quoted strings (escape-aware) so that markers
    # inside them are skipped; group 2 is a real comment marker.
    pattern = r"(\"(?:\\.|[^\"\\])*\"|\'(?:\\.|[^\'\\])*\')|(#|//)"
    for match in re.finditer(pattern, line):
        if match.group(2):  # first marker outside any string value
            return line[: match.start(2)].rstrip()
    return line
def repair_json_format(output: str) -> str:
"""
fix extra `[` or `}` in the end
@ -136,13 +119,22 @@ def repair_json_format(output: str) -> str:
logger.info(f"repair_json_format: {'}]'}")
elif output.startswith("{") and output.endswith("]"):
output = output[:-1] + "}"
# remove comments in output json string
# remove comments in output json string, after json value content, maybe start with #, maybe start with //
arr = output.split("\n")
new_arr = []
for line in arr:
new_line = remove_comments_from_line(line)
new_arr.append(new_line)
for json_line in arr:
# look for # or // comments and make sure they are not inside the string value
comment_index = -1
for match in re.finditer(r"(\".*?\"|\'.*?\')|(#|//)", json_line):
if match.group(1): # if the string value
continue
if match.group(2): # if comments
comment_index = match.start(2)
break
# if comments, then delete them
if comment_index != -1:
json_line = json_line[:comment_index].rstrip()
new_arr.append(json_line)
output = "\n".join(new_arr)
return output