Update the Zhipu API integration for the new model and API; repair additional invalid generated output; update its unit tests

This commit is contained in:
better629 2024-01-17 16:28:13 +08:00
parent 75cbf9f087
commit 4e13eaca6e
17 changed files with 156 additions and 214 deletions

View file

@ -55,6 +55,7 @@ class FileRepository:
"""
pathname = self.workdir / filename
pathname.parent.mkdir(parents=True, exist_ok=True)
content = content if content else "" # avoid `argument must be str, not None` to make it continue
async with aiofiles.open(str(pathname), mode="w") as writer:
await writer.write(content)
logger.info(f"save to: {str(pathname)}")

View file

@ -120,6 +120,15 @@ def repair_json_format(output: str) -> str:
elif output.startswith("{") and output.endswith("]"):
output = output[:-1] + "}"
# strip everything from `#` to the end of each line in the output JSON string; `#` usually appears in `glm-4` output
arr = output.split("\n")
new_arr = []
for line in arr:
idx = line.find("#")
if idx >= 0:
line = line[:idx]
new_arr.append(line)
output = "\n".join(new_arr)
return output
@ -168,15 +177,17 @@ def repair_invalid_json(output: str, error: str) -> str:
example 1. json.decoder.JSONDecodeError: Expecting ',' delimiter: line 154 column 1 (char 2765)
example 2. xxx.JSONDecodeError: Expecting property name enclosed in double quotes: line 14 column 1 (char 266)
"""
pattern = r"line ([0-9]+)"
pattern = r"line ([0-9]+) column ([0-9]+)"
matches = re.findall(pattern, error, re.DOTALL)
if len(matches) > 0:
line_no = int(matches[0]) - 1
line_no = int(matches[0][0]) - 1
col_no = int(matches[0][1]) - 1
# CustomDecoder can handle `"": ''` or `'': ""`, so collapse both `"""` and `'''` into a single `"`
output = output.replace('"""', '"').replace("'''", '"')
arr = output.split("\n")
rline = arr[line_no] # raw line
line = arr[line_no].strip()
# different general problems
if line.endswith("],"):
@ -187,9 +198,12 @@ def repair_invalid_json(output: str, error: str) -> str:
new_line = line.replace("}", "")
elif line.endswith("},") and output.endswith("},"):
new_line = line[:-1]
elif '",' not in line and "," not in line:
elif (rline[col_no] in ["'", '"']) and (line.startswith('"') or line.startswith("'")) and "," not in line:
# problem, `"""` or `'''` without `,`
new_line = f",{line}"
elif '",' not in line and "," not in line and '"' not in line:
new_line = f'{line}",'
elif "," not in line:
elif not line.endswith(","):
# problem: the line is missing a `,` at the end.
new_line = f"{line},"
elif "," in line and len(line) == 1:

View file

@ -27,7 +27,8 @@ TOKEN_COSTS = {
"gpt-4-0613": {"prompt": 0.06, "completion": 0.12},
"gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03},
"text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
"chatglm_turbo": {"prompt": 0.0, "completion": 0.00069}, # 32k version, prompt + completion tokens=0.005¥/k-tokens
"glm-3-turbo": {"prompt": 0.0, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
"glm-4": {"prompt": 0.0, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens
"gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
}