This commit is contained in:
femto 2023-09-08 12:12:33 +08:00
parent e6e72c6e23
commit 6755de0ee8
13 changed files with 290 additions and 573 deletions

View file

@ -1,10 +1,11 @@
import json
import re
from json import scanner, JSONDecodeError
from json import JSONDecodeError
from json.decoder import _decode_uXXXX
# Matches a JSON number. Capture groups:
#   1. integer part, with an optional leading minus sign
#   2. optional fraction, including the leading dot
#   3. optional exponent, including the e/E marker and optional sign
NUMBER_RE = re.compile(
    r"(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?",
    flags=re.DOTALL | re.MULTILINE | re.VERBOSE,
)
def py_make_scanner(context):
parse_object = context.parse_object
parse_array = context.parse_array
@ -26,32 +27,31 @@ def py_make_scanner(context):
if nextchar == '"' or nextchar == "'":
return parse_string(string, idx + 1, strict, delimiter=nextchar)
elif nextchar == '{':
return parse_object((string, idx + 1), strict,
_scan_once, object_hook, object_pairs_hook, memo)
elif nextchar == '[':
elif nextchar == "{":
return parse_object((string, idx + 1), strict, _scan_once, object_hook, object_pairs_hook, memo)
elif nextchar == "[":
return parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
elif nextchar == "n" and string[idx : idx + 4] == "null":
return None, idx + 4
elif nextchar == 't' and string[idx:idx + 4] == 'true':
elif nextchar == "t" and string[idx : idx + 4] == "true":
return True, idx + 4
elif nextchar == 'f' and string[idx:idx + 5] == 'false':
elif nextchar == "f" and string[idx : idx + 5] == "false":
return False, idx + 5
m = match_number(string, idx)
if m is not None:
integer, frac, exp = m.groups()
if frac or exp:
res = parse_float(integer + (frac or '') + (exp or ''))
res = parse_float(integer + (frac or "") + (exp or ""))
else:
res = parse_int(integer)
return res, m.end()
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
return parse_constant('NaN'), idx + 3
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
return parse_constant('Infinity'), idx + 8
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
elif nextchar == "N" and string[idx : idx + 3] == "NaN":
return parse_constant("NaN"), idx + 3
elif nextchar == "I" and string[idx : idx + 8] == "Infinity":
return parse_constant("Infinity"), idx + 8
elif nextchar == "-" and string[idx : idx + 9] == "-Infinity":
return parse_constant("-Infinity"), idx + 9
else:
raise StopIteration(idx)
@ -62,17 +62,28 @@ def py_make_scanner(context):
memo.clear()
return scan_once
# Regex flags shared by the string-chunk and whitespace patterns below.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

# Lazily capture string content up to the next character that needs special
# handling: the closing quote, a backslash, or a control character (0x00-0x1f).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Same shape as STRINGCHUNK, but the quote terminator is a single quote.
STRINGCHUNK_SINGLEQUOTE = re.compile(r"(.*?)([\'\\\x00-\x1f])", FLAGS)

# Maps the character following a backslash to the character it stands for.
# The first three escapes map to themselves; the rest map to control characters.
BACKSLASH = {char: char for char in '"\\/'}
BACKSLASH.update(b="\b", f="\f", n="\n", r="\r", t="\t")

# Characters treated as insignificant whitespace, as a plain string for
# membership tests and as a compiled pattern for skipping runs of them.
WHITESPACE_STR = " \t\n\r"
WHITESPACE = re.compile(r"[ \t\n\r]*", FLAGS)
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
WHITESPACE = re.compile(r"[ \t\n\r]*", FLAGS)
WHITESPACE_STR = " \t\n\r"
def JSONObject(
s_and_end, strict, scan_once, object_hook, object_pairs_hook, memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR
):
s, end = s_and_end
pairs = []
pairs_append = pairs.append
@ -82,14 +93,14 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
memo_get = memo.setdefault
# Use a slice to prevent IndexError from being raised, the following
# check will raise a more specific ValueError if the string is empty
nextchar = s[end:end + 1]
nextchar = s[end : end + 1]
# Normally we expect nextchar == '"'
if nextchar != '"' and nextchar != "'":
if nextchar in _ws:
end = _w(s, end).end()
nextchar = s[end:end + 1]
nextchar = s[end : end + 1]
# Trivial empty object
if nextchar == '}':
if nextchar == "}":
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end + 1
@ -98,17 +109,16 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
pairs = object_hook(pairs)
return pairs, end + 1
elif nextchar != '"':
raise JSONDecodeError(
"Expecting property name enclosed in double quotes", s, end)
raise JSONDecodeError("Expecting property name enclosed in double quotes", s, end)
end += 1
while True:
key, end = scanstring(s, end, strict, delimiter=nextchar)
key = memo_get(key, key)
# To skip some function call overhead we optimize the fast paths where
# the JSON key separator is ": " or just ":".
if s[end:end + 1] != ':':
if s[end : end + 1] != ":":
end = _w(s, end).end()
if s[end:end + 1] != ':':
if s[end : end + 1] != ":":
raise JSONDecodeError("Expecting ':' delimiter", s, end)
end += 1
@ -131,19 +141,18 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
end = _w(s, end + 1).end()
nextchar = s[end]
except IndexError:
nextchar = ''
nextchar = ""
end += 1
if nextchar == '}':
if nextchar == "}":
break
elif nextchar != ',':
elif nextchar != ",":
raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
end = _w(s, end).end()
nextchar = s[end:end + 1]
nextchar = s[end : end + 1]
end += 1
if nextchar != '"':
raise JSONDecodeError(
"Expecting property name enclosed in double quotes", s, end - 1)
raise JSONDecodeError("Expecting property name enclosed in double quotes", s, end - 1)
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end
@ -151,8 +160,9 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end
def py_scanstring(s, end, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match,delimiter='"'):
def py_scanstring(s, end, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match, delimiter='"'):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
@ -181,9 +191,9 @@ def py_scanstring(s, end, strict=True,
# or a backslash denoting that an escape sequence follows
if terminator == delimiter:
break
elif terminator != '\\':
elif terminator != "\\":
if strict:
#msg = "Invalid control character %r at" % (terminator,)
# msg = "Invalid control character %r at" % (terminator,)
msg = "Invalid control character {0!r} at".format(terminator)
raise JSONDecodeError(msg, s, end)
else:
@ -192,10 +202,9 @@ def py_scanstring(s, end, strict=True,
try:
esc = s[end]
except IndexError:
raise JSONDecodeError("Unterminated string starting at",
s, begin) from None
raise JSONDecodeError("Unterminated string starting at", s, begin) from None
# If not a unicode escape sequence, must be in the lookup table
if esc != 'u':
if esc != "u":
try:
char = _b[esc]
except KeyError:
@ -205,26 +214,41 @@ def py_scanstring(s, end, strict=True,
else:
uni = _decode_uXXXX(s, end)
end += 5
if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
if 0xD800 <= uni <= 0xDBFF and s[end : end + 2] == "\\u":
uni2 = _decode_uXXXX(s, end + 1)
if 0xdc00 <= uni2 <= 0xdfff:
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
if 0xDC00 <= uni2 <= 0xDFFF:
uni = 0x10000 + (((uni - 0xD800) << 10) | (uni2 - 0xDC00))
end += 6
char = chr(uni)
_append(char)
return ''.join(chunks), end
return "".join(chunks), end
scanstring = py_scanstring
class CustomDecoder(json.JSONDecoder):
    """``json.JSONDecoder`` that uses this module's pure-Python parsers.

    The object parser, string parser and scanner installed by the base class
    are replaced with ``JSONObject``, ``py_scanstring`` and the scanner built
    by ``py_make_scanner``. That scanner starts a string on either ``"`` or
    ``'`` and passes the opening quote to the string parser as its delimiter.
    """

    def __init__(
        self,
        *,
        object_hook=None,
        parse_float=None,
        parse_int=None,
        parse_constant=None,
        strict=True,
        object_pairs_hook=None,
    ):
        """Initialise the base decoder, then install the custom parsers.

        All arguments are keyword-only and are forwarded unchanged to
        ``json.JSONDecoder.__init__``.
        """
        super().__init__(
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            strict=strict,
            object_pairs_hook=object_pairs_hook,
        )
        self.parse_object = JSONObject
        self.parse_string = py_scanstring
        # Build the scanner last: py_make_scanner reads the parser attributes
        # from this instance, so the overrides above must already be in place.
        self.scan_once = py_make_scanner(self)

    def decode(self, s, _w=json.decoder.WHITESPACE.match):
        """Return the Python object decoded from the JSON document ``s``.

        ``_w`` is the whitespace matcher used to skip leading and trailing
        whitespace. It is forwarded to the base implementation so that a
        caller-supplied matcher is honoured rather than silently ignored.
        """
        return super().decode(s, _w=_w)