Tôi tin rằng một cách tốt hơn để làm điều đó là sử dụng một máy trạng thái (state machine). Dưới đây là mã mẫu mà tôi viết được bằng cách chuyển đổi mã NodeJS ở liên kết bên dưới sang Python 3 (từ khóa `nonlocal` được sử dụng chỉ có trong Python 3, nên mã này sẽ không chạy được trên Python 2).
Chỉnh sửa-1: Đã cập nhật và tạo mã tương thích với Python 2
Chỉnh sửa-2: Đã cập nhật và thêm phiên bản chỉ Python3
https://gist.github.com/creationix/5992451
Phiên bản chỉ dành cho Python 3
# A streaming byte oriented JSON parser.  Feed it a single byte at a time and
# it will emit complete objects as it comes across them.  Whitespace within and
# between objects is ignored.  This means it can parse newline delimited JSON.
import math
def json_machine(emit, next_func=None):
    """Create the entry state of a byte-at-a-time JSON value parser.

    A state is a callable that takes one byte (as an int) and returns the
    state that must receive the next byte.

    Args:
        emit: Callback invoked with each completed value.
        next_func: State to continue with once a value is complete; it also
            receives any byte that cannot start a value. Defaults to the
            value state itself, so consecutive values in a stream are parsed
            one after another.

    Returns:
        The initial state function.

    Raises:
        Exception: Raised by the returned state for a byte that cannot start
            a value when no separate ``next_func`` was supplied.
    """
    def _value(byte_data):
        if not byte_data:
            return  # A zero byte stops the machine: there is no next state.
        if byte_data in (0x09, 0x0a, 0x0d, 0x20):
            return _value  # Ignore whitespace
        if byte_data == 0x22:  # "
            return string_machine(on_value)
        # Only 0x30-0x39 are digits; 0x3a-0x3f (":;<=>?") must not be
        # mistaken for the start of a number.
        if byte_data == 0x2d or 0x30 <= byte_data <= 0x39:  # - or 0-9
            return number_machine(byte_data, on_number)
        if byte_data == 0x7b:  # {
            return object_machine(on_value)
        if byte_data == 0x5b:  # [
            return array_machine(on_value)
        if byte_data == 0x74:  # t
            return constant_machine(TRUE, True, on_value)
        if byte_data == 0x66:  # f
            return constant_machine(FALSE, False, on_value)
        if byte_data == 0x6e:  # n
            return constant_machine(NULL, None, on_value)
        if next_func is _value:
            # Nobody else can take this byte; report it in real hex.
            raise Exception("Unexpected 0x%02x" % byte_data)
        return next_func(byte_data)

    def on_value(value):
        emit(value)
        return next_func

    def on_number(number, byte):
        # A number ends only when a non-number byte arrives, so that byte
        # still has to be processed. Give it to the continuation state (the
        # same one on_value resumes in) so an enclosing array/object gets to
        # validate it; at top level the continuation is _value itself.
        emit(number)
        return next_func(byte)

    next_func = next_func or _value
    return _value
# Bytes that must follow the first letter of each JSON literal: the value
# dispatcher consumes "t", "f" or "n" and the constant machine checks the rest.
TRUE = list(b"rue")
FALSE = list(b"alse")
NULL = list(b"ull")
def constant_machine(bytes_data, value, emit):
    """Create a state that matches a fixed sequence of bytes.

    Args:
        bytes_data: Sequence of expected byte values, in order.
        value: Value passed to ``emit`` once every byte has matched.
        emit: Callback invoked with ``value``; its return value becomes the
            next state.

    Returns:
        The state function expecting the first byte of ``bytes_data``.

    Raises:
        Exception: Raised by the returned state when a byte differs from the
            expected one.
    """
    i = 0
    length = len(bytes_data)

    def _constant(byte_data):
        nonlocal i
        if byte_data != bytes_data[i]:
            # Report the offending byte in real hex to match the "0x" prefix.
            raise Exception("Unexpected 0x%02x" % byte_data)
        i += 1
        if i < length:
            return _constant
        return emit(value)

    return _constant
def string_machine(emit):
    """Create the state that parses the body of a JSON string.

    The returned state expects the first byte after the opening quote.

    Args:
        emit: Callback invoked with the decoded text when the closing quote
            arrives; its return value becomes the next state.

    Returns:
        The state function for the string body.

    Raises:
        Exception: Raised by the states for a raw control character
            (below 0x20) or for an unknown escape letter.
    """
    string = ""
    # Single-letter escapes and the character each one stands for.
    simple_escapes = {
        0x22: '"',
        0x5c: "\\",
        0x2f: "/",
        0x62: "\b",
        0x66: "\f",
        0x6e: "\n",
        0x72: "\r",
        0x74: "\t",
    }

    def _string(byte_data):
        nonlocal string
        if byte_data == 0x22:  # "
            return emit(string)
        if byte_data == 0x5c:  # \
            return _escaped_string
        if byte_data & 0x80:  # UTF-8 handling
            return utf8_machine(byte_data, on_char_code)
        if byte_data < 0x20:  # ASCII control character
            raise Exception("Unexpected control character: 0x%02x" % byte_data)
        string += chr(byte_data)
        return _string

    def _escaped_string(byte_data):
        nonlocal string
        if byte_data == 0x75:  # u
            return hex_machine(on_char_code)
        if byte_data not in simple_escapes:
            # Without this, an unknown escape would return None and the
            # caller would crash on the following byte with a TypeError.
            raise Exception("Invalid escape character: 0x%02x" % byte_data)
        string += simple_escapes[byte_data]
        return _string

    def on_char_code(char_code):
        nonlocal string
        string += chr(char_code)
        return _string

    return _string
# Nestable state machine for UTF-8 Decoding.
def utf8_machine(byte_data, emit):
    """Create the state that decodes the rest of a multi-byte UTF-8 sequence.

    Args:
        byte_data: Lead byte of the sequence (a 2-, 3- or 4-byte lead).
        emit: Callback invoked with the decoded code point; its return value
            becomes the next state.

    Returns:
        The state function expecting the first continuation byte.

    Raises:
        Exception: Immediately, if ``byte_data`` is not a valid lead byte;
            from the returned state, if a byte is not a continuation byte
            (``10xxxxxx``).
    """
    if 0xc0 <= byte_data < 0xe0:  # 2-byte UTF-8 Character
        left = 1
        num = (byte_data & 0x1f) << 6
    elif 0xe0 <= byte_data < 0xf0:  # 3-byte UTF-8 Character
        left = 2
        num = (byte_data & 0xf) << 12
    elif 0xf0 <= byte_data < 0xf8:  # 4-byte UTF-8 Character
        left = 3
        num = (byte_data & 0x07) << 18
    else:
        raise Exception("Invalid byte in UTF-8 string: 0x%02x" % byte_data)

    def _utf8(byte_data):
        nonlocal num, left
        if (byte_data & 0xc0) != 0x80:
            # ints have no JavaScript-style toString(); format the hex here.
            raise Exception("Invalid byte in UTF-8 character: 0x%02x" % byte_data)
        left -= 1
        # Each continuation byte carries 6 payload bits, most significant
        # group first.
        num |= (byte_data & 0x3f) << (left * 6)
        if left:
            return _utf8
        return emit(num)

    return _utf8
# Nestable state machine for hex escaped characters
def hex_machine(emit):
    """Create the state that reads the four hex digits of a ``\\u`` escape.

    Args:
        emit: Callback invoked with the 16-bit value once four digits have
            been read; its return value becomes the next state.

    Returns:
        The state function expecting the first hex digit.

    Raises:
        Exception: Raised by the returned state for a byte that is not one
            of ``0-9``, ``a-f`` or ``A-F``.
    """
    left = 4
    num = 0

    def _hex(byte_data):
        nonlocal num, left
        # Digits stop at 0x39; 0x3a-0x3f are ":;<=>?" and must be rejected.
        if 0x30 <= byte_data <= 0x39:  # 0-9
            digit = byte_data - 0x30
        elif 0x61 <= byte_data <= 0x66:  # a-f
            digit = byte_data - 0x57
        elif 0x41 <= byte_data <= 0x46:  # A-F
            digit = byte_data - 0x37
        else:
            raise Exception("Expected hex char in string hex escape")
        left -= 1
        num |= digit << (left * 4)  # Most significant nibble first.
        if left:
            return _hex
        return emit(num)

    return _hex
def number_machine(byte_data, emit):
    """Create the state that parses a JSON number.

    Args:
        byte_data: First byte of the number: ``-`` or a digit.
        emit: Callback invoked as ``emit(value, byte)`` where ``byte`` is the
            first byte after the number (it is not consumed here); its
            return value becomes the next state.

    Returns:
        The state that must receive the byte after ``byte_data``.

    Raises:
        Exception: If the integer part does not start with a digit.
    """
    sign = 1
    number = 0
    fraction = 0  # Fractional digits accumulated as an integer.
    places = 0  # How many fractional digits were read.
    esign = 1
    exponent = 0

    # All states are defined before the first byte is dispatched: a state
    # returned early would otherwise reference helpers that do not exist yet.
    def _start(byte_data):
        if byte_data == 0x30:
            return _mid  # A leading zero takes no further integer digits.
        if 0x31 <= byte_data <= 0x39:
            return _number(byte_data)
        raise Exception("Invalid number: 0x%02x" % byte_data)

    def _number(byte_data):
        nonlocal number
        if 0x30 <= byte_data <= 0x39:
            number = number * 10 + (byte_data - 0x30)
            return _number
        return _mid(byte_data)

    def _mid(byte_data):
        if byte_data == 0x2e:  # .
            return _decimal
        return _later(byte_data)

    def _decimal(byte_data):
        nonlocal fraction, places
        if 0x30 <= byte_data <= 0x39:
            # Keep the digits in reading order; they are scaled once at the
            # end so that "1.25" is 1.25 and not 1.52.
            fraction = fraction * 10 + (byte_data - 0x30)
            places += 1
            return _decimal
        return _later(byte_data)

    def _later(byte_data):
        if byte_data == 0x45 or byte_data == 0x65:  # E e
            return _esign
        return _done(byte_data)

    def _esign(byte_data):
        nonlocal esign
        if byte_data == 0x2b:  # +
            return _exponent
        if byte_data == 0x2d:  # -
            esign = -1
            return _exponent
        return _exponent(byte_data)

    def _exponent(byte_data):
        nonlocal exponent
        if 0x30 <= byte_data <= 0x39:
            exponent = exponent * 10 + (byte_data - 0x30)
            return _exponent
        return _done(byte_data)

    def _done(byte_data):
        value = number
        if places:
            scale = 10 ** places
            value = (number * scale + fraction) / scale
        value *= sign
        if exponent:
            value *= math.pow(10, esign * exponent)
        return emit(value, byte_data)

    if byte_data == 0x2d:  # -
        sign = -1
        return _start
    return _start(byte_data)
def array_machine(emit):
    """Create the state that parses the inside of a JSON array.

    The returned state expects the byte that follows the opening bracket.
    Elements are parsed by nested value machines and collected into a list
    that is passed to ``emit`` when the closing bracket arrives; whatever
    ``emit`` returns becomes the next state.
    """
    items = []

    def collect(element):
        items.append(element)

    def _after_element(byte_data):
        # Between elements only a comma, the closing bracket or whitespace
        # is acceptable.
        if byte_data == 0x2c:  # ,
            return json_machine(collect, _after_element)
        if byte_data == 0x5d:  # ]
            return emit(items)
        if byte_data in (0x09, 0x0a, 0x0d, 0x20):
            return _after_element  # Ignore whitespace
        raise Exception("Unexpected byte: 0x" + str(byte_data) + " in array body")

    def _first(byte_data):
        if byte_data != 0x5d:
            # Anything but "]" is handed to a fresh value machine.
            return json_machine(collect, _after_element)(byte_data)
        return emit(items)  # Empty array.

    return _first
def object_machine(emit):
    """Create the state that parses the inside of a JSON object.

    The returned state expects the byte that follows the opening brace.

    Args:
        emit: Callback invoked with the completed dict when the closing
            brace arrives; its return value becomes the next state.

    Returns:
        The state function for the start of the object body.

    Raises:
        Exception: Raised by the states for a byte that is not valid at the
            current position (key, colon, or separator).
    """
    object_data = {}
    key = None

    def _object(byte_data):
        if byte_data == 0x7d:  # }
            return emit(object_data)
        return _key(byte_data)

    def _key(byte_data):
        if byte_data in (0x09, 0x0a, 0x0d, 0x20):
            return _object  # Ignore whitespace
        if byte_data == 0x22:  # "
            return string_machine(on_key)
        raise Exception("Unexpected byte: 0x%02x" % byte_data)

    def on_key(result):
        nonlocal key
        key = result
        return _colon

    def _colon(byte_data):
        if byte_data in (0x09, 0x0a, 0x0d, 0x20):
            return _colon  # Ignore whitespace
        if byte_data == 0x3a:  # :
            return json_machine(on_value, _comma)
        raise Exception("Unexpected byte: 0x%02x" % byte_data)

    def on_value(value):
        # Store the value under the most recently parsed key.
        object_data[key] = value

    def _comma(byte_data):
        if byte_data in (0x09, 0x0a, 0x0d, 0x20):
            return _comma  # Ignore whitespace
        if byte_data == 0x2c:  # ,
            return _key
        if byte_data == 0x7d:  # }
            return emit(object_data)
        raise Exception("Unexpected byte: 0x%02x" % byte_data)

    return _object
Phiên bản tương thích Python 2
# A streaming byte oriented JSON parser.  Feed it a single byte at a time and
# it will emit complete objects as it comes across them.  Whitespace within and
# between objects is ignored.  This means it can parse newline delimited JSON.
import math
def json_machine(emit, next_func=None):
    """Create the entry state of a byte-at-a-time JSON value parser.

    A state is a callable that takes one byte (as an int) and returns the
    state that must receive the next byte.

    Args:
        emit: Callback invoked with each completed value.
        next_func: State to continue with once a value is complete; it also
            receives any byte that cannot start a value. Defaults to the
            value state itself, so consecutive values in a stream are parsed
            one after another.

    Returns:
        The initial state function.

    Raises:
        Exception: Raised by the returned state for a byte that cannot start
            a value when no separate ``next_func`` was supplied.
    """
    whitespace = (0x09, 0x0a, 0x0d, 0x20)

    def _value(byte_data):
        if not byte_data:
            return  # A zero byte stops the machine: there is no next state.
        if byte_data in whitespace:
            return _value  # Ignore whitespace
        if byte_data == 0x22:  # "
            return string_machine(on_value)
        # Only 0x30-0x39 are digits; 0x3a-0x3f (":;<=>?") must not be
        # mistaken for the start of a number.
        if byte_data == 0x2d or 0x30 <= byte_data <= 0x39:  # - or 0-9
            return number_machine(byte_data, on_number)
        if byte_data == 0x7b:  # {
            return object_machine(on_value)
        if byte_data == 0x5b:  # [
            return array_machine(on_value)
        if byte_data == 0x74:  # t
            return constant_machine(TRUE, True, on_value)
        if byte_data == 0x66:  # f
            return constant_machine(FALSE, False, on_value)
        if byte_data == 0x6e:  # n
            return constant_machine(NULL, None, on_value)
        if next_func is _value:
            # Nobody else can take this byte; report it in real hex.
            raise Exception("Unexpected 0x%02x" % byte_data)
        return next_func(byte_data)

    def on_value(value):
        emit(value)
        return next_func

    def on_number(number, byte):
        # A number ends only when a non-number byte arrives, so that byte
        # still has to be processed. Give it to the continuation state (the
        # same one on_value resumes in) so an enclosing array/object gets to
        # validate it; at top level the continuation is _value itself.
        emit(number)
        return next_func(byte)

    next_func = next_func or _value
    return _value
# Bytes that must follow the first letter of each JSON literal: the value
# dispatcher consumes "t", "f" or "n" and the constant machine checks the rest.
# bytearray yields integers on both Python 2 and Python 3.
TRUE = list(bytearray(b"rue"))
FALSE = list(bytearray(b"alse"))
NULL = list(bytearray(b"ull"))
def constant_machine(bytes_data, value, emit):
    """Create a state that matches a fixed sequence of bytes.

    Args:
        bytes_data: Sequence of expected byte values, in order.
        value: Value passed to ``emit`` once every byte has matched.
        emit: Callback invoked with ``value``; its return value becomes the
            next state.

    Returns:
        The state function expecting the first byte of ``bytes_data``.

    Raises:
        Exception: Raised by the returned state when a byte differs from the
            expected one.
    """
    # Mutable holder instead of ``nonlocal`` so the code also runs on
    # Python 2.
    local_data = {"i": 0}
    length = len(bytes_data)

    def _constant(byte_data):
        if byte_data != bytes_data[local_data["i"]]:
            # ints have no JavaScript-style toString(); format the hex here.
            raise Exception("Unexpected 0x%02x" % byte_data)
        local_data["i"] += 1
        if local_data["i"] < length:
            return _constant
        return emit(value)

    return _constant
def string_machine(emit):
    """Create the state that parses the body of a JSON string.

    The returned state expects the first byte after the opening quote.

    Args:
        emit: Callback invoked with the decoded text when the closing quote
            arrives; its return value becomes the next state.

    Returns:
        The state function for the string body.

    Raises:
        Exception: Raised by the states for a raw control character
            (below 0x20) or for an unknown escape letter.
    """
    try:
        to_char = unichr  # Python 2: chr() only accepts 0-255.
    except NameError:
        to_char = chr  # Python 3: chr() accepts any code point.
    # Mutable holder instead of ``nonlocal`` so the code also runs on
    # Python 2.
    local_data = {"string": ""}
    # Single-letter escapes and the character each one stands for.
    simple_escapes = {
        0x22: '"',
        0x5c: "\\",
        0x2f: "/",
        0x62: "\b",
        0x66: "\f",
        0x6e: "\n",
        0x72: "\r",
        0x74: "\t",
    }

    def _string(byte_data):
        if byte_data == 0x22:  # "
            return emit(local_data["string"])
        if byte_data == 0x5c:  # \
            return _escaped_string
        if byte_data & 0x80:  # UTF-8 handling
            return utf8_machine(byte_data, on_char_code)
        if byte_data < 0x20:  # ASCII control character
            # ints have no JavaScript-style toString(); format the hex here.
            raise Exception("Unexpected control character: 0x%02x" % byte_data)
        local_data["string"] += chr(byte_data)
        return _string

    def _escaped_string(byte_data):
        if byte_data == 0x75:  # u
            return hex_machine(on_char_code)
        if byte_data not in simple_escapes:
            # Without this, an unknown escape would return None and the
            # caller would crash on the following byte with a TypeError.
            raise Exception("Invalid escape character: 0x%02x" % byte_data)
        local_data["string"] += simple_escapes[byte_data]
        return _string

    def on_char_code(char_code):
        # Decoded code points may exceed 255, so use the Unicode-capable
        # constructor chosen above.
        local_data["string"] += to_char(char_code)
        return _string

    return _string
# Nestable state machine for UTF-8 Decoding.
def utf8_machine(byte_data, emit):
    """Create the state that decodes the rest of a multi-byte UTF-8 sequence.

    Args:
        byte_data: Lead byte of the sequence (a 2-, 3- or 4-byte lead).
        emit: Callback invoked with the decoded code point; its return value
            becomes the next state.

    Returns:
        The state function expecting the first continuation byte.

    Raises:
        Exception: Immediately, if ``byte_data`` is not a valid lead byte;
            from the returned state, if a byte is not a continuation byte
            (``10xxxxxx``).
    """
    # Mutable holder instead of ``nonlocal`` so the code also runs on
    # Python 2.
    if 0xc0 <= byte_data < 0xe0:  # 2-byte UTF-8 Character
        local_data = {"left": 1, "num": (byte_data & 0x1f) << 6}
    elif 0xe0 <= byte_data < 0xf0:  # 3-byte UTF-8 Character
        local_data = {"left": 2, "num": (byte_data & 0xf) << 12}
    elif 0xf0 <= byte_data < 0xf8:  # 4-byte UTF-8 Character
        local_data = {"left": 3, "num": (byte_data & 0x07) << 18}
    else:
        raise Exception("Invalid byte in UTF-8 string: 0x%02x" % byte_data)

    def _utf8(byte_data):
        if (byte_data & 0xc0) != 0x80:
            # ints have no JavaScript-style toString(); format the hex here.
            raise Exception("Invalid byte in UTF-8 character: 0x%02x" % byte_data)
        local_data["left"] -= 1
        # Each continuation byte carries 6 payload bits, most significant
        # group first.
        local_data["num"] |= (byte_data & 0x3f) << (local_data["left"] * 6)
        if local_data["left"]:
            return _utf8
        return emit(local_data["num"])

    return _utf8
# Nestable state machine for hex escaped characters
def hex_machine(emit):
    """Create the state that reads the four hex digits of a ``\\u`` escape.

    Args:
        emit: Callback invoked with the 16-bit value once four digits have
            been read; its return value becomes the next state.

    Returns:
        The state function expecting the first hex digit.

    Raises:
        Exception: Raised by the returned state for a byte that is not one
            of ``0-9``, ``a-f`` or ``A-F``.
    """
    # Mutable holder instead of ``nonlocal`` so the code also runs on
    # Python 2.
    local_data = {"left": 4, "num": 0}

    def _hex(byte_data):
        # Digits stop at 0x39; 0x3a-0x3f are ":;<=>?" and must be rejected.
        if 0x30 <= byte_data <= 0x39:  # 0-9
            digit = byte_data - 0x30
        elif 0x61 <= byte_data <= 0x66:  # a-f
            digit = byte_data - 0x57
        elif 0x41 <= byte_data <= 0x46:  # A-F
            digit = byte_data - 0x37
        else:
            raise Exception("Expected hex char in string hex escape")
        local_data["left"] -= 1
        # Most significant nibble first.
        local_data["num"] |= digit << (local_data["left"] * 4)
        if local_data["left"]:
            return _hex
        return emit(local_data["num"])

    return _hex
def number_machine(byte_data, emit):
    """Create the state that parses a JSON number.

    Args:
        byte_data: First byte of the number: ``-`` or a digit.
        emit: Callback invoked as ``emit(value, byte)`` where ``byte`` is the
            first byte after the number (it is not consumed here); its
            return value becomes the next state.

    Returns:
        The state that must receive the byte after ``byte_data``.

    Raises:
        Exception: If the integer part does not start with a digit.
    """
    # Mutable holder instead of ``nonlocal`` so the code also runs on
    # Python 2. "fraction" holds the fractional digits as an integer and
    # "places" counts them.
    local_data = {
        "sign": 1,
        "number": 0,
        "fraction": 0,
        "places": 0,
        "esign": 1,
        "exponent": 0,
    }

    # All states are defined before the first byte is dispatched: a state
    # returned early would otherwise reference helpers that do not exist yet.
    def _start(byte_data):
        if byte_data == 0x30:
            return _mid  # A leading zero takes no further integer digits.
        if 0x31 <= byte_data <= 0x39:
            return _number(byte_data)
        # ints have no JavaScript-style toString(); format the hex here.
        raise Exception("Invalid number: 0x%02x" % byte_data)

    def _number(byte_data):
        if 0x30 <= byte_data <= 0x39:
            local_data["number"] = local_data["number"] * 10 + (byte_data - 0x30)
            return _number
        return _mid(byte_data)

    def _mid(byte_data):
        if byte_data == 0x2e:  # .
            return _decimal
        return _later(byte_data)

    def _decimal(byte_data):
        if 0x30 <= byte_data <= 0x39:
            # Keep the digits in reading order; they are scaled once at the
            # end so that "1.25" is 1.25 and not 1.52.
            local_data["fraction"] = local_data["fraction"] * 10 + (byte_data - 0x30)
            local_data["places"] += 1
            return _decimal
        return _later(byte_data)

    def _later(byte_data):
        if byte_data == 0x45 or byte_data == 0x65:  # E e
            return _esign
        return _done(byte_data)

    def _esign(byte_data):
        if byte_data == 0x2b:  # +
            return _exponent
        if byte_data == 0x2d:  # -
            local_data["esign"] = -1
            return _exponent
        return _exponent(byte_data)

    def _exponent(byte_data):
        if 0x30 <= byte_data <= 0x39:
            local_data["exponent"] = local_data["exponent"] * 10 + (byte_data - 0x30)
            return _exponent
        return _done(byte_data)

    def _done(byte_data):
        value = local_data["number"]
        if local_data["places"]:
            scale = 10 ** local_data["places"]
            # float() forces true division on Python 2 as well.
            value = (value * scale + local_data["fraction"]) / float(scale)
        value *= local_data["sign"]
        if local_data["exponent"]:
            value *= math.pow(10, local_data["esign"] * local_data["exponent"])
        return emit(value, byte_data)

    if byte_data == 0x2d:  # -
        local_data["sign"] = -1
        return _start
    return _start(byte_data)
def array_machine(emit):
    """Create the state that parses the inside of a JSON array.

    The returned state expects the byte that follows the opening bracket.
    Elements are parsed by nested value machines and collected into a list
    that is passed to ``emit`` when the closing bracket arrives; whatever
    ``emit`` returns becomes the next state.
    """
    # The list is only mutated, never rebound, so a plain closure variable
    # works on Python 2 as well.
    elements = []

    def store(element):
        elements.append(element)

    def _separator(byte_data):
        # Between elements only a comma, the closing bracket or whitespace
        # is acceptable.
        if byte_data == 0x5d:  # ]
            return emit(elements)
        if byte_data == 0x2c:  # ,
            return json_machine(store, _separator)
        if byte_data in (0x09, 0x0a, 0x0d, 0x20):
            return _separator  # Ignore whitespace
        raise Exception("Unexpected byte: 0x" + str(byte_data) + " in array body")

    def _opening(byte_data):
        if byte_data == 0x5d:  # ]
            return emit(elements)  # Empty array.
        # Anything else is handed to a fresh value machine.
        value_state = json_machine(store, _separator)
        return value_state(byte_data)

    return _opening
def object_machine(emit):
    """Create the state that parses the inside of a JSON object.

    The returned state expects the byte that follows the opening brace.

    Args:
        emit: Callback invoked with the completed dict when the closing
            brace arrives; its return value becomes the next state.

    Returns:
        The state function for the start of the object body.

    Raises:
        Exception: Raised by the states for a byte that is not valid at the
            current position (key, colon, or separator).
    """
    # Mutable holder instead of ``nonlocal`` so the code also runs on
    # Python 2.
    local_data = {"object_data": {}, "key": ""}
    whitespace = (0x09, 0x0a, 0x0d, 0x20)

    def _object(byte_data):
        if byte_data == 0x7d:  # }
            return emit(local_data["object_data"])
        return _key(byte_data)

    def _key(byte_data):
        if byte_data in whitespace:
            return _object  # Ignore whitespace
        if byte_data == 0x22:  # "
            return string_machine(on_key)
        # ints have no JavaScript-style toString(); format the hex here.
        raise Exception("Unexpected byte: 0x%02x" % byte_data)

    def on_key(result):
        local_data["key"] = result
        return _colon

    def _colon(byte_data):
        if byte_data in whitespace:
            return _colon  # Ignore whitespace
        if byte_data == 0x3a:  # :
            return json_machine(on_value, _comma)
        raise Exception("Unexpected byte: 0x%02x" % byte_data)

    def on_value(value):
        # Store the value under the most recently parsed key.
        local_data["object_data"][local_data["key"]] = value

    def _comma(byte_data):
        if byte_data in whitespace:
            return _comma  # Ignore whitespace
        if byte_data == 0x2c:  # ,
            return _key
        if byte_data == 0x7d:  # }
            return emit(local_data["object_data"])
        raise Exception("Unexpected byte: 0x%02x" % byte_data)

    return _object
Kiểm tra nó
if __name__ == "__main__":
    # Demo: several JSON values separated only by whitespace and newlines.
    test_json = """[1,2,"3"] {"name": 
    "tarun"} 1 2 
    3 [{"name":"a", 
    "data": [1,
    null,2]}]
"""

    def report(parsed):
        # Called once for every complete top-level value.
        print(parsed)

    machine = json_machine(report)
    # Each state returns the state that must handle the following byte.
    for code in map(ord, test_json):
        machine = machine(code)
Đầu ra của đoạn mã kiểm tra trên là
[1, 2, '3']
{'name': 'tarun'}
1
2
3
[{'name': 'a', 'data': [1, None, 2]}]