diff options
Diffstat (limited to 'lib-python/3/pickletools.py')
-rw-r--r-- | lib-python/3/pickletools.py | 148 |
1 files changed, 114 insertions, 34 deletions
diff --git a/lib-python/3/pickletools.py b/lib-python/3/pickletools.py index ec6cc53ae3..612fa8f27e 100644 --- a/lib-python/3/pickletools.py +++ b/lib-python/3/pickletools.py @@ -13,6 +13,7 @@ dis(pickle, out=None, memo=None, indentlevel=4) import codecs import pickle import re +import sys __all__ = ['dis', 'genops', 'optimize'] @@ -165,8 +166,9 @@ UP_TO_NEWLINE = -1 # Represents the number of bytes consumed by a two-argument opcode where # the first argument gives the number of bytes in the second argument. -TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int -TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int +TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int +TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int +TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int class ArgumentDescriptor(object): __slots__ = ( @@ -194,7 +196,8 @@ class ArgumentDescriptor(object): assert isinstance(n, int) and (n >= 0 or n in (UP_TO_NEWLINE, TAKEN_FROM_ARGUMENT1, - TAKEN_FROM_ARGUMENT4)) + TAKEN_FROM_ARGUMENT4, + TAKEN_FROM_ARGUMENT4U)) self.n = n self.reader = reader @@ -265,6 +268,27 @@ int4 = ArgumentDescriptor( doc="Four-byte signed integer, little-endian, 2's complement.") +def read_uint4(f): + r""" + >>> import io + >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00')) + 255 + >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31 + True + """ + + data = f.read(4) + if len(data) == 4: + return _unpack("<I", data)[0] + raise ValueError("not enough data in stream to read uint4") + +uint4 = ArgumentDescriptor( + name='uint4', + n=4, + reader=read_uint4, + doc="Four-byte unsigned integer, little-endian.") + + def read_stringnl(f, decode=True, stripquotes=True): r""" >>> import io @@ -421,6 +445,67 @@ string1 = ArgumentDescriptor( """) +def read_bytes1(f): + r""" + >>> import io + >>> read_bytes1(io.BytesIO(b"\x00")) + b'' + >>> read_bytes1(io.BytesIO(b"\x03abcdef")) + b'abc' + """ + + n = read_uint1(f) + assert n >= 0 + data = f.read(n) + if len(data) == n: + return data + raise ValueError("expected %d bytes in a bytes1, but only %d remain" % + (n, len(data))) + +bytes1 = ArgumentDescriptor( + name="bytes1", + n=TAKEN_FROM_ARGUMENT1, + reader=read_bytes1, + doc="""A counted bytes string. + + The first argument is a 1-byte unsigned int giving the number + of bytes, and the second argument is that many bytes. + """) + + +def read_bytes4(f): + r""" + >>> import io + >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc")) + b'' + >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef")) + b'abc' + >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef")) + Traceback (most recent call last): + ... + ValueError: expected 50331648 bytes in a bytes4, but only 6 remain + """ + + n = read_uint4(f) + if n > sys.maxsize: + raise ValueError("bytes4 byte count > sys.maxsize: %d" % n) + data = f.read(n) + if len(data) == n: + return data + raise ValueError("expected %d bytes in a bytes4, but only %d remain" % + (n, len(data))) + +bytes4 = ArgumentDescriptor( + name="bytes4", + n=TAKEN_FROM_ARGUMENT4U, + reader=read_bytes4, + doc="""A counted bytes string. + + The first argument is a 4-byte little-endian unsigned int giving + the number of bytes, and the second argument is that many bytes. + """) + + def read_unicodestringnl(f): r""" >>> import io @@ -464,9 +549,9 @@ def read_unicodestring4(f): ValueError: expected 7 bytes in a unicodestring4, but only 6 remain """ - n = read_int4(f) - if n < 0: - raise ValueError("unicodestring4 byte count < 0: %d" % n) + n = read_uint4(f) + if n > sys.maxsize: + raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n) data = f.read(n) if len(data) == n: return str(data, 'utf-8', 'surrogatepass') @@ -475,7 +560,7 @@ def read_unicodestring4(f): unicodestring4 = ArgumentDescriptor( name="unicodestring4", - n=TAKEN_FROM_ARGUMENT4, + n=TAKEN_FROM_ARGUMENT4U, reader=read_unicodestring4, doc="""A counted Unicode string. @@ -495,25 +580,18 @@ def read_decimalnl_short(f): >>> read_decimalnl_short(io.BytesIO(b"1234L\n56")) Traceback (most recent call last): ... - ValueError: trailing 'L' not allowed in b'1234L' + ValueError: invalid literal for int() with base 10: b'1234L' """ s = read_stringnl(f, decode=False, stripquotes=False) - if s.endswith(b"L"): - raise ValueError("trailing 'L' not allowed in %r" % s) - # It's not necessarily true that the result fits in a Python short int: - # the pickle may have been written on a 64-bit box. There's also a hack - # for True and False here. + # There's a hack for True and False here. if s == b"00": return False elif s == b"01": return True - try: - return int(s) - except OverflowError: - return int(s) + return int(s) def read_decimalnl_long(f): r""" @@ -806,7 +884,7 @@ stackslice = StackObject( obtype=StackObject, doc="""An object representing a contiguous slice of the stack. - This is used in conjuction with markobject, to represent all + This is used in conjunction with markobject, to represent all of the stack following the topmost markobject. For example, the POP_MARK opcode changes the stack from @@ -875,7 +953,7 @@ class OpcodeInfo(object): assert isinstance(x, StackObject) self.stack_after = stack_after - assert isinstance(proto, int) and 0 <= proto <= 3 + assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL self.proto = proto assert isinstance(doc, str) @@ -1041,28 +1119,28 @@ opcodes = [ I(name='BINBYTES', code='B', - arg=string4, + arg=bytes4, stack_before=[], stack_after=[pybytes], proto=3, doc="""Push a Python bytes object. - There are two arguments: the first is a 4-byte little-endian signed int - giving the number of bytes in the string, and the second is that many - bytes, which are taken literally as the bytes content. + There are two arguments: the first is a 4-byte little-endian unsigned int + giving the number of bytes, and the second is that many bytes, which are + taken literally as the bytes content. """), I(name='SHORT_BINBYTES', code='C', - arg=string1, + arg=bytes1, stack_before=[], stack_after=[pybytes], proto=3, - doc="""Push a Python string object. + doc="""Push a Python bytes object. There are two arguments: the first is a 1-byte unsigned int giving - the number of bytes in the string, and the second is that many bytes, - which are taken literally as the string content. + the number of bytes, and the second is that many bytes, which are taken + literally as the string content. """), # Ways to spell None. @@ -1121,7 +1199,7 @@ opcodes = [ proto=1, doc="""Push a Python Unicode string object. - There are two arguments: the first is a 4-byte little-endian signed int + There are two arguments: the first is a 4-byte little-endian unsigned int giving the number of bytes in the string. The second is that many bytes, and is the UTF-8 encoding of the Unicode string. """), @@ -1425,13 +1503,13 @@ opcodes = [ I(name='LONG_BINGET', code='j', - arg=int4, + arg=uint4, stack_before=[], stack_after=[anyobject], proto=1, doc="""Read an object from the memo and push it on the stack. - The index of the memo object to push is given by the 4-byte signed + The index of the memo object to push is given by the 4-byte unsigned little-endian integer following. """), @@ -1462,14 +1540,14 @@ opcodes = [ I(name='LONG_BINPUT', code='r', - arg=int4, + arg=uint4, stack_before=[], stack_after=[], proto=1, doc="""Store the stack top into the memo. The stack is not popped. The index of the memo location to write into is given by the 4-byte - signed little-endian integer following. + unsigned little-endian integer following. """), # Access the extension registry (predefined objects). Akin to the GET @@ -1642,6 +1720,8 @@ opcodes = [ is pushed on the stack. NOTE: checks for __safe_for_unpickling__ went away in Python 2.3. + NOTE: the distinction between old-style and new-style classes does + not make sense in Python 3. """), I(name='OBJ', @@ -1954,12 +2034,12 @@ def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0): stack = [] # crude emulation of unpickler stack if memo is None: - memo = {} # crude emulation of unpicker memo + memo = {} # crude emulation of unpickler memo maxproto = -1 # max protocol number seen markstack = [] # bytecode positions of MARK opcodes indentchunk = ' ' * indentlevel errormsg = None - annocol = annotate # columnt hint for annotations + annocol = annotate # column hint for annotations for opcode, arg, pos in genops(pickle): if pos is not None: print("%5d:" % pos, end=' ', file=out) |