changeset: 103144:231f578dfd3d user: Serhiy Storchaka date: Tue Sep 06 23:55:11 2016 +0300 files: Lib/test/test_pickle.py Misc/NEWS Modules/_pickle.c description: Issue #25761: Improved error reporting about truncated pickle data in C implementation of unpickler. UnpicklingError is now raised instead of AttributeError and ValueError in some cases. diff -r b10b14fc4975 -r 231f578dfd3d Lib/test/test_pickle.py --- a/Lib/test/test_pickle.py Tue Sep 06 13:53:14 2016 -0700 +++ b/Lib/test/test_pickle.py Tue Sep 06 23:55:11 2016 +0300 @@ -139,8 +139,7 @@ class CUnpicklerTests(PyUnpicklerTests): unpickler = _pickle.Unpickler bad_stack_errors = (pickle.UnpicklingError,) - truncated_errors = (pickle.UnpicklingError, EOFError, - AttributeError, ValueError) + truncated_errors = (pickle.UnpicklingError,) class CPicklerTests(PyPicklerTests): pickler = _pickle.Pickler diff -r b10b14fc4975 -r 231f578dfd3d Misc/NEWS --- a/Misc/NEWS Tue Sep 06 13:53:14 2016 -0700 +++ b/Misc/NEWS Tue Sep 06 23:55:11 2016 +0300 @@ -89,6 +89,10 @@ Library ------- +- Issue #25761: Improved error reporting about truncated pickle data in + C implementation of unpickler. UnpicklingError is now raised instead of + AttributeError and ValueError in some cases. + - Issue #26798: Add BLAKE2 (blake2b and blake2s) to hashlib. - Issue #25596: Optimized glob() and iglob() functions in the diff -r b10b14fc4975 -r 231f578dfd3d Modules/_pickle.c --- a/Modules/_pickle.c Tue Sep 06 13:53:14 2016 -0700 +++ b/Modules/_pickle.c Tue Sep 06 23:55:11 2016 +0300 @@ -1092,6 +1092,14 @@ } static int +bad_readline(void) +{ + PickleState *st = _Pickle_GetGlobalState(); + PyErr_SetString(st->UnpicklingError, "pickle data was truncated"); + return -1; +} + +static int _Unpickler_SkipConsumed(UnpicklerObject *self) { Py_ssize_t consumed; @@ -1195,17 +1203,14 @@ /* This case is handled by the _Unpickler_Read() macro for efficiency */ assert(self->next_read_idx + n > self->input_len); - if (!self->read) { - PyErr_Format(PyExc_EOFError, "Ran out of input"); - return -1; - } + if (!self->read) + return bad_readline(); + num_read = _Unpickler_ReadFromFile(self, n); if (num_read < 0) return -1; - if (num_read < n) { - PyErr_Format(PyExc_EOFError, "Ran out of input"); - return -1; - } + if (num_read < n) + return bad_readline(); *s = self->input_buffer; self->next_read_idx = n; return n; @@ -1249,7 +1254,7 @@ } /* Read a line from the input stream/buffer. If we run off the end of the input - before hitting \n, return the data we found. + before hitting \n, raise an error. Returns the number of chars read, or -1 on failure. */ static Py_ssize_t @@ -1265,20 +1270,16 @@ return _Unpickler_CopyLine(self, line_start, num_read, result); } } - if (self->read) { - num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE); - if (num_read < 0) - return -1; - self->next_read_idx = num_read; - return _Unpickler_CopyLine(self, self->input_buffer, num_read, result); - } - - /* If we get here, we've run off the end of the input string. Return the - remaining string and let the caller figure it out. */ - *result = self->input_buffer + self->next_read_idx; - num_read = i - self->next_read_idx; - self->next_read_idx = i; - return num_read; + if (!self->read) + return bad_readline(); + + num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE); + if (num_read < 0) + return -1; + if (num_read == 0 || self->input_buffer[num_read - 1] != '\n') + return bad_readline(); + self->next_read_idx = num_read; + return _Unpickler_CopyLine(self, self->input_buffer, num_read, result); } /* Returns -1 (with an exception set) on failure, 0 on success. The memo array @@ -4600,14 +4601,6 @@ } static int -bad_readline(void) -{ - PickleState *st = _Pickle_GetGlobalState(); - PyErr_SetString(st->UnpicklingError, "pickle data was truncated"); - return -1; -} - -static int load_int(UnpicklerObject *self) { PyObject *value; @@ -6245,8 +6238,13 @@ case opcode: if (load_func(self, (arg)) < 0) break; continue; while (1) { - if (_Unpickler_Read(self, &s, 1) < 0) - break; + if (_Unpickler_Read(self, &s, 1) < 0) { + PickleState *st = _Pickle_GetGlobalState(); + if (PyErr_ExceptionMatches(st->UnpicklingError)) { + PyErr_Format(PyExc_EOFError, "Ran out of input"); + } + return NULL; + } switch ((enum opcode)s[0]) { OP(NONE, load_none) @@ -6318,15 +6316,19 @@ break; default: - if (s[0] == '\0') { - PyErr_SetNone(PyExc_EOFError); - } - else { + { PickleState *st = _Pickle_GetGlobalState(); - PyErr_Format(st->UnpicklingError, - "invalid load key, '%c'.", s[0]); + unsigned char c = (unsigned char) *s; + if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') { + PyErr_Format(st->UnpicklingError, + "invalid load key, '%c'.", c); + } + else { + PyErr_Format(st->UnpicklingError, + "invalid load key, '\\x%02x'.", c); + } + return NULL; } - return NULL; } break; /* and we are done! */