changeset: 101443:f9b85b47f9c8 user: Victor Stinner date: Fri May 20 11:42:37 2016 +0200 files: Doc/whatsnew/3.6.rst Misc/NEWS Modules/_pickle.c description: Optimize pickle.load() and pickle.loads() Issue #27056: Optimize pickle.load() and pickle.loads(), up to 10% faster to deserialize a lot of small objects. diff -r 779563dd701c -r f9b85b47f9c8 Doc/whatsnew/3.6.rst --- a/Doc/whatsnew/3.6.rst Fri May 20 11:36:13 2016 +0200 +++ b/Doc/whatsnew/3.6.rst Fri May 20 11:42:37 2016 +0200 @@ -467,6 +467,9 @@ with a short lifetime, and use :c:func:`malloc` for larger memory blocks. (Contributed by Victor Stinner in :issue:`26249`). +* :func:`pickle.load` and :func:`pickle.loads` are now up to 10% faster when + deserializing many small objects (Contributed by Victor Stinner in + :issue:`27056`). Build and C API Changes ======================= diff -r 779563dd701c -r f9b85b47f9c8 Misc/NEWS --- a/Misc/NEWS Fri May 20 11:36:13 2016 +0200 +++ b/Misc/NEWS Fri May 20 11:42:37 2016 +0200 @@ -16,6 +16,9 @@ Library ------- +- Issue #27056: Optimize pickle.load() and pickle.loads(), up to 10% faster + to deserialize a lot of small objects. + What's New in Python 3.6.0 alpha 1? =================================== @@ -341,7 +344,7 @@ - Issue #26977: Removed unnecessary, and ignored, call to sum of squares helper in statistics.pvariance. -- Issue #26002: Use bisect in statistics.median instead of a linear search. +- Issue #26002: Use bisect in statistics.median instead of a linear search. Patch by Upendra Kuma. - Issue #25974: Make use of new Decimal.as_integer_ratio() method in statistics diff -r 779563dd701c -r f9b85b47f9c8 Modules/_pickle.c --- a/Modules/_pickle.c Fri May 20 11:36:13 2016 +0200 +++ b/Modules/_pickle.c Fri May 20 11:42:37 2016 +0200 @@ -1197,21 +1197,9 @@ return read_size; } -/* Read `n` bytes from the unpickler's data source, storing the result in `*s`. - - This should be used for all data reads, rather than accessing the unpickler's - input buffer directly. This method deals correctly with reading from input - streams, which the input buffer doesn't deal with. - - Note that when reading from a file-like object, self->next_read_idx won't - be updated (it should remain at 0 for the entire unpickling process). You - should use this function's return value to know how many bytes you can - consume. - - Returns -1 (with an exception set) on failure. On success, return the - number of chars read. */ +/* Don't call it directly: use _Unpickler_Read() */ static Py_ssize_t -_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n) +_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n) { Py_ssize_t num_read; @@ -1222,11 +1210,10 @@ "read would overflow (invalid bytecode)"); return -1; } - if (self->next_read_idx + n <= self->input_len) { - *s = self->input_buffer + self->next_read_idx; - self->next_read_idx += n; - return n; - } + + /* This case is handled by the _Unpickler_Read() macro for efficiency */ + assert(self->next_read_idx + n > self->input_len); + if (!self->read) { PyErr_Format(PyExc_EOFError, "Ran out of input"); return -1; @@ -1243,6 +1230,26 @@ return n; } +/* Read `n` bytes from the unpickler's data source, storing the result in `*s`. + + This should be used for all data reads, rather than accessing the unpickler's + input buffer directly. This method deals correctly with reading from input + streams, which the input buffer doesn't deal with. + + Note that when reading from a file-like object, self->next_read_idx won't + be updated (it should remain at 0 for the entire unpickling process). You + should use this function's return value to know how many bytes you can + consume. + + Returns -1 (with an exception set) on failure. On success, return the + number of chars read. */ +#define _Unpickler_Read(self, s, n) \ + (((self)->next_read_idx + (n) <= (self)->input_len) \ + ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \ + (self)->next_read_idx += (n), \ + (n)) \ + : _Unpickler_ReadImpl(self, (s), (n))) + static Py_ssize_t _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len, char **result)