changeset: 95316:8d86dfe53b97 branch: 3.4 parent: 95309:28b778b0a132 user: Serhiy Storchaka date: Tue Mar 31 13:12:37 2015 +0300 files: Lib/_compat_pickle.py Lib/pickle.py Lib/test/pickletester.py Lib/test/test_pickle.py Misc/NEWS Modules/_pickle.c description: Issue #18473: Fixed 2to3 and 3to2 compatible pickle mappings. Fixed ambigious reverse mappings. Added many new mappings. Import mapping is no longer applied to modules already mapped with full name mapping. Added tests for compatible pickling and unpickling and for consistency of _compat_pickle mappings. diff -r 28b778b0a132 -r 8d86dfe53b97 Lib/_compat_pickle.py --- a/Lib/_compat_pickle.py Mon Mar 30 20:29:28 2015 -0700 +++ b/Lib/_compat_pickle.py Tue Mar 31 13:12:37 2015 +0300 @@ -6,18 +6,13 @@ # lib2to3 and use the mapping defined there, because lib2to3 uses pickle. # Thus, this could cause the module to be imported recursively. IMPORT_MAPPING = { - 'StringIO': 'io', - 'cStringIO': 'io', - 'cPickle': 'pickle', '__builtin__' : 'builtins', 'copy_reg': 'copyreg', 'Queue': 'queue', 'SocketServer': 'socketserver', 'ConfigParser': 'configparser', 'repr': 'reprlib', - 'FileDialog': 'tkinter.filedialog', 'tkFileDialog': 'tkinter.filedialog', - 'SimpleDialog': 'tkinter.simpledialog', 'tkSimpleDialog': 'tkinter.simpledialog', 'tkColorChooser': 'tkinter.colorchooser', 'tkCommonDialog': 'tkinter.commondialog', @@ -39,7 +34,6 @@ 'dbm': 'dbm.ndbm', 'gdbm': 'dbm.gnu', 'xmlrpclib': 'xmlrpc.client', - 'DocXMLRPCServer': 'xmlrpc.server', 'SimpleXMLRPCServer': 'xmlrpc.server', 'httplib': 'http.client', 'htmlentitydefs' : 'html.entities', @@ -47,16 +41,13 @@ 'Cookie': 'http.cookies', 'cookielib': 'http.cookiejar', 'BaseHTTPServer': 'http.server', - 'SimpleHTTPServer': 'http.server', - 'CGIHTTPServer': 'http.server', 'test.test_support': 'test.support', 'commands': 'subprocess', - 'UserString' : 'collections', - 'UserList' : 'collections', 'urlparse' : 'urllib.parse', 'robotparser' : 'urllib.robotparser', - 'whichdb': 'dbm', - 'anydbm': 'dbm' + 'urllib2': 'urllib.request', + 'anydbm': 'dbm', + '_abcoll' : 'collections.abc', } @@ -68,12 +59,35 @@ ('__builtin__', 'reduce'): ('functools', 'reduce'), ('__builtin__', 'intern'): ('sys', 'intern'), ('__builtin__', 'unichr'): ('builtins', 'chr'), - ('__builtin__', 'basestring'): ('builtins', 'str'), + ('__builtin__', 'unicode'): ('builtins', 'str'), ('__builtin__', 'long'): ('builtins', 'int'), ('itertools', 'izip'): ('builtins', 'zip'), ('itertools', 'imap'): ('builtins', 'map'), ('itertools', 'ifilter'): ('builtins', 'filter'), ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'), + ('itertools', 'izip_longest'): ('itertools', 'zip_longest'), + ('UserDict', 'IterableUserDict'): ('collections', 'UserDict'), + ('UserList', 'UserList'): ('collections', 'UserList'), + ('UserString', 'UserString'): ('collections', 'UserString'), + ('whichdb', 'whichdb'): ('dbm', 'whichdb'), + ('_socket', 'fromfd'): ('socket', 'fromfd'), + ('_multiprocessing', 'Connection'): ('multiprocessing.connection', 'Connection'), + ('multiprocessing.process', 'Process'): ('multiprocessing.context', 'Process'), + ('multiprocessing.forking', 'Popen'): ('multiprocessing.popen_fork', 'Popen'), + ('urllib', 'ContentTooShortError'): ('urllib.error', 'ContentTooShortError'), + ('urllib', 'getproxies'): ('urllib.request', 'getproxies'), + ('urllib', 'pathname2url'): ('urllib.request', 'pathname2url'), + ('urllib', 'quote_plus'): ('urllib.parse', 'quote_plus'), + ('urllib', 'quote'): ('urllib.parse', 'quote'), + ('urllib', 'unquote_plus'): ('urllib.parse', 'unquote_plus'), + ('urllib', 'unquote'): ('urllib.parse', 'unquote'), + ('urllib', 'url2pathname'): ('urllib.request', 'url2pathname'), + ('urllib', 'urlcleanup'): ('urllib.request', 'urlcleanup'), + ('urllib', 'urlencode'): ('urllib.parse', 'urlencode'), + ('urllib', 'urlopen'): ('urllib.request', 'urlopen'), + ('urllib', 'urlretrieve'): ('urllib.request', 'urlretrieve'), + ('urllib2', 'HTTPError'): ('urllib.error', 'HTTPError'), + ('urllib2', 'URLError'): ('urllib.error', 'URLError'), } PYTHON2_EXCEPTIONS = ( @@ -130,8 +144,87 @@ for excname in PYTHON2_EXCEPTIONS: NAME_MAPPING[("exceptions", excname)] = ("builtins", excname) -NAME_MAPPING[("exceptions", "StandardError")] = ("builtins", "Exception") +MULTIPROCESSING_EXCEPTIONS = ( + 'AuthenticationError', + 'BufferTooShort', + 'ProcessError', + 'TimeoutError', +) + +for excname in MULTIPROCESSING_EXCEPTIONS: + NAME_MAPPING[("multiprocessing", excname)] = ("multiprocessing.context", excname) # Same, but for 3.x to 2.x REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items()) +assert len(REVERSE_IMPORT_MAPPING) == len(IMPORT_MAPPING) REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items()) +assert len(REVERSE_NAME_MAPPING) == len(NAME_MAPPING) + +# Non-mutual mappings. + +IMPORT_MAPPING.update({ + 'cPickle': 'pickle', + '_elementtree': 'xml.etree.ElementTree', + 'FileDialog': 'tkinter.filedialog', + 'SimpleDialog': 'tkinter.simpledialog', + 'DocXMLRPCServer': 'xmlrpc.server', + 'SimpleHTTPServer': 'http.server', + 'CGIHTTPServer': 'http.server', +}) + +REVERSE_IMPORT_MAPPING.update({ + '_bz2': 'bz2', + '_dbm': 'dbm', + '_functools': 'functools', + '_gdbm': 'gdbm', + '_pickle': 'pickle', +}) + +NAME_MAPPING.update({ + ('__builtin__', 'basestring'): ('builtins', 'str'), + ('exceptions', 'StandardError'): ('builtins', 'Exception'), + ('UserDict', 'UserDict'): ('collections', 'UserDict'), + ('socket', '_socketobject'): ('socket', 'SocketType'), +}) + +REVERSE_NAME_MAPPING.update({ + ('_functools', 'reduce'): ('__builtin__', 'reduce'), + ('tkinter.filedialog', 'FileDialog'): ('FileDialog', 'FileDialog'), + ('tkinter.filedialog', 'LoadFileDialog'): ('FileDialog', 'LoadFileDialog'), + ('tkinter.filedialog', 'SaveFileDialog'): ('FileDialog', 'SaveFileDialog'), + ('tkinter.simpledialog', 'SimpleDialog'): ('SimpleDialog', 'SimpleDialog'), + ('xmlrpc.server', 'ServerHTMLDoc'): ('DocXMLRPCServer', 'ServerHTMLDoc'), + ('xmlrpc.server', 'XMLRPCDocGenerator'): + ('DocXMLRPCServer', 'XMLRPCDocGenerator'), + ('xmlrpc.server', 'DocXMLRPCRequestHandler'): + ('DocXMLRPCServer', 'DocXMLRPCRequestHandler'), + ('xmlrpc.server', 'DocXMLRPCServer'): + ('DocXMLRPCServer', 'DocXMLRPCServer'), + ('xmlrpc.server', 'DocCGIXMLRPCRequestHandler'): + ('DocXMLRPCServer', 'DocCGIXMLRPCRequestHandler'), + ('http.server', 'SimpleHTTPRequestHandler'): + ('SimpleHTTPServer', 'SimpleHTTPRequestHandler'), + ('http.server', 'CGIHTTPRequestHandler'): + ('CGIHTTPServer', 'CGIHTTPRequestHandler'), + ('_socket', 'socket'): ('socket', '_socketobject'), +}) + +PYTHON3_OSERROR_EXCEPTIONS = ( + 'BrokenPipeError', + 'ChildProcessError', + 'ConnectionAbortedError', + 'ConnectionError', + 'ConnectionRefusedError', + 'ConnectionResetError', + 'FileExistsError', + 'FileNotFoundError', + 'InterruptedError', + 'IsADirectoryError', + 'NotADirectoryError', + 'PermissionError', + 'ProcessLookupError', + 'TimeoutError', +) + +for excname in PYTHON3_OSERROR_EXCEPTIONS: + REVERSE_NAME_MAPPING[('builtins', excname)] = ('exceptions', 'OSError') diff -r 28b778b0a132 -r 8d86dfe53b97 Lib/pickle.py --- a/Lib/pickle.py Mon Mar 30 20:29:28 2015 -0700 +++ b/Lib/pickle.py Tue Mar 31 13:12:37 2015 +0300 @@ -944,7 +944,7 @@ r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING if (module_name, name) in r_name_mapping: module_name, name = r_name_mapping[(module_name, name)] - if module_name in r_import_mapping: + elif module_name in r_import_mapping: module_name = r_import_mapping[module_name] try: write(GLOBAL + bytes(module_name, "ascii") + b'\n' + @@ -1370,7 +1370,7 @@ if self.proto < 3 and self.fix_imports: if (module, name) in _compat_pickle.NAME_MAPPING: module, name = _compat_pickle.NAME_MAPPING[(module, name)] - if module in _compat_pickle.IMPORT_MAPPING: + elif module in _compat_pickle.IMPORT_MAPPING: module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) return _getattribute(sys.modules[module], name, diff -r 28b778b0a132 -r 8d86dfe53b97 Lib/test/pickletester.py --- a/Lib/test/pickletester.py Mon Mar 30 20:29:28 2015 -0700 +++ b/Lib/test/pickletester.py Tue Mar 31 13:12:37 2015 +0300 @@ -1,8 +1,10 @@ +import collections import copyreg +import dbm import io +import functools import pickle import pickletools -import random import struct import sys import unittest @@ -1645,6 +1647,51 @@ unpickled = self.loads(self.dumps(method, proto)) self.assertEqual(method(*args), unpickled(*args)) + def test_compat_pickle(self): + tests = [ + (range(1, 7), '__builtin__', 'xrange'), + (map(int, '123'), 'itertools', 'imap'), + (functools.reduce, '__builtin__', 'reduce'), + (dbm.whichdb, 'whichdb', 'whichdb'), + (Exception(), 'exceptions', 'Exception'), + (collections.UserDict(), 'UserDict', 'IterableUserDict'), + (collections.UserList(), 'UserList', 'UserList'), + (collections.defaultdict(), 'collections', 'defaultdict'), + ] + for val, mod, name in tests: + for proto in range(3): + with self.subTest(type=type(val), proto=proto): + pickled = self.dumps(val, proto) + self.assertIn(('c%s\n%s' % (mod, name)).encode(), pickled) + self.assertIs(type(self.loads(pickled)), type(val)) + + def test_compat_unpickle(self): + # xrange(1, 7) + pickled = b'\x80\x02c__builtin__\nxrange\nK\x01K\x07K\x01\x87R.' + unpickled = self.loads(pickled) + self.assertIs(type(unpickled), range) + self.assertEqual(unpickled, range(1, 7)) + self.assertEqual(list(unpickled), [1, 2, 3, 4, 5, 6]) + # reduce + pickled = b'\x80\x02c__builtin__\nreduce\n.' + self.assertIs(self.loads(pickled), functools.reduce) + # whichdb.whichdb + pickled = b'\x80\x02cwhichdb\nwhichdb\n.' + self.assertIs(self.loads(pickled), dbm.whichdb) + # Exception(), StandardError() + for name in (b'Exception', b'StandardError'): + pickled = (b'\x80\x02cexceptions\n' + name + b'\nU\x03ugh\x85R.') + unpickled = self.loads(pickled) + self.assertIs(type(unpickled), Exception) + self.assertEqual(str(unpickled), 'ugh') + # UserDict.UserDict({1: 2}), UserDict.IterableUserDict({1: 2}) + for name in (b'UserDict', b'IterableUserDict'): + pickled = (b'\x80\x02(cUserDict\n' + name + + b'\no}U\x04data}K\x01K\x02ssb.') + unpickled = self.loads(pickled) + self.assertIs(type(unpickled), collections.UserDict) + self.assertEqual(unpickled, collections.UserDict({1: 2})) + class BigmemPickleTests(unittest.TestCase): diff -r 28b778b0a132 -r 8d86dfe53b97 Lib/test/test_pickle.py --- a/Lib/test/test_pickle.py Mon Mar 30 20:29:28 2015 -0700 +++ b/Lib/test/test_pickle.py Tue Mar 31 13:12:37 2015 +0300 @@ -1,3 +1,6 @@ +from _compat_pickle import (IMPORT_MAPPING, REVERSE_IMPORT_MAPPING, + NAME_MAPPING, REVERSE_NAME_MAPPING) +import builtins import pickle import io import collections @@ -207,9 +210,156 @@ check(u, stdsize + 32 * P + 2 + 1) +ALT_IMPORT_MAPPING = { + ('_elementtree', 'xml.etree.ElementTree'), + ('cPickle', 'pickle'), +} + +ALT_NAME_MAPPING = { + ('__builtin__', 'basestring', 'builtins', 'str'), + ('exceptions', 'StandardError', 'builtins', 'Exception'), + ('UserDict', 'UserDict', 'collections', 'UserDict'), + ('socket', '_socketobject', 'socket', 'SocketType'), +} + +def mapping(module, name): + if (module, name) in NAME_MAPPING: + module, name = NAME_MAPPING[(module, name)] + elif module in IMPORT_MAPPING: + module = IMPORT_MAPPING[module] + return module, name + +def reverse_mapping(module, name): + if (module, name) in REVERSE_NAME_MAPPING: + module, name = REVERSE_NAME_MAPPING[(module, name)] + elif module in REVERSE_IMPORT_MAPPING: + module = REVERSE_IMPORT_MAPPING[module] + return module, name + +def getmodule(module): + try: + return sys.modules[module] + except KeyError: + __import__(module) + return sys.modules[module] + +def getattribute(module, name): + obj = getmodule(module) + for n in name.split('.'): + obj = getattr(obj, n) + return obj + +def get_exceptions(mod): + for name in dir(mod): + attr = getattr(mod, name) + if isinstance(attr, type) and issubclass(attr, BaseException): + yield name, attr + +class CompatPickleTests(unittest.TestCase): + def test_import(self): + modules = set(IMPORT_MAPPING.values()) + modules |= set(REVERSE_IMPORT_MAPPING) + modules |= {module for module, name in REVERSE_NAME_MAPPING} + modules |= {module for module, name in NAME_MAPPING.values()} + for module in modules: + try: + getmodule(module) + except ImportError as exc: + if support.verbose: + print(exc) + + def test_import_mapping(self): + for module3, module2 in REVERSE_IMPORT_MAPPING.items(): + with self.subTest((module3, module2)): + try: + getmodule(module3) + except ImportError as exc: + if support.verbose: + print(exc) + if module3[:1] != '_': + self.assertIn(module2, IMPORT_MAPPING) + self.assertEqual(IMPORT_MAPPING[module2], module3) + + def test_name_mapping(self): + for (module3, name3), (module2, name2) in REVERSE_NAME_MAPPING.items(): + with self.subTest(((module3, name3), (module2, name2))): + attr = getattribute(module3, name3) + if (module2, name2) == ('exceptions', 'OSError'): + self.assertTrue(issubclass(attr, OSError)) + else: + module, name = mapping(module2, name2) + if module3[:1] != '_': + self.assertEqual((module, name), (module3, name3)) + self.assertEqual(getattribute(module, name), attr) + + def test_reverse_import_mapping(self): + for module2, module3 in IMPORT_MAPPING.items(): + with self.subTest((module2, module3)): + try: + getmodule(module3) + except ImportError as exc: + if support.verbose: + print(exc) + if ((module2, module3) not in ALT_IMPORT_MAPPING and + REVERSE_IMPORT_MAPPING.get(module3, None) != module2): + for (m3, n3), (m2, n2) in REVERSE_NAME_MAPPING.items(): + if (module3, module2) == (m3, m2): + break + else: + self.fail('No reverse mapping from %r to %r' % + (module3, module2)) + module = REVERSE_IMPORT_MAPPING.get(module3, module3) + module = IMPORT_MAPPING.get(module, module) + self.assertEqual(module, module3) + + def test_reverse_name_mapping(self): + for (module2, name2), (module3, name3) in NAME_MAPPING.items(): + with self.subTest(((module2, name2), (module3, name3))): + attr = getattribute(module3, name3) + module, name = reverse_mapping(module3, name3) + if (module2, name2, module3, name3) not in ALT_NAME_MAPPING: + self.assertEqual((module, name), (module2, name2)) + module, name = mapping(module, name) + self.assertEqual((module, name), (module3, name3)) + + def test_exceptions(self): + self.assertEqual(mapping('exceptions', 'StandardError'), + ('builtins', 'Exception')) + self.assertEqual(mapping('exceptions', 'Exception'), + ('builtins', 'Exception')) + self.assertEqual(reverse_mapping('builtins', 'Exception'), + ('exceptions', 'Exception')) + self.assertEqual(mapping('exceptions', 'OSError'), + ('builtins', 'OSError')) + self.assertEqual(reverse_mapping('builtins', 'OSError'), + ('exceptions', 'OSError')) + + for name, exc in get_exceptions(builtins): + with self.subTest(name): + if exc in (BlockingIOError, ResourceWarning): + continue + if exc is not OSError and issubclass(exc, OSError): + self.assertEqual(reverse_mapping('builtins', name), + ('exceptions', 'OSError')) + else: + self.assertEqual(reverse_mapping('builtins', name), + ('exceptions', name)) + self.assertEqual(mapping('exceptions', name), + ('builtins', name)) + + import multiprocessing.context + for name, exc in get_exceptions(multiprocessing.context): + with self.subTest(name): + self.assertEqual(reverse_mapping('multiprocessing.context', name), + ('multiprocessing', name)) + self.assertEqual(mapping('multiprocessing', name), + ('multiprocessing.context', name)) + + def test_main(): tests = [PickleTests, PyPicklerTests, PyPersPicklerTests, - PyDispatchTableTests, PyChainDispatchTableTests] + PyDispatchTableTests, PyChainDispatchTableTests, + CompatPickleTests] if has_c_implementation: tests.extend([CPicklerTests, CPersPicklerTests, CDumpPickle_LoadPickle, DumpPickle_CLoadPickle, diff -r 28b778b0a132 -r 8d86dfe53b97 Misc/NEWS --- a/Misc/NEWS Mon Mar 30 20:29:28 2015 -0700 +++ b/Misc/NEWS Tue Mar 31 13:12:37 2015 +0300 @@ -21,6 +21,10 @@ Library ------- +- Issue #18473: Fixed 2to3 and 3to2 compatible pickle mappings. Fixed + ambigious reverse mappings. Added many new mappings. Import mapping is no + longer applied to modules already mapped with full name mapping. + - Issue #23745: The new email header parser now handles duplicate MIME parameter names without error, similar to how get_param behaves. diff -r 28b778b0a132 -r 8d86dfe53b97 Modules/_pickle.c --- a/Modules/_pickle.c Mon Mar 30 20:29:28 2015 -0700 +++ b/Modules/_pickle.c Tue Mar 31 13:12:37 2015 +0300 @@ -3026,6 +3026,7 @@ Py_INCREF(fixed_global_name); *module_name = fixed_module_name; *global_name = fixed_global_name; + return 0; } else if (PyErr_Occurred()) { return -1; @@ -6278,20 +6279,21 @@ else if (PyErr_Occurred()) { return NULL; } - - /* Check if the module was renamed. */ - item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name); - if (item) { - if (!PyUnicode_Check(item)) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.IMPORT_MAPPING values should be " - "strings, not %.200s", Py_TYPE(item)->tp_name); + else { + /* Check if the module was renamed. */ + item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name); + if (item) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING values should be " + "strings, not %.200s", Py_TYPE(item)->tp_name); + return NULL; + } + module_name = item; + } + else if (PyErr_Occurred()) { return NULL; } - module_name = item; - } - else if (PyErr_Occurred()) { - return NULL; } }