changeset: 101055:e281a57d5b29 parent: 101053:ee9921b29fd8 parent: 101054:d08d6b776694 user: Lars Gustäbel date: Tue Apr 19 08:53:14 2016 +0200 files: Lib/tarfile.py Lib/test/test_tarfile.py Misc/NEWS description: Issue #24838: Merge tarfile fix from 3.5. diff -r ee9921b29fd8 -r e281a57d5b29 Lib/tarfile.py --- a/Lib/tarfile.py Mon Apr 18 21:21:10 2016 +0300 +++ b/Lib/tarfile.py Tue Apr 19 08:53:14 2016 +0200 @@ -815,11 +815,11 @@ """ info["magic"] = POSIX_MAGIC - if len(info["linkname"]) > LENGTH_LINK: + if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: raise ValueError("linkname is too long") - if len(info["name"]) > LENGTH_NAME: - info["prefix"], info["name"] = self._posix_split_name(info["name"]) + if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: + info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors) return self._create_header(info, USTAR_FORMAT, encoding, errors) @@ -829,10 +829,10 @@ info["magic"] = GNU_MAGIC buf = b"" - if len(info["linkname"]) > LENGTH_LINK: + if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK: buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) - if len(info["name"]) > LENGTH_NAME: + if len(info["name"].encode(encoding, errors)) > LENGTH_NAME: buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) return buf + self._create_header(info, GNU_FORMAT, encoding, errors) @@ -892,19 +892,20 @@ """ return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") - def _posix_split_name(self, name): + def _posix_split_name(self, name, encoding, errors): """Split a name longer than 100 chars into a prefix and a name part. """ - prefix = name[:LENGTH_PREFIX + 1] - while prefix and prefix[-1] != "/": - prefix = prefix[:-1] + components = name.split("/") + for i in range(1, len(components)): + prefix = "/".join(components[:i]) + name = "/".join(components[i:]) + if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \ + len(name.encode(encoding, errors)) <= LENGTH_NAME: + break + else: + raise ValueError("name is too long") - name = name[len(prefix):] - prefix = prefix[:-1] - - if not prefix or len(name) > LENGTH_NAME: - raise ValueError("name is too long") return prefix, name @staticmethod diff -r ee9921b29fd8 -r e281a57d5b29 Lib/test/test_tarfile.py --- a/Lib/test/test_tarfile.py Mon Apr 18 21:21:10 2016 +0300 +++ b/Lib/test/test_tarfile.py Tue Apr 19 08:53:14 2016 +0200 @@ -1667,9 +1667,7 @@ tar.close() -class UstarUnicodeTest(unittest.TestCase): - - format = tarfile.USTAR_FORMAT +class UnicodeTest: def test_iso8859_1_filename(self): self._test_unicode_filename("iso8859-1") @@ -1750,7 +1748,86 @@ tar.close() -class GNUUnicodeTest(UstarUnicodeTest): +class UstarUnicodeTest(UnicodeTest, unittest.TestCase): + + format = tarfile.USTAR_FORMAT + + # Test whether the utf-8 encoded version of a filename exceeds the 100 + # bytes name field limit (every occurrence of '\xff' will be expanded to 2 + # bytes). + def test_unicode_name1(self): + self._test_ustar_name("0123456789" * 10) + self._test_ustar_name("0123456789" * 10 + "0", ValueError) + self._test_ustar_name("0123456789" * 9 + "01234567\xff") + self._test_ustar_name("0123456789" * 9 + "012345678\xff", ValueError) + + def test_unicode_name2(self): + self._test_ustar_name("0123456789" * 9 + "012345\xff\xff") + self._test_ustar_name("0123456789" * 9 + "0123456\xff\xff", ValueError) + + # Test whether the utf-8 encoded version of a filename exceeds the 155 + # bytes prefix + '/' + 100 bytes name limit. + def test_unicode_longname1(self): + self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 10) + self._test_ustar_name("0123456789" * 15 + "0123/4" + "0123456789" * 10, ValueError) + self._test_ustar_name("0123456789" * 15 + "012\xff/" + "0123456789" * 10) + self._test_ustar_name("0123456789" * 15 + "0123\xff/" + "0123456789" * 10, ValueError) + + def test_unicode_longname2(self): + self._test_ustar_name("0123456789" * 15 + "01\xff/2" + "0123456789" * 10, ValueError) + self._test_ustar_name("0123456789" * 15 + "01\xff\xff/" + "0123456789" * 10, ValueError) + + def test_unicode_longname3(self): + self._test_ustar_name("0123456789" * 15 + "01\xff\xff/2" + "0123456789" * 10, ValueError) + self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "01234567\xff") + self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345678\xff", ValueError) + + def test_unicode_longname4(self): + self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345\xff\xff") + self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "0123456\xff\xff", ValueError) + + def _test_ustar_name(self, name, exc=None): + with tarfile.open(tmpname, "w", format=self.format, encoding="utf-8") as tar: + t = tarfile.TarInfo(name) + if exc is None: + tar.addfile(t) + else: + self.assertRaises(exc, tar.addfile, t) + + if exc is None: + with tarfile.open(tmpname, "r") as tar: + for t in tar: + self.assertEqual(name, t.name) + break + + # Test the same as above for the 100 bytes link field. + def test_unicode_link1(self): + self._test_ustar_link("0123456789" * 10) + self._test_ustar_link("0123456789" * 10 + "0", ValueError) + self._test_ustar_link("0123456789" * 9 + "01234567\xff") + self._test_ustar_link("0123456789" * 9 + "012345678\xff", ValueError) + + def test_unicode_link2(self): + self._test_ustar_link("0123456789" * 9 + "012345\xff\xff") + self._test_ustar_link("0123456789" * 9 + "0123456\xff\xff", ValueError) + + def _test_ustar_link(self, name, exc=None): + with tarfile.open(tmpname, "w", format=self.format, encoding="utf-8") as tar: + t = tarfile.TarInfo("foo") + t.linkname = name + if exc is None: + tar.addfile(t) + else: + self.assertRaises(exc, tar.addfile, t) + + if exc is None: + with tarfile.open(tmpname, "r") as tar: + for t in tar: + self.assertEqual(name, t.linkname) + break + + +class GNUUnicodeTest(UnicodeTest, unittest.TestCase): format = tarfile.GNU_FORMAT @@ -1768,7 +1845,7 @@ self.fail("unable to read bad GNU tar pax header") -class PAXUnicodeTest(UstarUnicodeTest): +class PAXUnicodeTest(UnicodeTest, unittest.TestCase): format = tarfile.PAX_FORMAT diff -r ee9921b29fd8 -r e281a57d5b29 Misc/NEWS --- a/Misc/NEWS Mon Apr 18 21:21:10 2016 +0300 +++ b/Misc/NEWS Tue Apr 19 08:53:14 2016 +0200 @@ -245,6 +245,9 @@ Library ------- +- Issue #24838: tarfile's ustar and gnu formats now correctly calculate name + and link field limits for multibyte character encodings like utf-8. + - Issue #26657: Fix directory traversal vulnerability with http.server on Windows. This fixes a regression that was introduced in 3.3.4rc1 and 3.4.0rc1. Based on patch by Philipp Hagemeister.