changeset: 99420:9e800b2aeeac parent: 99417:a89f92ac7d19 parent: 99419:a0e2376768dc user: Serhiy Storchaka date: Thu Dec 03 01:05:52 2015 +0200 files: Misc/NEWS Objects/unicodeobject.c description: Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache. diff -r a89f92ac7d19 -r 9e800b2aeeac Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Thu Dec 03 00:58:13 2015 +0200 +++ b/Lib/test/test_unicode.py Thu Dec 03 01:05:52 2015 +0200 @@ -2702,6 +2702,23 @@ self.assertTrue(astral >= bmp2) self.assertFalse(astral >= astral2) + @support.cpython_only + def test_pep393_utf8_caching_bug(self): + # Issue #25709: Problem with string concatenation and utf-8 cache + from _testcapi import getargs_s_hash + for k in 0x24, 0xa4, 0x20ac, 0x1f40d: + s = '' + for i in range(5): + # Due to CPython specific optimization the 's' string can be + # resized in-place. + s += chr(k) + # Parsing with the "s#" format code calls indirectly + # PyUnicode_AsUTF8AndSize() which creates the UTF-8 + # encoded string cached in the Unicode object. + self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) + # Check that the second call returns the same result + self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1)) + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff -r a89f92ac7d19 -r 9e800b2aeeac Misc/NEWS --- a/Misc/NEWS Thu Dec 03 00:58:13 2015 +0200 +++ b/Misc/NEWS Thu Dec 03 01:05:52 2015 +0200 @@ -10,6 +10,8 @@ Core and Builtins ----------------- +- Issue #25709: Fixed problem with in-place string concatenation and utf-8 cache. + - Issue #5319: New Py_FinalizeEx() API allowing Python to set an exit status of 120 on failure to flush buffered streams. diff -r a89f92ac7d19 -r 9e800b2aeeac Objects/unicodeobject.c --- a/Objects/unicodeobject.c Thu Dec 03 00:58:13 2015 +0200 +++ b/Objects/unicodeobject.c Thu Dec 03 01:05:52 2015 +0200 @@ -885,6 +885,11 @@ } new_size = (struct_size + (length + 1) * char_size); + if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { + PyObject_DEL(_PyUnicode_UTF8(unicode)); + _PyUnicode_UTF8(unicode) = NULL; + _PyUnicode_UTF8_LENGTH(unicode) = 0; + } _Py_DEC_REFTOTAL; _Py_ForgetReference(unicode);