changeset: 100379:27ba9ba5deb1 branch: 3.5 parent: 100377:6d6c0a7b71f5 user: Victor Stinner date: Tue Mar 01 21:30:30 2016 +0100 files: Lib/test/test_unicode.py Misc/NEWS Objects/unicodeobject.c description: Fix str.translate() Issue #26464: Fix str.translate() when the string is ASCII and the first replacement removes a character, but a subsequent replacement uses a non-ASCII character or a string longer than 1 character. Regression introduced in Python 3.5.0. diff -r 6d6c0a7b71f5 -r 27ba9ba5deb1 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Tue Mar 01 10:25:45 2016 +0200 +++ b/Lib/test/test_unicode.py Tue Mar 01 21:30:30 2016 +0100 @@ -347,6 +347,10 @@ "[a]") self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})), "[]") + self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '123'})), + "x123") + self.assertEqual('axb'.translate(str.maketrans({'a': None, 'b': '\xe9'})), + "x\xe9") # invalid Unicode characters invalid_char = 0x10ffff+1 diff -r 6d6c0a7b71f5 -r 27ba9ba5deb1 Misc/NEWS --- a/Misc/NEWS Tue Mar 01 10:25:45 2016 +0200 +++ b/Misc/NEWS Tue Mar 01 21:30:30 2016 +0100 @@ -10,6 +10,10 @@ Core and Builtins ----------------- +- Issue #26464: Fix str.translate() when string is ASCII and first replacements + removes character, but next replacement uses a non-ASCII character or a + string longer than 1 character. Regression introduced in Python 3.5.0. + - Issue #22836: Ensure exception reports from PyErr_Display() and PyErr_WriteUnraisable() are sensible even when formatting them produces secondary errors. This affects the reports produced by diff -r 6d6c0a7b71f5 -r 27ba9ba5deb1 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Tue Mar 01 10:25:45 2016 +0200 +++ b/Objects/unicodeobject.c Tue Mar 01 21:30:30 2016 +0100 @@ -8574,7 +8574,8 @@ translated into writer, raise an exception and return -1 on error. 
*/ static int unicode_fast_translate(PyObject *input, PyObject *mapping, - _PyUnicodeWriter *writer, int ignore) + _PyUnicodeWriter *writer, int ignore, + Py_ssize_t *input_pos) { Py_UCS1 ascii_table[128], ch, ch2; Py_ssize_t len; @@ -8621,6 +8622,7 @@ exit: writer->pos = out - PyUnicode_1BYTE_DATA(writer->buffer); + *input_pos = in - PyUnicode_1BYTE_DATA(input); return res; } @@ -8666,7 +8668,7 @@ ignore = (errors != NULL && strcmp(errors, "ignore") == 0); - res = unicode_fast_translate(input, mapping, &writer, ignore); + res = unicode_fast_translate(input, mapping, &writer, ignore, &i); if (res < 0) { _PyUnicodeWriter_Dealloc(&writer); return NULL; @@ -8674,7 +8676,6 @@ if (res == 1) return _PyUnicodeWriter_Finish(&writer); - i = writer.pos; while (i