changeset: 92648:8ba7e5f43952
branch: 3.3
parent: 92645:e834b32f0422
user: Benjamin Peterson
date: Mon Sep 29 23:02:15 2014 -0400
files: Misc/NEWS Objects/unicodeobject.c
description:
prevent overflow in unicode_repr (closes #22520)


diff -r e834b32f0422 -r 8ba7e5f43952 Misc/NEWS
--- a/Misc/NEWS Mon Sep 29 22:46:57 2014 -0400
+++ b/Misc/NEWS Mon Sep 29 23:02:15 2014 -0400
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #22520: Fix overflow checking when generating the repr of a unicode
+  object.
+
 - Issue #22519: Fix overflow checking in PyBytes_Repr.
 
 - Issue #22518: Fix integer overflow issues in latin-1 encoding.
diff -r e834b32f0422 -r 8ba7e5f43952 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Mon Sep 29 22:46:57 2014 -0400
+++ b/Objects/unicodeobject.c Mon Sep 29 23:02:15 2014 -0400
@@ -12000,28 +12000,34 @@
     ikind = PyUnicode_KIND(unicode);
     for (i = 0; i < isize; i++) {
         Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
+        Py_ssize_t incr = 1;
         switch (ch) {
-        case '\'': squote++; osize++; break;
-        case '"': dquote++; osize++; break;
+        case '\'': squote++; break;
+        case '"': dquote++; break;
         case '\\': case '\t': case '\r': case '\n':
-            osize += 2; break;
+            incr = 2;
+            break;
         default:
             /* Fast-path ASCII */
             if (ch < ' ' || ch == 0x7f)
-                osize += 4; /* \xHH */
+                incr = 4; /* \xHH */
             else if (ch < 0x7f)
-                osize++;
-            else if (Py_UNICODE_ISPRINTABLE(ch)) {
-                osize++;
+                ;
+            else if (Py_UNICODE_ISPRINTABLE(ch))
                 max = ch > max ? ch : max;
-            }
             else if (ch < 0x100)
-                osize += 4; /* \xHH */
+                incr = 4; /* \xHH */
             else if (ch < 0x10000)
-                osize += 6; /* \uHHHH */
+                incr = 6; /* \uHHHH */
             else
-                osize += 10; /* \uHHHHHHHH */
-        }
+                incr = 10; /* \uHHHHHHHH */
+        }
+        if (osize > PY_SSIZE_T_MAX - incr) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "string is too long to generate repr");
+            return NULL;
+        }
+        osize += incr;
     }
 
     quote = '\'';