changeset: 92629:b2e68274aa8e branch: 2.7 parent: 92619:0ad19246d16d user: Benjamin Peterson date: Mon Sep 29 18:18:57 2014 -0400 files: Objects/unicodeobject.c description: cleanup overflowing handling in unicode_decode_call_errorhandler and unicode_encode_ucs1 (closes #22518) diff -r 0ad19246d16d -r b2e68274aa8e Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sun Sep 28 12:48:46 2014 -0400 +++ b/Objects/unicodeobject.c Mon Sep 29 18:18:57 2014 -0400 @@ -1510,9 +1510,15 @@ when there are no errors in the rest of the string) */ repptr = PyUnicode_AS_UNICODE(repunicode); repsize = PyUnicode_GET_SIZE(repunicode); - requiredsize = *outpos + repsize + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - repsize) + goto overflow; + requiredsize += repsize; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize<2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (_PyUnicode_Resize(output, requiredsize) < 0) goto onError; @@ -1529,6 +1535,11 @@ onError: Py_XDECREF(restuple); return res; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "decoded result is too long for a Python string"); + goto onError; } /* --- UTF-7 Codec -------------------------------------------------------- */ @@ -3646,7 +3657,7 @@ const Py_UNICODE *collstart = p; const Py_UNICODE *collend = p; /* find all unecodable characters */ - while ((collend < endp) && ((*collend)>=limit)) + while ((collend < endp) && ((*collend) >= limit)) ++collend; /* cache callback name lookup (if not done yet, i.e. it's the first error) */ if (known_errorHandler==-1) { @@ -3666,34 +3677,41 @@ raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason); goto onError; case 2: /* replace */ - while (collstart++ PY_SSIZE_T_MAX - incr) + goto overflow; + requiredsize += incr; } - requiredsize = respos+repsize+(endp-collend); + if (requiredsize > PY_SSIZE_T_MAX - (endp - collend)) + goto overflow; + requiredsize += endp - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyString_Resize(&res, requiredsize)) goto onError; @@ -3716,11 +3734,16 @@ /* need more space? (at least enough for what we have+the replacement+the rest of the string, so we won't have to check space for encodable characters) */ - respos = str-PyString_AS_STRING(res); + respos = str - PyString_AS_STRING(res); repsize = PyUnicode_GET_SIZE(repunicode); - requiredsize = respos+repsize+(endp-collend); + if (respos > PY_SSIZE_T_MAX - repsize) + goto overflow; + requiredsize = respos + repsize; + if (requiredsize > PY_SSIZE_T_MAX - (endp - collend)) + goto overflow; + requiredsize += endp - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyString_Resize(&res, requiredsize)) { Py_DECREF(repunicode); @@ -3731,7 +3754,7 @@ } /* check if there is anything unencodable in the replacement and copy it to the output */ - for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) { + for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2, ++str) { c = *uni2; if (c >= limit) { raise_encode_exception(&exc, encoding, startp, size, @@ -3747,14 +3770,18 @@ } } /* Resize if we allocated to much */ - respos = str-PyString_AS_STRING(res); - if (respos