changeset: 92631:3c67d19c624f branch: 3.3 parent: 92453:a4e0aee1a9b5 user: Benjamin Peterson date: Mon Sep 29 18:18:57 2014 -0400 files: Objects/unicodeobject.c description: cleanup overflowing handling in unicode_decode_call_errorhandler and unicode_encode_ucs1 (closes #22518) diff -r a4e0aee1a9b5 -r 3c67d19c624f Objects/unicodeobject.c --- a/Objects/unicodeobject.c Wed Sep 17 16:29:29 2014 +0800 +++ b/Objects/unicodeobject.c Mon Sep 29 18:18:57 2014 -0400 @@ -4168,9 +4168,15 @@ at the new input position), so we won't have to check space when there are no errors in the rest of the string) */ Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); - requiredsize = *outpos + replen + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - replen) + goto overflow; + requiredsize += replen; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize<2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (unicode_resize(output, requiredsize) < 0) goto onError; @@ -4191,9 +4197,15 @@ have+the replacement+the rest of the string (starting at the new input position), so we won't have to check space when there are no errors in the rest of the string) */ - requiredsize = *outpos + repwlen + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - repwlen) + goto overflow; + requiredsize += repwlen; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize < 2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (unicode_resize(output, requiredsize) < 0) goto onError; @@ -4210,6 +4222,11 @@ onError: Py_XDECREF(restuple); return res; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "decoded result is too long for a Python string"); + goto onError; } /* --- UTF-7 Codec -------------------------------------------------------- */ @@ -6358,7 +6375,7 @@ Py_ssize_t collstart = pos; Py_ssize_t collend = pos; /* find all unecodable characters */ - while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit)) + while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit)) ++collend; /* cache callback name lookup (if not done yet, i.e. it's the first error) */ if (known_errorHandler==-1) { @@ -6378,36 +6395,43 @@ raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); goto onError; case 2: /* replace */ - while (collstart++ PY_SSIZE_T_MAX - incr) + goto overflow; + requiredsize += incr; + } + if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) + goto overflow; + requiredsize += size - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyBytes_Resize(&res, requiredsize)) goto onError; @@ -6433,6 +6457,10 @@ if (repsize > 1) { /* Make room for all additional bytes. */ respos = str - PyBytes_AS_STRING(res); + if (ressize > PY_SSIZE_T_MAX - repsize - 1) { + Py_DECREF(repunicode); + goto overflow; + } if (_PyBytes_Resize(&res, ressize+repsize-1)) { Py_DECREF(repunicode); goto onError; @@ -6451,9 +6479,15 @@ we won't have to check space for encodable characters) */ respos = str - PyBytes_AS_STRING(res); repsize = PyUnicode_GET_LENGTH(repunicode); - requiredsize = respos+repsize+(size-collend); + requiredsize = respos; + if (requiredsize > PY_SSIZE_T_MAX - repsize) + goto overflow; + requiredsize += repsize; + if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) + goto overflow; + requiredsize += size - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyBytes_Resize(&res, requiredsize)) { Py_DECREF(repunicode); @@ -6491,6 +6525,10 @@ Py_XDECREF(exc); return res; + overflow: + PyErr_SetString(PyExc_OverflowError, + "encoded result is too long for a Python string"); + onError: Py_XDECREF(res); Py_XDECREF(errorHandler);