changeset:   105466:b431d39da67f
branch:      3.5
parent:      105452:dee2a4ca62f7
user:        Serhiy Storchaka <storchaka@gmail.com>
date:        Tue Dec 06 00:13:34 2016 +0200
files:       Doc/c-api/unicode.rst Include/unicodeobject.h Misc/NEWS Objects/unicodeobject.c
description:
Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions.


diff -r dee2a4ca62f7 -r b431d39da67f Doc/c-api/unicode.rst
--- a/Doc/c-api/unicode.rst	Sun Dec 04 15:42:13 2016 +0200
+++ b/Doc/c-api/unicode.rst	Tue Dec 06 00:13:34 2016 +0200
@@ -1652,8 +1652,7 @@
    ASCII-encoded strings, but the function interprets the input string as
    ISO-8859-1 if it contains non-ASCII characters.
 
-   This function returns ``-1`` upon failure, so one should call
-   :c:func:`PyErr_Occurred` to check for errors.
+   This function does not raise exceptions.
 
 
 .. c:function:: PyObject* PyUnicode_RichCompare(PyObject *left,  PyObject *right,  int op)
diff -r dee2a4ca62f7 -r b431d39da67f Include/unicodeobject.h
--- a/Include/unicodeobject.h	Sun Dec 04 15:42:13 2016 +0200
+++ b/Include/unicodeobject.h	Tue Dec 06 00:13:34 2016 +0200
@@ -2023,7 +2023,7 @@
    equal, and greater than, respectively.  It is best to pass only
    ASCII-encoded strings, but the function interprets the input string as
    ISO-8859-1 if it contains non-ASCII characters.
-   Raise an exception and return -1 on error. */
+   This function does not raise exceptions. */
 
 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
     PyObject *left,
diff -r dee2a4ca62f7 -r b431d39da67f Misc/NEWS
--- a/Misc/NEWS	Sun Dec 04 15:42:13 2016 +0200
+++ b/Misc/NEWS	Tue Dec 06 00:13:34 2016 +0200
@@ -506,6 +506,8 @@
 C API
 -----
 
+- Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions.
+
 - Issue #26754: PyUnicode_FSDecoder() accepted a filename argument encoded as
   an iterable of integers. Now only strings and bytes-like objects are accepted.
 
diff -r dee2a4ca62f7 -r b431d39da67f Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Sun Dec 04 15:42:13 2016 +0200
+++ b/Objects/unicodeobject.c	Tue Dec 06 00:13:34 2016 +0200
@@ -10793,10 +10793,24 @@
     Py_ssize_t i;
     int kind;
     Py_UCS4 chr;
+    const unsigned char *ustr = (const unsigned char *)str;
 
     assert(_PyUnicode_CHECK(uni));
-    if (PyUnicode_READY(uni) == -1)
-        return -1;
+    if (!PyUnicode_IS_READY(uni)) {
+        const wchar_t *ws = _PyUnicode_WSTR(uni);
+        /* Compare Unicode string and source character set string */
+        for (i = 0; (chr = ws[i]) && ustr[i]; i++) {
+            if (chr != ustr[i])
+                return (chr < ustr[i]) ? -1 : 1;
+        }
+        /* This check keeps Python strings that end in '\0' from comparing equal
+         to C strings identical up to that point. */
+        if (_PyUnicode_WSTR_LENGTH(uni) != i || chr)
+            return 1; /* uni is longer */
+        if (ustr[i])
+            return -1; /* str is longer */
+        return 0;
+    }
     kind = PyUnicode_KIND(uni);
     if (kind == PyUnicode_1BYTE_KIND) {
         const void *data = PyUnicode_1BYTE_DATA(uni);