changeset:   105468:db56e39ea067
parent:      105465:90d318d40a83
parent:      105467:5bdc8e1a50c8
user:        Serhiy Storchaka <storchaka@gmail.com>
date:        Tue Dec 06 00:20:26 2016 +0200
files:       Include/unicodeobject.h Misc/NEWS Objects/unicodeobject.c
description:
Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions.


diff -r 90d318d40a83 -r db56e39ea067 Doc/c-api/unicode.rst
--- a/Doc/c-api/unicode.rst	Mon Dec 05 18:23:27 2016 +0100
+++ b/Doc/c-api/unicode.rst	Tue Dec 06 00:20:26 2016 +0200
@@ -1657,8 +1657,7 @@
    ASCII-encoded strings, but the function interprets the input string as
    ISO-8859-1 if it contains non-ASCII characters.
 
-   This function returns ``-1`` upon failure, so one should call
-   :c:func:`PyErr_Occurred` to check for errors.
+   This function does not raise exceptions.
 
 
 .. c:function:: PyObject* PyUnicode_RichCompare(PyObject *left,  PyObject *right,  int op)
diff -r 90d318d40a83 -r db56e39ea067 Include/unicodeobject.h
--- a/Include/unicodeobject.h	Mon Dec 05 18:23:27 2016 +0100
+++ b/Include/unicodeobject.h	Tue Dec 06 00:20:26 2016 +0200
@@ -2055,7 +2055,7 @@
    equal, and greater than, respectively.  It is best to pass only
    ASCII-encoded strings, but the function interprets the input string as
    ISO-8859-1 if it contains non-ASCII characters.
-   Raise an exception and return -1 on error. */
+   This function does not raise exceptions. */
 
 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
     PyObject *left,
diff -r 90d318d40a83 -r db56e39ea067 Misc/NEWS
--- a/Misc/NEWS	Mon Dec 05 18:23:27 2016 +0100
+++ b/Misc/NEWS	Tue Dec 06 00:20:26 2016 +0200
@@ -480,6 +480,8 @@
 C API
 -----
 
+- Issue #28808: PyUnicode_CompareWithASCIIString() now never raises exceptions.
+
 - Issue #28761: The fields name and doc of structures PyMemberDef, PyGetSetDef,
   PyStructSequence_Field, PyStructSequence_Desc, and wrapperbase are now of
   type ``const char *`` rather of ``char *``.
diff -r 90d318d40a83 -r db56e39ea067 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Mon Dec 05 18:23:27 2016 +0100
+++ b/Objects/unicodeobject.c	Tue Dec 06 00:20:26 2016 +0200
@@ -10977,10 +10977,24 @@
     Py_ssize_t i;
     int kind;
     Py_UCS4 chr;
+    const unsigned char *ustr = (const unsigned char *)str;
 
     assert(_PyUnicode_CHECK(uni));
-    if (PyUnicode_READY(uni) == -1)
-        return -1;
+    if (!PyUnicode_IS_READY(uni)) {
+        const wchar_t *ws = _PyUnicode_WSTR(uni);
+        /* Compare Unicode string and source character set string */
+        for (i = 0; (chr = ws[i]) && ustr[i]; i++) {
+            if (chr != ustr[i])
+                return (chr < ustr[i]) ? -1 : 1;
+        }
+        /* This check keeps Python strings that end in '\0' from comparing equal
+         to C strings identical up to that point. */
+        if (_PyUnicode_WSTR_LENGTH(uni) != i || chr)
+            return 1; /* uni is longer */
+        if (ustr[i])
+            return -1; /* str is longer */
+        return 0;
+    }
     kind = PyUnicode_KIND(uni);
     if (kind == PyUnicode_1BYTE_KIND) {
         const void *data = PyUnicode_1BYTE_DATA(uni);