@@ -134,6 +134,23 @@ char _PyIO_get_console_type(PyObject *path_or_fd) {
134134 return m ;
135135}
136136
/* Return the largest n <= len such that the first n bytes of buf do not end
 * in the middle of a UTF-8 multi-byte sequence.
 *
 * buf: bytes to inspect (expected to hold UTF-8, but malformed input is
 *      tolerated).
 * len: number of bytes of buf to consider.
 *
 * Only the last three bytes are examined: a UTF-8 sequence is at most four
 * bytes long, so a truncated one has at most three bytes present.
 *
 * A trailing sequence that is already complete is kept, so the result is the
 * last boundary rather than merely some earlier one.  Malformed input (stray
 * continuation bytes, invalid lead bytes) is left untouched and the original
 * len is returned so that the consumer of the buffer can deal with it.
 *
 * This function never returns 0 for a non-empty buffer; when trimming would
 * leave nothing, the original len is returned instead.  For len == 0 the
 * result is 0.
 */
static DWORD
_find_last_utf8_boundary(const char *buf, DWORD len)
{
    for (DWORD count = 1; count <= 3 && count <= len; count++) {
        /* Read the byte as unsigned so the range tests below do not depend
         * on whether plain char is signed. */
        unsigned char c = (unsigned char)buf[len - count];
        DWORD need;

        if (c < 0x80) {
            /* ASCII byte.  Either the buffer simply ends with it
             * (count == 1) or it is followed by stray continuation bytes;
             * in both cases there is no truncated sequence to trim. */
            return len;
        }
        if (c < 0xc0) {
            /* Continuation byte (10xxxxxx): keep looking for the lead. */
            continue;
        }

        /* Lead byte: derive the total sequence length it announces. */
        if (c < 0xe0) {
            need = 2;
        }
        else if (c < 0xf0) {
            need = 3;
        }
        else if (c < 0xf8) {
            need = 4;
        }
        else {
            /* 0xf8..0xff never appear in UTF-8; treat as malformed. */
            need = 0;
        }

        /* Trim only when the sequence is truncated, and never trim the
         * whole buffer away (count < len keeps the result non-zero). */
        if (count < need && count < len) {
            return len - count;
        }
        /* Complete sequence, malformed sequence, or nothing would remain. */
        return len;
    }
    /* Either len == 0, or the last three bytes are all continuation bytes:
     * a complete 4-byte sequence or malformed input. */
    return len;
}
137154
138155/*[clinic input]
139156module _io
@@ -975,7 +992,7 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls,
975992{
976993 BOOL res = TRUE;
977994 wchar_t * wbuf ;
978- DWORD len , wlen , orig_len , n = 0 ;
995+ DWORD len , wlen , n = 0 ;
979996 HANDLE handle ;
980997
981998 if (self -> fd == -1 )
@@ -1007,21 +1024,8 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls,
10071024 have to reduce and recalculate. */
10081025 while (wlen > 32766 / sizeof (wchar_t )) {
10091026 len /= 2 ;
1010- orig_len = len ;
1011- /* Reduce the length until we hit the final byte of a UTF-8 sequence
1012- * (top bit is unset). Fix for github issue 82052.
1013- */
1014- while (len > 0 && (((char * )b -> buf )[len - 1 ] & 0x80 ) != 0 )
1015- -- len ;
1016- /* If we hit a length of 0, something has gone wrong. This shouldn't
1017- * be possible, as valid UTF-8 can have at most 3 non-final bytes
1018- * before a final one, and our buffer is way longer than that.
1019- * But to be on the safe side, if we hit this issue we just restore
1020- * the original length and let the console API sort it out.
1021- */
1022- if (len == 0 ) {
1023- len = orig_len ;
1024- }
1027+ /* Fix for github issues gh-110913 and gh-82052. */
1028+ len = _find_last_utf8_boundary (b -> buf , len );
10251029 wlen = MultiByteToWideChar (CP_UTF8 , 0 , b -> buf , len , NULL , 0 );
10261030 }
10271031 Py_END_ALLOW_THREADS
0 commit comments