changeset:   101031:1f2cfcd5a83f
branch:      3.5
parent:      101027:386712b16c74
user:        Martin Panter
date:        Sun Apr 17 02:17:03 2016 +0000
files:       Lib/test/test_wsgiref.py Lib/wsgiref/simple_server.py Misc/ACKS Misc/NEWS
description:
Issue #26717: Stop encoding Latin-1-ized WSGI paths with UTF-8

Patch by Anthony Sottile.


diff -r 386712b16c74 -r 1f2cfcd5a83f Lib/test/test_wsgiref.py
--- a/Lib/test/test_wsgiref.py	Sat Apr 16 23:38:25 2016 +0000
+++ b/Lib/test/test_wsgiref.py	Sun Apr 17 02:17:03 2016 +0000
@@ -1,3 +1,4 @@
+from unittest import mock
 from unittest import TestCase
 from wsgiref.util import setup_testing_defaults
 from wsgiref.headers import Headers
@@ -221,6 +222,29 @@
             b"data",
             out)
 
+    def test_cp1252_url(self):
+        def app(e, s):
+            s("200 OK", [
+                ("Content-Type", "text/plain"),
+                ("Date", "Wed, 24 Dec 2008 13:29:32 GMT"),
+                ])
+            # PEP3333 says environ variables are decoded as latin1.
+            # Encode as latin1 to get original bytes
+            return [e["PATH_INFO"].encode("latin1")]
+
+        out, err = run_amock(
+            validator(app), data=b"GET /\x80%80 HTTP/1.0")
+        self.assertEqual(
+            [
+                b"HTTP/1.0 200 OK",
+                mock.ANY,
+                b"Content-Type: text/plain",
+                b"Date: Wed, 24 Dec 2008 13:29:32 GMT",
+                b"",
+                b"/\x80\x80",
+            ],
+            out.splitlines())
+
 
 class UtilityTests(TestCase):
 
diff -r 386712b16c74 -r 1f2cfcd5a83f Lib/wsgiref/simple_server.py
--- a/Lib/wsgiref/simple_server.py	Sat Apr 16 23:38:25 2016 +0000
+++ b/Lib/wsgiref/simple_server.py	Sun Apr 17 02:17:03 2016 +0000
@@ -82,7 +82,7 @@
         else:
             path,query = self.path,''
 
-        env['PATH_INFO'] = urllib.parse.unquote_to_bytes(path).decode('iso-8859-1')
+        env['PATH_INFO'] = urllib.parse.unquote(path, 'iso-8859-1')
         env['QUERY_STRING'] = query
 
         host = self.address_string()
diff -r 386712b16c74 -r 1f2cfcd5a83f Misc/ACKS
--- a/Misc/ACKS	Sat Apr 16 23:38:25 2016 +0000
+++ b/Misc/ACKS	Sun Apr 17 02:17:03 2016 +0000
@@ -1376,6 +1376,7 @@
 Paul Sokolovsky
 Evgeny Sologubov
 Cody Somerville
+Anthony Sottile
 Edoardo Spadolini
 Geoffrey Spear
 Clay Spence
diff -r 386712b16c74 -r 1f2cfcd5a83f Misc/NEWS
--- a/Misc/NEWS	Sat Apr 16 23:38:25 2016 +0000
+++ b/Misc/NEWS	Sun Apr 17 02:17:03 2016 +0000
@@ -107,6 +107,9 @@
 Library
 -------
 
+- Issue #26717: Stop encoding Latin-1-ized WSGI paths with UTF-8.  Patch by
+  Anthony Sottile.
+
 - Issue #26735: Fix :func:`os.urandom` on Solaris 11.3 and newer when reading
   more than 1,024 bytes: call ``getrandom()`` multiple times with a limit of
   1024 bytes per call.