python · vstinner · Jun 20, 2017 · serhiy-storchaka · Jun 28, 2017 · vstinner
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
@@ -981,6 +981,15 @@ def test_splittype(self):
         self.assertEqual(splittype('type:'), ('type', ''))
         self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
 
+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\ntype:string',
+            'ty\npe:string',
+        ):
+            self.assertEqual(splittype(url), (None, url))
+        self.assertEqual(splittype('data:xxx\nyyy'), ('data', 'xxx\nyyy'))
+        self.assertEqual(splittype('data:xxxyyy\n'), ('data', 'xxxyyy\n'))
+
     def test_splithost(self):
         splithost = urllib.parse.splithost
         self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
@@ -1010,6 +1019,15 @@ def test_splithost(self):
         self.assertEqual(splithost("//example.net/file#"),
                          ('example.net', '/file#'))
 
+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\n//hostname/url',
+            '//host\nname/url',
+            '//hostname/u\nrl',
+            '//hostname/url\n',
+        ):
+            self.assertEqual(splithost(url), (None, url))
+
     def test_splituser(self):
         splituser = urllib.parse.splituser
         self.assertEqual(splituser('User:[email protected]:080'),
@@ -1052,6 +1070,15 @@ def test_splitport(self):
         self.assertEqual(splitport('[::1]'), ('[::1]', None))
         self.assertEqual(splitport(':88'), ('', '88'))
 
+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\nparrot:88',
+            'par\nrot:88',
+            'parrot:8\n8',
+            'parrot:88\n',
+        ):
+            self.assertEqual(splitport(url), (url, None))
+
     def test_splitnport(self):
         splitnport = urllib.parse.splitnport
         self.assertEqual(splitnport('parrot:88'), ('parrot', 88))

diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
@@ -934,9 +934,9 @@ def splittype(url):
     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
     global _typeprog
     if _typeprog is None:
-        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
+        _typeprog = re.compile('([^/:\n]+):(.*)', re.DOTALL)
 
-    match = _typeprog.match(url)
+    match = _typeprog.fullmatch(url)
     if match:
         scheme, data = match.groups()
         return scheme.lower(), data
@@ -947,9 +947,9 @@ def splithost(url):
     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
     global _hostprog
     if _hostprog is None:
-        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
+        _hostprog = re.compile('//([^/#?\n]*)(.*)')
 
-    match = _hostprog.match(url)
+    match = _hostprog.fullmatch(url)
     if match:
         host_port, path = match.groups()
         if path and path[0] != '/':
@@ -973,9 +973,9 @@ def splitport(host):
     """splitport('host:port') --> 'host', 'port'."""
     global _portprog
     if _portprog is None:
-        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)
+        _portprog = re.compile('(.*):([0-9]*)')
 
-    match = _portprog.match(host)
+    match = _portprog.fullmatch(host)
     if match:
         host, port = match.groups()
         if port:

diff --git a/Misc/NEWS.d/next/Security/2017-06-28-03-50-42.bpo-30713.9tfV5r.rst b/Misc/NEWS.d/next/Security/2017-06-28-03-50-42.bpo-30713.9tfV5r.rst
@@ -0,0 +1,3 @@
+The splittype(), splithost() and splitport() functions of the urllib.parse
+module now reject input that contains a newline character (U+000A), except
+that splittype() still accepts newlines after the type.