python · vstinner · Jun 20, 2017
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
@@ -981,6 +981,15 @@ def test_splittype(self):
         self.assertEqual(splittype('type:'), ('type', ''))
         self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
 
+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\ntype:string',
+            'ty\npe:string',
+            'type:str\ning',
+            'type:string\n',
+        ):
+            self.assertEqual(splittype(url), (None, url))
+
     def test_splithost(self):
         splithost = urllib.parse.splithost
         self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
@@ -1010,6 +1019,15 @@ def test_splithost(self):
         self.assertEqual(splithost("//example.net/file#"),
                          ('example.net', '/file#'))
 
+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\n//hostname/url',
+            '//host\nname/url',
+            '//hostname/u\nrl',
+            '//hostname/url\n',
+        ):
+            self.assertEqual(splithost(url), (None, url))
+
     def test_splituser(self):
         splituser = urllib.parse.splituser
         self.assertEqual(splituser('User:Pass@www.python.org:080'),
@@ -1052,6 +1070,15 @@ def test_splitport(self):
         self.assertEqual(splitport('[::1]'), ('[::1]', None))
         self.assertEqual(splitport(':88'), ('', '88'))
 
+        # bpo-30713: The newline character U+000A is invalid in URLs
+        for url in (
+            '\nparrot:88',
+            'par\nrot:88',
+            'parrot:8\n8',
+            'parrot:88\n',
+        ):
+            self.assertEqual(splitport(url), (url, None))
+
     def test_splitnport(self):
         splitnport = urllib.parse.splitnport
         self.assertEqual(splitnport('parrot:88'), ('parrot', 88))

diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
@@ -934,9 +934,9 @@ def splittype(url):
     """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
     global _typeprog
     if _typeprog is None:
-        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
+        _typeprog = re.compile('([^/:\n]+):(.*)')
 
-    match = _typeprog.match(url)
+    match = _typeprog.fullmatch(url)
     if match:
         scheme, data = match.groups()
         return scheme.lower(), data
@@ -947,9 +947,9 @@ def splithost(url):
     """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
     global _hostprog
     if _hostprog is None:
-        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
+        _hostprog = re.compile('//([^/#?\n]*)(.*)')
 
-    match = _hostprog.match(url)
+    match = _hostprog.fullmatch(url)
     if match:
         host_port, path = match.groups()
         if path and path[0] != '/':
@@ -973,9 +973,9 @@ def splitport(host):
     """splitport('host:port') --> 'host', 'port'."""
     global _portprog
     if _portprog is None:
-        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)
+        _portprog = re.compile('(.*):([0-9]*)')
 
-    match = _portprog.match(host)
+    match = _portprog.fullmatch(host)
     if match:
         host, port = match.groups()
         if port:

diff --git a/Misc/NEWS b/Misc/NEWS
@@ -368,6 +368,9 @@ Extension Modules
 Library
 -------
 
+- [Security] bpo-30713: The splittype(), splitport() and splithost() functions
+  of the urllib.parse module no longer split URLs that contain a newline.
+
 - bpo-29755: Fixed the lgettext() family of functions in the gettext module.
   They now always return bytes.