-
-
Notifications
You must be signed in to change notification settings - Fork 33.7k
[Security] bpo-30713: Reject newline in urllib.parse #2303
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -981,6 +981,15 @@ def test_splittype(self): | |
| self.assertEqual(splittype('type:'), ('type', '')) | ||
| self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string')) | ||
|
|
||
| # bpo-30713: The newline character U+000A is invalid in URLs | ||
| for url in ( | ||
| '\ntype:string', | ||
| 'ty\npe:string', | ||
| ): | ||
| self.assertEqual(splittype(url), (None, url)) | ||
| self.assertEqual(splittype('data:xxx\nyyy'), ('data', 'xxx\nyyy')) | ||
| self.assertEqual(splittype('data:xxxyyy\n'), ('data', 'xxxyyy\n')) | ||
|
|
||
| def test_splithost(self): | ||
| splithost = urllib.parse.splithost | ||
| self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'), | ||
|
|
@@ -1010,6 +1019,15 @@ def test_splithost(self): | |
| self.assertEqual(splithost("//example.net/file#"), | ||
| ('example.net', '/file#')) | ||
|
|
||
| # bpo-30713: The newline character U+000A is invalid in URLs | ||
| for url in ( | ||
| '\n//hostname/url', | ||
| '//host\nname/url', | ||
| '//hostname/u\nrl', | ||
| '//hostname/url\n', | ||
| ): | ||
| self.assertEqual(splithost(url), (None, url)) | ||
|
|
||
| def test_splituser(self): | ||
| splituser = urllib.parse.splituser | ||
| self.assertEqual(splituser('User:[email protected]:080'), | ||
|
|
@@ -1052,6 +1070,15 @@ def test_splitport(self): | |
| self.assertEqual(splitport('[::1]'), ('[::1]', None)) | ||
| self.assertEqual(splitport(':88'), ('', '88')) | ||
|
|
||
| # bpo-30713: The newline character U+000A is invalid in URLs | ||
| for url in ( | ||
| '\nparrot:88', | ||
| 'par\nrot:88', | ||
| 'parrot:8\n8', | ||
| 'parrot:88\n', | ||
| ): | ||
| self.assertEqual(splitport(url), (url, None)) | ||
|
|
||
| def test_splitnport(self): | ||
| splitnport = urllib.parse.splitnport | ||
| self.assertEqual(splitnport('parrot:88'), ('parrot', 88)) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -934,9 +934,9 @@ def splittype(url): | |
| """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" | ||
| global _typeprog | ||
| if _typeprog is None: | ||
| _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL) | ||
| _typeprog = re.compile('([^/:\n]+):(.*)', re.DOTALL) | ||
|
|
||
| match = _typeprog.match(url) | ||
| match = _typeprog.fullmatch(url) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no difference between |
||
| if match: | ||
| scheme, data = match.groups() | ||
| return scheme.lower(), data | ||
|
|
@@ -947,9 +947,9 @@ def splithost(url): | |
| """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" | ||
| global _hostprog | ||
| if _hostprog is None: | ||
| _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL) | ||
| _hostprog = re.compile('//([^/#?\n]*)(.*)') | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will caused just returning a tuple If you want to make
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Right now, we don't raise an exception if an URL looks "invalid". So I tried to fit into the current behaviour: return (None, invalid_url). Maybe we should raise an exception instead? |
||
|
|
||
| match = _hostprog.match(url) | ||
| match = _hostprog.fullmatch(url) | ||
| if match: | ||
| host_port, path = match.groups() | ||
| if path and path[0] != '/': | ||
|
|
@@ -973,9 +973,9 @@ def splitport(host): | |
| """splitport('host:port') --> 'host', 'port'.""" | ||
| global _portprog | ||
| if _portprog is None: | ||
| _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL) | ||
| _portprog = re.compile('(.*):([0-9]*)') | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same as for |
||
|
|
||
| match = _portprog.match(host) | ||
| match = _portprog.fullmatch(host) | ||
| if match: | ||
| host, port = match.groups() | ||
| if port: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| The splittype(), splitport() and splithost() functions of the urllib.parse | ||
| module now reject URLs which contain a newline character, but splittype() | ||
| accepts newlines after the type. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is wrong with
'\n'in a type? In any case it is compared with fixed set of supported schemes.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't make sense to me to have a newline in a type, it really looks like an attempt to compromise a server.