changeset: 94282:25ecf3d0ea03 branch: 3.4 parent: 94277:2de90090e486 user: Benjamin Peterson date: Sun Jan 25 23:30:30 2015 -0500 files: Lib/email/feedparser.py Lib/test/test_email/test_email.py Lib/test/test_httplib.py Misc/NEWS description: handle headers with no key (closes #19996) Patch by Cory Benfield. diff -r 2de90090e486 -r 25ecf3d0ea03 Lib/email/feedparser.py --- a/Lib/email/feedparser.py Mon Jan 26 01:22:54 2015 +0200 +++ b/Lib/email/feedparser.py Sun Jan 25 23:30:30 2015 -0500 @@ -33,7 +33,7 @@ NLCRE_crack = re.compile('(\r\n|\r|\n)') # RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". -headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') +headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' NL = '\n' @@ -511,6 +511,15 @@ # There will always be a colon, because if there wasn't the part of # the parser that calls us would have started parsing the body. i = line.find(':') + + # If the colon is on the start of the line the header is clearly + # malformed, but we might be able to salvage the rest of the + # message. Track the error but keep going. 
+ if i == 0: + defect = errors.InvalidHeaderDefect("Missing header name.") + self._cur.defects.append(defect) + continue + assert i>0, "_parse_headers fed line with no : and no leading WS" lastheader = line[:i] lastvalue = [line] diff -r 2de90090e486 -r 25ecf3d0ea03 Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py Mon Jan 26 01:22:54 2015 +0200 +++ b/Lib/test/test_email/test_email.py Sun Jan 25 23:30:30 2015 -0500 @@ -3389,6 +3389,12 @@ feedparser.feed(chunk) return feedparser.close() + def test_empty_header_name_handled(self): + # Issue 19996 + msg = self.parse("First: val\n: bad\nSecond: val") + self.assertEqual(msg['First'], 'val') + self.assertEqual(msg['Second'], 'val') + def test_newlines(self): m = self.parse(['a:\nb:\rc:\r\nd:\n']) self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) diff -r 2de90090e486 -r 25ecf3d0ea03 Lib/test/test_httplib.py --- a/Lib/test/test_httplib.py Mon Jan 26 01:22:54 2015 +0200 +++ b/Lib/test/test_httplib.py Sun Jan 25 23:30:30 2015 -0500 @@ -167,6 +167,16 @@ conn.request('GET', '/foo') self.assertTrue(sock.data.startswith(expected)) + def test_malformed_headers_coped_with(self): + # Issue 19996 + body = "HTTP/1.1 200 OK\r\nFirst: val\r\n: nval\r\nSecond: val\r\n\r\n" + sock = FakeSocket(body) + resp = client.HTTPResponse(sock) + resp.begin() + + self.assertEqual(resp.getheader('First'), 'val') + self.assertEqual(resp.getheader('Second'), 'val') + class BasicTest(TestCase): def test_status_lines(self): diff -r 2de90090e486 -r 25ecf3d0ea03 Misc/NEWS --- a/Misc/NEWS Mon Jan 26 01:22:54 2015 +0200 +++ b/Misc/NEWS Sun Jan 25 23:30:30 2015 -0500 @@ -47,6 +47,9 @@ Library ------- +- Issue #19996: :class:`email.feedparser.FeedParser` now handles (malformed) + headers with no key rather than assuming the body has started. + - Issue #23248: Update ssl error codes from latest OpenSSL git master. - Issue #23098: 64-bit dev_t is now supported in the os module.