Skip to content

Commit 88ebfb1

Browse files
committed
#15114: The html.parser module now emits a DeprecationWarning when the strict argument of HTMLParser or the HTMLParser.error method is used.
1 parent 28f0bea commit 88ebfb1

File tree

4 files changed

+29
-9
lines changed

4 files changed

+29
-9
lines changed

‎Doc/library/html.parser.rst‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ as they are encountered::
7474
def handle_data(self, data):
7575
print("Encountered some data :", data)
7676

77-
parser = MyHTMLParser(strict=False)
77+
parser = MyHTMLParser()
7878
parser.feed('<html><head><title>Test</title></head>'
7979
'<body><h1>Parse me!</h1></body></html>')
8080

@@ -272,7 +272,7 @@ examples::
272272
def handle_decl(self, data):
273273
print("Decl :", data)
274274

275-
parser = MyHTMLParser(strict=False)
275+
parser = MyHTMLParser()
276276

277277
Parsing a doctype::
278278

‎Lib/html/parser.py‎

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ def __str__(self):
9494
return result
9595

9696

97+
_strict_sentinel = object()
98+
9799
class HTMLParser(_markupbase.ParserBase):
98100
"""Find tags and other markup and call handler functions.
99101
@@ -116,16 +118,18 @@ class HTMLParser(_markupbase.ParserBase):
116118

117119
CDATA_CONTENT_ELEMENTS = ("script", "style")
118120

119-
def __init__(self, strict=False):
121+
def __init__(self, strict=_strict_sentinel):
120122
"""Initialize and reset this instance.
121123
122124
If strict is set to False (the default) the parser will parse invalid
123125
markup, otherwise it will raise an error. Note that the strict mode
124-
is deprecated.
126+
and argument are deprecated.
125127
"""
126-
if strict:
127-
warnings.warn("The strict mode is deprecated.",
128+
if strict is not _strict_sentinel:
129+
warnings.warn("The strict argument and mode are deprecated.",
128130
DeprecationWarning, stacklevel=2)
131+
else:
132+
strict = False # default
129133
self.strict = strict
130134
self.reset()
131135

@@ -151,6 +155,8 @@ def close(self):
151155
self.goahead(1)
152156

153157
def error(self, message):
158+
warnings.warn("The 'error' method is deprecated.",
159+
DeprecationWarning, stacklevel=2)
154160
raise HTMLParseError(message, self.getpos())
155161

156162
__starttag_text = None

‎Lib/test/test_htmlparser.py‎

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,9 @@ def parse(source=source):
9696
parser = self.get_collector()
9797
parser.feed(source)
9898
parser.close()
99-
self.assertRaises(html.parser.HTMLParseError, parse)
99+
with self.assertRaises(html.parser.HTMLParseError):
100+
with self.assertWarns(DeprecationWarning):
101+
parse()
100102

101103

102104
class HTMLParserStrictTestCase(TestCaseBase):
@@ -360,7 +362,16 @@ def test_condcoms(self):
360362
class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
361363

362364
def get_collector(self):
363-
return EventCollector(strict=False)
365+
return EventCollector()
366+
367+
def test_deprecation_warnings(self):
368+
with self.assertWarns(DeprecationWarning):
369+
EventCollector(strict=True)
370+
with self.assertWarns(DeprecationWarning):
371+
EventCollector(strict=False)
372+
with self.assertRaises(html.parser.HTMLParseError):
373+
with self.assertWarns(DeprecationWarning):
374+
EventCollector().error('test')
364375

365376
def test_tolerant_parsing(self):
366377
self._run_check('<html <html>te>>xt&a<<bc</a></html>\n'
@@ -676,7 +687,7 @@ def test_entityrefs_in_attributes(self):
676687
class AttributesTolerantTestCase(AttributesStrictTestCase):
677688

678689
def get_collector(self):
679-
return EventCollector(strict=False)
690+
return EventCollector()
680691

681692
def test_attr_funky_names2(self):
682693
self._run_check(

‎Misc/NEWS‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ Core and Builtins
3131
Library
3232
-------
3333

34+
- Issue #15114: The html.parser module now emits a DeprecationWarning when the
35+
strict argument of HTMLParser or the HTMLParser.error method is used.
36+
3437
- Issue #19410: Undo the special-casing removal of '' for
3538
importlib.machinery.FileFinder.
3639

0 commit comments

Comments
 (0)