Skip to content

Commit 74c0166

Browse files
authored
Fix escaping of HTML special chars (<, >, &) in .toc_tokens
Fixes #906.
1 parent 1de3083 commit 74c0166

File tree

3 files changed: 44 additions & 16 deletions

‎docs/change_log/index.md‎

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,10 @@ title: Change Log
33
Python-Markdown Change Log
44
=========================
55

6-
Under Development: Released version 3.1.1 (a bug-fix release).
6+
Under Development: Released version 3.2.1 (a bug-fix release).
7+
8+
* The `name` property in `toc_tokens` from the TOC extension now
9+
escapes HTML special characters (`<`, `>`, and `&`).
710

811
Feb 7, 2020: Released version 3.2 ([Notes](release-3.2.md)).
912

‎markdown/extensions/toc.py‎

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515

1616
from . import Extension
1717
from ..treeprocessors import Treeprocessor
18-
from ..util import parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
18+
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
1919
from ..postprocessors import UnescapePostprocessor
2020
import re
2121
import unicodedata
@@ -264,7 +264,8 @@ def run(self, doc):
264264
'level': int(el.tag[-1]),
265265
'id': el.attrib["id"],
266266
'name': unescape(stashedHTML2text(
267-
el.attrib.get('data-toc-label', text), self.md, strip_entities=False
267+
code_escape(el.attrib.get('data-toc-label', text)),
268+
self.md, strip_entities=False
268269
))
269270
})
270271

‎tests/test_extensions.py‎

Lines changed: 37 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -827,6 +827,25 @@ def testHtmlEntities(self):
827827
{'level': 1, 'id': 'foo-bar', 'name': 'Foo &amp; bar', 'children': []},
828828
])
829829

830+
def testHtmlSpecialChars(self):
831+
""" Test Headers with HTML special characters. """
832+
text = '# Foo > & bar'
833+
self.assertEqual(
834+
self.md.convert(text),
835+
'<h1 id="foo-bar">Foo &gt; &amp; bar</h1>'
836+
)
837+
self.assertEqual(
838+
self.md.toc,
839+
'<div class="toc">\n'
840+
'<ul>\n' # noqa
841+
'<li><a href="#foo-bar">Foo &gt; &amp; bar</a></li>\n' # noqa
842+
'</ul>\n' # noqa
843+
'</div>\n'
844+
)
845+
self.assertEqual(self.md.toc_tokens, [
846+
{'level': 1, 'id': 'foo-bar', 'name': 'Foo &gt; &amp; bar', 'children': []},
847+
])
848+
830849
def testRawHtml(self):
831850
""" Test Headers with raw HTML. """
832851
text = '# Foo <b>Bar</b> Baz.'
@@ -1000,35 +1019,40 @@ def testWithAttrList(self):
10001019
md = markdown.Markdown(extensions=['toc', 'attr_list'])
10011020
text = ('# Header 1\n\n'
10021021
'## Header 2 { #foo }\n\n'
1003-
'## Header 3 { data-toc-label="Foo Bar"}\n\n'
1004-
'# Header 4 { data-toc-label="Foo <b>Baz</b>" }')
1022+
'## Header 3 { data-toc-label="Foo Bar" }\n\n'
1023+
'# Header 4 { data-toc-label="Foo > Baz" }\n\n'
1024+
'# Header 5 { data-toc-label="Foo <b>Quux</b>" }')
1025+
10051026
self.assertEqual(
10061027
md.convert(text),
10071028
'<h1 id="header-1">Header 1</h1>\n'
10081029
'<h2 id="foo">Header 2</h2>\n'
10091030
'<h2 id="header-3">Header 3</h2>\n'
1010-
'<h1 id="header-4">Header 4</h1>'
1031+
'<h1 id="header-4">Header 4</h1>\n'
1032+
'<h1 id="header-5">Header 5</h1>'
10111033
)
10121034
self.assertEqual(
10131035
md.toc,
10141036
'<div class="toc">\n'
1015-
'<ul>\n' # noqa
1016-
'<li><a href="#header-1">Header 1</a>' # noqa
1017-
'<ul>\n' # noqa
1018-
'<li><a href="#foo">Header 2</a></li>\n' # noqa
1019-
'<li><a href="#header-3">Foo Bar</a></li>\n' # noqa
1020-
'</ul>\n' # noqa
1021-
'</li>\n' # noqa
1022-
'<li><a href="#header-4">Foo Baz</a></li>\n' # noqa
1023-
'</ul>\n' # noqa
1037+
'<ul>\n' # noqa
1038+
'<li><a href="#header-1">Header 1</a>' # noqa
1039+
'<ul>\n' # noqa
1040+
'<li><a href="#foo">Header 2</a></li>\n' # noqa
1041+
'<li><a href="#header-3">Foo Bar</a></li>\n' # noqa
1042+
'</ul>\n' # noqa
1043+
'</li>\n' # noqa
1044+
'<li><a href="#header-4">Foo &gt; Baz</a></li>\n' # noqa
1045+
'<li><a href="#header-5">Foo Quux</a></li>\n' # noqa
1046+
'</ul>\n' # noqa
10241047
'</div>\n'
10251048
)
10261049
self.assertEqual(md.toc_tokens, [
10271050
{'level': 1, 'id': 'header-1', 'name': 'Header 1', 'children': [
10281051
{'level': 2, 'id': 'foo', 'name': 'Header 2', 'children': []},
10291052
{'level': 2, 'id': 'header-3', 'name': 'Foo Bar', 'children': []}
10301053
]},
1031-
{'level': 1, 'id': 'header-4', 'name': 'Foo Baz', 'children': []},
1054+
{'level': 1, 'id': 'header-4', 'name': 'Foo &gt; Baz', 'children': []},
1055+
{'level': 1, 'id': 'header-5', 'name': 'Foo Quux', 'children': []},
10321056
])
10331057

10341058
def testUniqueFunc(self):

0 commit comments

Comments
 (0)