Skip to content

Commit c0f6e5a

Browse files
authored
Move backslash unescaping to treeprocessor
By unescaping backslash escapes in a treeprocessor, the text is properly escaped during serialization. Fixes #1131. Because various third-party extensions may be calling the old class at `postprocessors.UnescapePostprocessor`, the old class remains in the codebase, but it has been deprecated and will be removed in a future release. The new class `treeprocessors.UnescapeTreeprocessor` should be used instead.
1 parent 77fb7f1 commit c0f6e5a

File tree

6 files changed

+82
-8
lines changed

6 files changed

+82
-8
lines changed

‎docs/change_log/release-3.4.md‎

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,18 @@ markdown.markdown(src, extensions=[TableExtension(use_align_attribute=True)])
3030

3131
In addition, tests were moved to the modern test environment.
3232

33+
### Backslash unescaping moved to Treeprocessor (#1131).
34+
35+
Unescaping backslash escapes has been moved to a Treeprocessor. However, it is
36+
recognized that various third-party extensions may be calling the old class at
37+
`postprocessors.UnescapePostprocessor`. Therefore, the old class remains in the
38+
code base, but has been deprecated and will be removed in a future release. The
39+
new class `treeprocessors.UnescapeTreeprocessor` should be used instead.
40+
3341
### Previously deprecated objects have been removed
3442

3543
Various objects were deprecated in version 3.0 and began raising deprecation
36-
warnings (see the [version 3.0 release notes] for details). Any of those object
44+
warnings (see the [version 3.0 release notes] for details). Any of those objects
3745
which remained in version 3.3 have been removed from the code base in version 3.4
3846
and will now raise errors. A summary of the objects is provided below.
3947

‎markdown/extensions/toc.py‎

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from . import Extension
1717
from ..treeprocessors import Treeprocessor
1818
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
19-
from ..postprocessors import UnescapePostprocessor
19+
from ..treeprocessors import UnescapeTreeprocessor
2020
import re
2121
import html
2222
import unicodedata
@@ -84,8 +84,8 @@ def _html_sub(m):
8484

8585
def unescape(text):
8686
""" Unescape escaped text. """
87-
c = UnescapePostprocessor()
88-
return c.run(text)
87+
c = UnescapeTreeprocessor()
88+
return c.unescape(text)
8989

9090

9191
def nest_toc_tokens(toc_list):
@@ -289,10 +289,10 @@ def run(self, doc):
289289
toc_tokens.append({
290290
'level': int(el.tag[-1]),
291291
'id': el.attrib["id"],
292-
'name': unescape(stashedHTML2text(
292+
'name': stashedHTML2text(
293293
code_escape(el.attrib.get('data-toc-label', text)),
294294
self.md, strip_entities=False
295-
))
295+
)
296296
})
297297

298298
# Remove the data-toc-label attribute as it is no longer needed

‎markdown/postprocessors.py‎

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ def build_postprocessors(md, **kwargs):
3737
postprocessors = util.Registry()
3838
postprocessors.register(RawHtmlPostprocessor(md), 'raw_html', 30)
3939
postprocessors.register(AndSubstitutePostprocessor(), 'amp_substitute', 20)
40-
postprocessors.register(UnescapePostprocessor(), 'unescape', 10)
4140
return postprocessors
4241

4342

@@ -122,6 +121,10 @@ def run(self, text):
122121
return text
123122

124123

124+
@util.deprecated(
125+
"This class will be removed in the future; "
126+
"use 'treeprocessors.UnescapeTreeprocessor' instead."
127+
)
125128
class UnescapePostprocessor(Postprocessor):
126129
""" Restore escaped chars """
127130

‎markdown/treeprocessors.py‎

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
License: BSD (see LICENSE.md for details).
2020
"""
2121

22+
import re
2223
import xml.etree.ElementTree as etree
2324
from . import util
2425
from . import inlinepatterns
@@ -29,6 +30,7 @@ def build_treeprocessors(md, **kwargs):
2930
treeprocessors = util.Registry()
3031
treeprocessors.register(InlineProcessor(md), 'inline', 20)
3132
treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10)
33+
treeprocessors.register(UnescapeTreeprocessor(md), 'unescape', 0)
3234
return treeprocessors
3335

3436

@@ -429,3 +431,28 @@ def run(self, root):
429431
# Only prettify code containing text only
430432
if not len(code) and code.text is not None:
431433
code.text = util.AtomicString(code.text.rstrip() + '\n')
434+
435+
436+
class UnescapeTreeprocessor(Treeprocessor):
    """ Replace escape placeholders in the tree with the characters they encode. """

    # A placeholder is a run of decimal digits wrapped in the `util.STX` and
    # `util.ETX` marker strings; the digits are the character's code point.
    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

    def _unescape(self, m):
        """ Return the character whose code point was captured by `RE`. """
        codepoint = int(m.group(1))
        return chr(codepoint)

    def unescape(self, text):
        """ Return `text` with every placeholder swapped for its character. """
        return self.RE.sub(self._unescape, text)

    def run(self, root):
        """ Unescape the text, tail and attribute values of each element under `root`. """
        for node in root.iter():
            # The text of `code` elements is skipped; tail and attributes are not.
            if node.tag != 'code' and node.text:
                node.text = self.unescape(node.text)
            if node.tail:
                node.tail = self.unescape(node.tail)
            for name, raw in node.items():
                node.set(name, self.unescape(raw))

‎tests/basic/backlash-escapes.html‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<p>Right bracket: ]</p>
1010
<p>Left paren: (</p>
1111
<p>Right paren: )</p>
12-
<p>Greater-than: ></p>
12+
<p>Greater-than: &gt;</p>
1313
<p>Hash: #</p>
1414
<p>Period: .</p>
1515
<p>Bang: !</p>
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Python Markdown
4+
5+
A Python implementation of John Gruber's Markdown.
6+
7+
Documentation: https://python-markdown.github.io/
8+
GitHub: https://github.com/Python-Markdown/markdown/
9+
PyPI: https://pypi.org/project/Markdown/
10+
11+
Started by Manfred Stienstra (http://www.dwerg.net/).
12+
Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
13+
Currently maintained by Waylan Limberg (https://github.com/waylan),
14+
Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
15+
16+
Copyright 2007-2022 The Python Markdown Project (v. 1.7 and later)
17+
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
18+
Copyright 2004 Manfred Stienstra (the original version)
19+
20+
License: BSD (see LICENSE.md for details).
21+
"""
22+
23+
from markdown.test_tools import TestCase
24+
25+
26+
class TestSmarty(TestCase):
    """ Syntax tests for the `smarty` extension. """

    default_kwargs = {'extensions': ['smarty']}

    def test_escaped_attr(self):
        """ A backslash-escaped quote in image alt text is serialized as `&quot;`. """
        # The source must be a raw string: in a normal literal `\"` collapses to
        # a bare `"`, so no backslash escape would ever reach the parser and the
        # test would not exercise unescaping of attribute values at all.
        self.assertMarkdownRenders(
            r'![x\"x](x)',
            '<p><img alt="x&quot;x" src="x" /></p>'
        )

    # TODO: Move rest of smarty tests here.

0 commit comments

Comments
 (0)