changeset: 104650:cea983246919 branch: 3.6 parent: 104645:3937502c149d user: Serhiy Storchaka date: Sun Oct 23 12:11:19 2016 +0300 files: Lib/sre_parse.py Lib/test/test_re.py Misc/NEWS description: Issue #25953: re.sub() now raises an error for invalid numerical group reference in replacement template even if the pattern is not found in the string. Error message for invalid group reference now includes the group index and the position of the reference. Based on patch by SilentGhost. diff -r 3937502c149d -r cea983246919 Lib/sre_parse.py --- a/Lib/sre_parse.py Sat Oct 22 07:56:58 2016 -0700 +++ b/Lib/sre_parse.py Sun Oct 23 12:11:19 2016 +0300 @@ -395,7 +395,7 @@ len(escape)) state.checklookbehindgroup(group, source) return GROUPREF, group - raise source.error("invalid group reference", len(escape)) + raise source.error("invalid group reference %d" % group, len(escape) - 1) if len(escape) == 2: if c in ASCIILETTERS: raise source.error("bad escape %s" % escape, len(escape)) @@ -725,8 +725,8 @@ raise source.error("bad group number", len(condname) + 1) if condgroup >= MAXGROUPS: - raise source.error("invalid group reference", - len(condname) + 1) + msg = "invalid group reference %d" % condgroup + raise source.error(msg, len(condname) + 1) state.checklookbehindgroup(condgroup, source) elif char in FLAGS or char == "-": # flags @@ -883,7 +883,9 @@ literals = [] literal = [] lappend = literal.append - def addgroup(index): + def addgroup(index, pos): + if index > pattern.groups: + raise s.error("invalid group reference %d" % index, pos) if literal: literals.append(''.join(literal)) del literal[:] @@ -916,9 +918,9 @@ raise s.error("bad character in group name %r" % name, len(name) + 1) from None if index >= MAXGROUPS: - raise s.error("invalid group reference", + raise s.error("invalid group reference %d" % index, len(name) + 1) - addgroup(index) + addgroup(index, len(name) + 1) elif c == "0": if s.next in OCTDIGITS: this += sget() @@ -939,7 +941,7 @@ 'range 0-0o377' % this, 
len(this)) lappend(chr(c)) if not isoctal: - addgroup(int(this[1:])) + addgroup(int(this[1:]), len(this) - 1) else: try: this = chr(ESCAPES[this][1]) @@ -966,5 +968,5 @@ for index, group in groups: literals[index] = g(group) or empty except IndexError: - raise error("invalid group reference") + raise error("invalid group reference %d" % index) return empty.join(literals) diff -r 3937502c149d -r cea983246919 Lib/test/test_re.py --- a/Lib/test/test_re.py Sat Oct 22 07:56:58 2016 -0700 +++ b/Lib/test/test_re.py Sun Oct 23 12:11:19 2016 +0300 @@ -5,7 +5,6 @@ import re from re import Scanner import sre_compile -import sre_constants import sys import string import traceback @@ -186,18 +185,19 @@ r'octal escape value \777 outside of ' r'range 0-0o377', 0) - self.checkTemplateError('x', r'\1', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\8', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\9', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\11', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\18', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\90', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\99', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8' - self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1' - self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0' + self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1) + self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1) + self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1) + self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1) + self.checkTemplateError('x', r'\18', 
'x', 'invalid group reference 18', 1) + self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1) + self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1) + self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1) + self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1) + self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1) + self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1) + self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1) + self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1) # in python2.3 (etc), these loop endlessly in sre_parser.py self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') @@ -271,9 +271,9 @@ self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx', "bad character in group name '1a1'", 3) self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx', - 'invalid group reference') + 'invalid group reference 2', 3) self.checkTemplateError('(?P<a>x)', r'\2', 'xx', - 'invalid group reference') + 'invalid group reference 2', 1) with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): re.sub('(?P<a>x)', r'\g<ab>', 'xx') self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '') @@ -558,10 +558,11 @@ 'two branches', 10) def test_re_groupref_overflow(self): - self.checkTemplateError('()', r'\g<%s>' % sre_constants.MAXGROUPS, 'xx', - 'invalid group reference', 3) - self.checkPatternError(r'(?P<a>)(?(%d))' % sre_constants.MAXGROUPS, - 'invalid group reference', 10) + from sre_constants import MAXGROUPS + self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx', + 'invalid group reference %d' % MAXGROUPS, 3) + self.checkPatternError(r'(?P<a>)(?(%d))' % MAXGROUPS, + 'invalid group reference %d' % MAXGROUPS, 10) def test_re_groupref(self): self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), @@ -1007,7 +1008,7 @@ self.checkPatternError(r"\567", r'octal escape value \567 outside of ' r'range 
0-0o377', 0) - self.checkPatternError(r"\911", 'invalid group reference', 0) + self.checkPatternError(r"\911", 'invalid group reference 91', 1) self.checkPatternError(r"\x1", r'incomplete escape \x1', 0) self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0) self.checkPatternError(r"\u123", r'incomplete escape \u123', 0) @@ -1061,7 +1062,7 @@ self.checkPatternError(br"\567", r'octal escape value \567 outside of ' r'range 0-0o377', 0) - self.checkPatternError(br"\911", 'invalid group reference', 0) + self.checkPatternError(br"\911", 'invalid group reference 91', 1) self.checkPatternError(br"\x1", r'incomplete escape \x1', 0) self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0) diff -r 3937502c149d -r cea983246919 Misc/NEWS --- a/Misc/NEWS Sat Oct 22 07:56:58 2016 -0700 +++ b/Misc/NEWS Sun Oct 23 12:11:19 2016 +0300 @@ -23,6 +23,12 @@ Library ------- +- Issue #25953: re.sub() now raises an error for invalid numerical group + reference in replacement template even if the pattern is not found in + the string. Error message for invalid group reference now includes the + group index and the position of the reference. + Based on patch by SilentGhost. + - Issue #18219: Optimize csv.DictWriter for large number of columns. Patch by Mariatta Wijaya.