diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index e1c749634b7..608f9a26642 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -404,7 +404,7 @@ def _escape(source, escape, state): pass raise source.error("bad escape %s" % escape, len(escape)) -def _parse_sub(source, state, verbose, nested=True): +def _parse_sub(source, state, verbose, nested): # parse an alternation: a|b|c items = [] @@ -412,7 +412,8 @@ def _parse_sub(source, state, verbose, nested=True): sourcematch = source.match start = source.tell() while True: - itemsappend(_parse(source, state, verbose, not nested and not items)) + itemsappend(_parse(source, state, verbose, nested + 1, + not nested and not items)) if not sourcematch("|"): break @@ -454,10 +455,10 @@ def _parse_sub(source, state, verbose, nested=True): subpattern.append((BRANCH, (None, items))) return subpattern -def _parse_sub_cond(source, state, condgroup, verbose): - item_yes = _parse(source, state, verbose) +def _parse_sub_cond(source, state, condgroup, verbose, nested): + item_yes = _parse(source, state, verbose, nested + 1) if source.match("|"): - item_no = _parse(source, state, verbose) + item_no = _parse(source, state, verbose, nested + 1) if source.next == "|": raise source.error("conditional backref with more than two branches") else: @@ -466,7 +467,7 @@ def _parse_sub_cond(source, state, condgroup, verbose): subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) return subpattern -def _parse(source, state, verbose, first=False): +def _parse(source, state, verbose, nested, first=False): # parse a simple pattern subpattern = SubPattern(state) @@ -692,7 +693,7 @@ def _parse(source, state, verbose, first=False): lookbehindgroups = state.lookbehindgroups if lookbehindgroups is None: state.lookbehindgroups = state.groups - p = _parse_sub(source, state, verbose) + p = _parse_sub(source, state, verbose, nested + 1) if dir < 0: if lookbehindgroups is None: state.lookbehindgroups = None @@ -739,7 +740,7 @@ def 
_parse(source, state, verbose, first=False): source.string[:20], # truncate long regexes ' (truncated)' if len(source.string) > 20 else '', ), - DeprecationWarning, stacklevel=7 + DeprecationWarning, stacklevel=nested + 6 ) if (state.flags & SRE_FLAG_VERBOSE) and not verbose: raise Verbose @@ -757,11 +758,11 @@ def _parse(source, state, verbose, first=False): except error as err: raise source.error(err.msg, len(name) + 1) from None if condgroup: - p = _parse_sub_cond(source, state, condgroup, verbose) + p = _parse_sub_cond(source, state, condgroup, verbose, nested + 1) else: sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and not (del_flags & SRE_FLAG_VERBOSE)) - p = _parse_sub(source, state, sub_verbose) + p = _parse_sub(source, state, sub_verbose, nested + 1) if not source.match(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) @@ -851,7 +852,7 @@ def parse(str, flags=0, pattern=None): pattern.str = str try: - p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, False) + p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0) except Verbose: # the VERBOSE flag was switched on inside the pattern. to be # on the safe side, we'll parse the whole thing again... @@ -859,7 +860,7 @@ def parse(str, flags=0, pattern=None): pattern.flags = flags | SRE_FLAG_VERBOSE pattern.str = str source.seek(0) - p = _parse_sub(source, pattern, True, False) + p = _parse_sub(source, pattern, True, 0) p.pattern.flags = fix_flags(str, p.pattern.flags) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index c52417ba000..e88d0b3dcf2 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1348,6 +1348,7 @@ class ReTests(unittest.TestCase): str(warns.warnings[0].message), 'Flags not at the start of the expression %s' % p ) + self.assertEqual(warns.warnings[0].filename, __file__) p = upper_char + '(?i)%s' % ('.?' 
* 100) with self.assertWarns(DeprecationWarning) as warns: @@ -1356,6 +1357,7 @@ class ReTests(unittest.TestCase): str(warns.warnings[0].message), 'Flags not at the start of the expression %s (truncated)' % p[:20] ) + self.assertEqual(warns.warnings[0].filename, __file__) with self.assertWarns(DeprecationWarning): self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char)) @@ -1367,14 +1369,23 @@ class ReTests(unittest.TestCase): self.assertTrue(re.match('^(?i)' + upper_char, lower_char)) with self.assertWarns(DeprecationWarning): self.assertTrue(re.match('$|(?i)' + upper_char, lower_char)) - with self.assertWarns(DeprecationWarning): + with self.assertWarns(DeprecationWarning) as warns: self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char)) - with self.assertWarns(DeprecationWarning): + self.assertRegex(str(warns.warnings[0].message), + 'Flags not at the start') + self.assertEqual(warns.warnings[0].filename, __file__) + with self.assertWarns(DeprecationWarning) as warns: self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')', lower_char)) - with self.assertWarns(DeprecationWarning): + self.assertRegex(str(warns.warnings[0].message), + 'Flags not at the start') + self.assertEqual(warns.warnings[0].filename, __file__) + with self.assertWarns(DeprecationWarning) as warns: self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')', lower_char)) + self.assertRegex(str(warns.warnings[0].message), + 'Flags not at the start') + self.assertEqual(warns.warnings[0].filename, __file__) def test_dollar_matches_twice(self): diff --git a/Misc/NEWS b/Misc/NEWS index bfb14c13653..567f40976e2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -36,6 +36,10 @@ Core and Builtins Library ------- +- bpo-30375: Warnings emitted when compiling a regular expression now always + point to the line in the user code. Previously they could point into the + internals of the re module if emitted from inside groups or conditionals.
+ - bpo-30048: Fixed ``Task.cancel()`` can be ignored when the task is running coroutine and the coroutine returned without any more ``await``.