diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 6aa49c3bf6f..e1c749634b7 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True): sourcematch = source.match start = source.tell() while True: - itemsappend(_parse(source, state, verbose)) + itemsappend(_parse(source, state, verbose, not nested and not items)) if not sourcematch("|"): break @@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose): subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) return subpattern -def _parse(source, state, verbose): +def _parse(source, state, verbose, first=False): # parse a simple pattern subpattern = SubPattern(state) @@ -730,10 +730,9 @@ def _parse(source, state, verbose): state.checklookbehindgroup(condgroup, source) elif char in FLAGS or char == "-": # flags - pos = source.pos flags = _parse_flags(source, state, char) if flags is None: # global flags - if pos != 3: # "(?x" + if not first or subpattern: import warnings warnings.warn( 'Flags not at the start of the expression %s%s' % ( @@ -742,6 +741,8 @@ def _parse(source, state, verbose): ), DeprecationWarning, stacklevel=7 ) + if (state.flags & SRE_FLAG_VERBOSE) and not verbose: + raise Verbose continue add_flags, del_flags = flags group = None @@ -795,9 +796,6 @@ def _parse_flags(source, state, char): msg = "unknown flag" if char.isalpha() else "missing -, : or )" raise source.error(msg, len(char)) if char == ")": - if ((add_flags & SRE_FLAG_VERBOSE) and - not (state.flags & SRE_FLAG_VERBOSE)): - raise Verbose state.flags |= add_flags return None if add_flags & GLOBAL_FLAGS: diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index b945cf094e9..c52417ba000 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1303,32 +1303,43 @@ class ReTests(unittest.TestCase): upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below lower_char = '\u1ea1' # Latin Small Letter A with Dot Below - p = re.compile(upper_char, re.I | re.U) - q = p.match(lower_char) + p = re.compile('.' + upper_char, re.I | re.S) + q = p.match('\n' + lower_char) self.assertTrue(q) - p = re.compile(lower_char, re.I | re.U) - q = p.match(upper_char) + p = re.compile('.' + lower_char, re.I | re.S) + q = p.match('\n' + upper_char) self.assertTrue(q) - p = re.compile('(?i)' + upper_char, re.U) - q = p.match(lower_char) + p = re.compile('(?i).' + upper_char, re.S) + q = p.match('\n' + lower_char) self.assertTrue(q) - p = re.compile('(?i)' + lower_char, re.U) - q = p.match(upper_char) + p = re.compile('(?i).' + lower_char, re.S) + q = p.match('\n' + upper_char) self.assertTrue(q) - p = re.compile('(?iu)' + upper_char) - q = p.match(lower_char) + p = re.compile('(?is).' + upper_char) + q = p.match('\n' + lower_char) self.assertTrue(q) - p = re.compile('(?iu)' + lower_char) - q = p.match(upper_char) + p = re.compile('(?is).' + lower_char) + q = p.match('\n' + upper_char) self.assertTrue(q) - self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char)) - self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char)) + p = re.compile('(?s)(?i).' + upper_char) + q = p.match('\n' + lower_char) + self.assertTrue(q) + + p = re.compile('(?s)(?i).' + lower_char) + q = p.match('\n' + upper_char) + self.assertTrue(q) + + self.assertTrue(re.match('(?ix) ' + upper_char, lower_char)) + self.assertTrue(re.match('(?ix) ' + lower_char, upper_char)) + self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X)) + self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char)) + self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X)) p = upper_char + '(?i)' with self.assertWarns(DeprecationWarning) as warns: @@ -1346,6 +1357,26 @@ class ReTests(unittest.TestCase): 'Flags not at the start of the expression %s (truncated)' % p[:20] ) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('^(?i)' + upper_char, lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('$|(?i)' + upper_char, lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')', + lower_char)) + with self.assertWarns(DeprecationWarning): + self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')', + lower_char)) + + def test_dollar_matches_twice(self): "$ matches the end of string, and just before the terminating \n" pattern = re.compile('$') diff --git a/Misc/NEWS b/Misc/NEWS index 64974ef2eb2..e7090fc1784 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -36,6 +36,12 @@ Core and Builtins Library ------- +- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers. + Now allowed several subsequential inline modifiers at the start of the + pattern (e.g. ``'(?i)(?s)...'``). In verbose mode whitespaces and comments + now are allowed before and between inline modifiers (e.g. + ``'(?x) (?i) (?s)...'``). + - bpo-29990: Fix range checking in GB18030 decoder. Original patch by Ma Lin. - Revert bpo-26293 for zipfile breakage. See also bpo-29094.