gh-56166: Deprecate passing confusing positional arguments in re functions (#107778)

Deprecate passing the optional arguments maxsplit, count and flags to the
module-level functions re.split(), re.sub() and re.subn() as positional
arguments. They should only be passed by keyword.
This commit is contained in:
Serhiy Storchaka 2023-08-16 23:35:35 +03:00 committed by GitHub
parent fb8fe377c4
commit 882cb79afa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 155 additions and 21 deletions

View file

@ -898,7 +898,7 @@ Functions
['Words', 'words', 'words', ''] ['Words', 'words', 'words', '']
>>> re.split(r'(\W+)', 'Words, words, words.') >>> re.split(r'(\W+)', 'Words, words, words.')
['Words', ', ', 'words', ', ', 'words', '.', ''] ['Words', ', ', 'words', ', ', 'words', '.', '']
>>> re.split(r'\W+', 'Words, words, words.', 1) >>> re.split(r'\W+', 'Words, words, words.', maxsplit=1)
['Words', 'words, words.'] ['Words', 'words, words.']
>>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE) >>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
['0', '3', '9'] ['0', '3', '9']
@ -929,6 +929,11 @@ Functions
.. versionchanged:: 3.7 .. versionchanged:: 3.7
Added support of splitting on a pattern that could match an empty string. Added support of splitting on a pattern that could match an empty string.
.. deprecated:: 3.13
Passing *maxsplit* and *flags* as positional arguments is deprecated.
In future Python versions they will be
:ref:`keyword-only parameters <keyword-only_parameter>`.
.. function:: findall(pattern, string, flags=0) .. function:: findall(pattern, string, flags=0)
@ -1027,8 +1032,6 @@ Functions
.. versionchanged:: 3.7 .. versionchanged:: 3.7
Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter
now are errors. now are errors.
.. versionchanged:: 3.7
Empty matches for the pattern are replaced when adjacent to a previous Empty matches for the pattern are replaced when adjacent to a previous
non-empty match. non-empty match.
@ -1037,18 +1040,17 @@ Functions
In :class:`bytes` replacement strings, group *name* can only contain bytes In :class:`bytes` replacement strings, group *name* can only contain bytes
in the ASCII range (``b'\x00'``-``b'\x7f'``). in the ASCII range (``b'\x00'``-``b'\x7f'``).
.. deprecated:: 3.13
Passing *count* and *flags* as positional arguments is deprecated.
In future Python versions they will be
:ref:`keyword-only parameters <keyword-only_parameter>`.
.. function:: subn(pattern, repl, string, count=0, flags=0) .. function:: subn(pattern, repl, string, count=0, flags=0)
Perform the same operation as :func:`sub`, but return a tuple ``(new_string, Perform the same operation as :func:`sub`, but return a tuple ``(new_string,
number_of_subs_made)``. number_of_subs_made)``.
.. versionchanged:: 3.1
Added the optional flags argument.
.. versionchanged:: 3.5
Unmatched groups are replaced with an empty string.
.. function:: escape(pattern) .. function:: escape(pattern)
@ -1656,7 +1658,7 @@ because the address has spaces, our splitting pattern, in it:
.. doctest:: .. doctest::
:options: +NORMALIZE_WHITESPACE :options: +NORMALIZE_WHITESPACE
>>> [re.split(":? ", entry, 3) for entry in entries] >>> [re.split(":? ", entry, maxsplit=3) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155 Elm Street'], [['Ross', 'McFluff', '834.345.1254', '155 Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'], ['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'], ['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'],
@ -1669,7 +1671,7 @@ house number from the street name:
.. doctest:: .. doctest::
:options: +NORMALIZE_WHITESPACE :options: +NORMALIZE_WHITESPACE
>>> [re.split(":? ", entry, 4) for entry in entries] >>> [re.split(":? ", entry, maxsplit=4) for entry in entries]
[['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'], [['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'],
['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'], ['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'],
['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'], ['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'],

View file

@ -832,6 +832,13 @@ Porting to Python 3.13
Deprecated Deprecated
---------- ----------
* Passing optional arguments *maxsplit*, *count* and *flags* in module-level
functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional
arguments is now deprecated.
In future Python versions these parameters will be
:ref:`keyword-only <keyword-only_parameter>`.
(Contributed by Serhiy Storchaka in :gh:`56166`.)
* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly * Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly
the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and
``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`. ``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`.

View file

@ -175,16 +175,39 @@ def search(pattern, string, flags=0):
a Match object, or None if no match was found.""" a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string) return _compile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0, flags=0): class _ZeroSentinel(int):
pass
_zero_sentinel = _ZeroSentinel()
def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
"""Return the string obtained by replacing the leftmost """Return the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in string by the non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable; replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is if a string, backslash escapes in it are processed. If it is
a callable, it's passed the Match object and must return a callable, it's passed the Match object and must return
a replacement string to be used.""" a replacement string to be used."""
return _compile(pattern, flags).sub(repl, string, count) if args:
if count is not _zero_sentinel:
raise TypeError("sub() got multiple values for argument 'count'")
count, *args = args
if args:
if flags is not _zero_sentinel:
raise TypeError("sub() got multiple values for argument 'flags'")
flags, *args = args
if args:
raise TypeError("sub() takes from 3 to 5 positional arguments "
"but %d were given" % (5 + len(args)))
def subn(pattern, repl, string, count=0, flags=0): import warnings
warnings.warn(
"'count' is passed as positional argument",
DeprecationWarning, stacklevel=2
)
return _compile(pattern, flags).sub(repl, string, count)
sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
"""Return a 2-tuple containing (new_string, number). """Return a 2-tuple containing (new_string, number).
new_string is the string obtained by replacing the leftmost new_string is the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in the source non-overlapping occurrences of the pattern in the source
@ -193,9 +216,28 @@ def subn(pattern, repl, string, count=0, flags=0):
callable; if a string, backslash escapes in it are processed. callable; if a string, backslash escapes in it are processed.
If it is a callable, it's passed the Match object and must If it is a callable, it's passed the Match object and must
return a replacement string to be used.""" return a replacement string to be used."""
return _compile(pattern, flags).subn(repl, string, count) if args:
if count is not _zero_sentinel:
raise TypeError("subn() got multiple values for argument 'count'")
count, *args = args
if args:
if flags is not _zero_sentinel:
raise TypeError("subn() got multiple values for argument 'flags'")
flags, *args = args
if args:
raise TypeError("subn() takes from 3 to 5 positional arguments "
"but %d were given" % (5 + len(args)))
def split(pattern, string, maxsplit=0, flags=0): import warnings
warnings.warn(
"'count' is passed as positional argument",
DeprecationWarning, stacklevel=2
)
return _compile(pattern, flags).subn(repl, string, count)
subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
"""Split the source string by the occurrences of the pattern, """Split the source string by the occurrences of the pattern,
returning a list containing the resulting substrings. If returning a list containing the resulting substrings. If
capturing parentheses are used in pattern, then the text of all capturing parentheses are used in pattern, then the text of all
@ -203,7 +245,26 @@ def split(pattern, string, maxsplit=0, flags=0):
list. If maxsplit is nonzero, at most maxsplit splits occur, list. If maxsplit is nonzero, at most maxsplit splits occur,
and the remainder of the string is returned as the final element and the remainder of the string is returned as the final element
of the list.""" of the list."""
if args:
if maxsplit is not _zero_sentinel:
raise TypeError("split() got multiple values for argument 'maxsplit'")
maxsplit, *args = args
if args:
if flags is not _zero_sentinel:
raise TypeError("split() got multiple values for argument 'flags'")
flags, *args = args
if args:
raise TypeError("split() takes from 2 to 4 positional arguments "
"but %d were given" % (4 + len(args)))
import warnings
warnings.warn(
"'maxsplit' is passed as positional argument",
DeprecationWarning, stacklevel=2
)
return _compile(pattern, flags).split(string, maxsplit) return _compile(pattern, flags).split(string, maxsplit)
split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'
def findall(pattern, string, flags=0): def findall(pattern, string, flags=0):
"""Return a list of all non-overlapping matches in the string. """Return a list of all non-overlapping matches in the string.

View file

@ -127,8 +127,10 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y') '9.3 -3 24x100y')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), with self.assertWarns(DeprecationWarning) as w:
'9.3 -3 23x99y') self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
'9.3 -3 23x99y')
self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3), self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
'9.3 -3 23x99y') '9.3 -3 23x99y')
@ -235,9 +237,42 @@ class ReTests(unittest.TestCase):
def test_qualified_re_sub(self): def test_qualified_re_sub(self):
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa') self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
with self.assertRaisesRegex(TypeError,
r"sub\(\) got multiple values for argument 'count'"):
re.sub('a', 'b', 'aaaaa', 1, count=1)
with self.assertRaisesRegex(TypeError,
r"sub\(\) got multiple values for argument 'flags'"):
re.sub('a', 'b', 'aaaaa', 1, 0, flags=0)
with self.assertRaisesRegex(TypeError,
r"sub\(\) takes from 3 to 5 positional arguments but 6 "
r"were given"):
re.sub('a', 'b', 'aaaaa', 1, 0, 0)
def test_misuse_flags(self):
with self.assertWarns(DeprecationWarning) as w:
result = re.sub('a', 'b', 'aaaaa', re.I)
self.assertEqual(result, re.sub('a', 'b', 'aaaaa', count=int(re.I)))
self.assertEqual(str(w.warning),
"'count' is passed as positional argument")
self.assertEqual(w.filename, __file__)
with self.assertWarns(DeprecationWarning) as w:
result = re.subn("b*", "x", "xyz", re.I)
self.assertEqual(result, re.subn("b*", "x", "xyz", count=int(re.I)))
self.assertEqual(str(w.warning),
"'count' is passed as positional argument")
self.assertEqual(w.filename, __file__)
with self.assertWarns(DeprecationWarning) as w:
result = re.split(":", ":a:b::c", re.I)
self.assertEqual(result, re.split(":", ":a:b::c", maxsplit=int(re.I)))
self.assertEqual(str(w.warning),
"'maxsplit' is passed as positional argument")
self.assertEqual(w.filename, __file__)
def test_bug_114660(self): def test_bug_114660(self):
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
'hello there') 'hello there')
@ -344,9 +379,22 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0)) self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
self.assertEqual(w.filename, __file__)
self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2)) self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
with self.assertRaisesRegex(TypeError,
r"subn\(\) got multiple values for argument 'count'"):
re.subn('a', 'b', 'aaaaa', 1, count=1)
with self.assertRaisesRegex(TypeError,
r"subn\(\) got multiple values for argument 'flags'"):
re.subn('a', 'b', 'aaaaa', 1, 0, flags=0)
with self.assertRaisesRegex(TypeError,
r"subn\(\) takes from 3 to 5 positional arguments but 6 "
r"were given"):
re.subn('a', 'b', 'aaaaa', 1, 0, 0)
def test_re_split(self): def test_re_split(self):
for string in ":a:b::c", S(":a:b::c"): for string in ":a:b::c", S(":a:b::c"):
self.assertTypedEqual(re.split(":", string), self.assertTypedEqual(re.split(":", string),
@ -401,7 +449,9 @@ class ReTests(unittest.TestCase):
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected) self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
def test_qualified_re_split(self): def test_qualified_re_split(self):
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
self.assertEqual(w.filename, __file__)
self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c']) self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d']) self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2), self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
@ -411,6 +461,17 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2), self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
['', ':', '', '', 'a:b::c']) ['', ':', '', '', 'a:b::c'])
with self.assertRaisesRegex(TypeError,
r"split\(\) got multiple values for argument 'maxsplit'"):
re.split(":", ":a:b::c", 2, maxsplit=2)
with self.assertRaisesRegex(TypeError,
r"split\(\) got multiple values for argument 'flags'"):
re.split(":", ":a:b::c", 2, 0, flags=0)
with self.assertRaisesRegex(TypeError,
r"split\(\) takes from 2 to 4 positional arguments but 5 "
r"were given"):
re.split(":", ":a:b::c", 2, 0, 0)
def test_re_findall(self): def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), []) self.assertEqual(re.findall(":+", "abc"), [])
for string in "a:b::c:::d", S("a:b::c:::d"): for string in "a:b::c:::d", S("a:b::c:::d"):

View file

@ -0,0 +1,3 @@
Deprecate passing the optional arguments *maxsplit*, *count* and *flags* to the
module-level functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as
positional arguments. They should only be passed by keyword.