mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00
gh-124130: Fix a bug in matching regular expression \B in empty string (GH-127007)
This commit is contained in:
parent
8d16919a06
commit
a3711d1541
5 changed files with 15 additions and 25 deletions
|
@ -572,11 +572,8 @@ character ``'$'``.
|
|||
Word boundaries are determined by the current locale
|
||||
if the :py:const:`~re.LOCALE` flag is used.
|
||||
|
||||
.. note::
|
||||
|
||||
Note that ``\B`` does not match an empty string, which differs from
|
||||
RE implementations in other programming languages such as Perl.
|
||||
This behavior is kept for compatibility reasons.
|
||||
.. versionchanged:: next
|
||||
``\B`` now matches an empty input string.
|
||||
|
||||
.. index:: single: \d; in regular expressions
|
||||
|
||||
|
|
|
@ -245,6 +245,10 @@ Other language changes
|
|||
making it a :term:`generic type`.
|
||||
(Contributed by Brian Schubert in :gh:`126012`.)
|
||||
|
||||
* ``\B`` in :mod:`regular expression <re>` now matches an empty input string.
|
||||
Now it is always the opposite of ``\b``.
|
||||
(Contributed by Serhiy Storchaka in :gh:`124130`.)
|
||||
|
||||
* iOS and macOS apps can now be configured to redirect ``stdout`` and
|
||||
``stderr`` content to the system log. (Contributed by Russell Keith-Magee in
|
||||
:gh:`127592`.)
|
||||
|
|
|
@ -978,18 +978,15 @@ class ReTests(unittest.TestCase):
|
|||
self.assertIsNone(re.fullmatch(br".+\B", b"abc", re.LOCALE))
|
||||
self.assertIsNone(re.fullmatch(r".+\B", "ьюя"))
|
||||
self.assertTrue(re.fullmatch(r".+\B", "ьюя", re.ASCII))
|
||||
# However, an empty string contains no word boundaries, and also no
|
||||
# non-boundaries.
|
||||
# However, an empty string contains no word boundaries.
|
||||
self.assertIsNone(re.search(r"\b", ""))
|
||||
self.assertIsNone(re.search(r"\b", "", re.ASCII))
|
||||
self.assertIsNone(re.search(br"\b", b""))
|
||||
self.assertIsNone(re.search(br"\b", b"", re.LOCALE))
|
||||
# This one is questionable and different from the perlre behaviour,
|
||||
# but describes current behavior.
|
||||
self.assertIsNone(re.search(r"\B", ""))
|
||||
self.assertIsNone(re.search(r"\B", "", re.ASCII))
|
||||
self.assertIsNone(re.search(br"\B", b""))
|
||||
self.assertIsNone(re.search(br"\B", b"", re.LOCALE))
|
||||
self.assertTrue(re.search(r"\B", ""))
|
||||
self.assertTrue(re.search(r"\B", "", re.ASCII))
|
||||
self.assertTrue(re.search(br"\B", b""))
|
||||
self.assertTrue(re.search(br"\B", b"", re.LOCALE))
|
||||
# A single word-character string has two boundaries, but no
|
||||
# non-boundary gaps.
|
||||
self.assertEqual(len(re.findall(r"\b", "a")), 2)
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
Fix a bug in matching regular expression ``\B`` in an empty input string.
|
||||
Now it is always the opposite of ``\b``.
|
||||
To get the old behavior, use ``(?!\A\Z)\B``.
|
||||
To get the new behavior in older Python versions, use ``(?!\b)``.
|
|
@ -42,8 +42,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
|
|||
return ((void*) ptr == state->end);
|
||||
|
||||
case SRE_AT_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_IS_WORD((int) ptr[-1]) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
|
@ -51,8 +49,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
|
|||
return thisp != thatp;
|
||||
|
||||
case SRE_AT_NON_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_IS_WORD((int) ptr[-1]) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
|
@ -60,8 +56,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
|
|||
return thisp == thatp;
|
||||
|
||||
case SRE_AT_LOC_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
|
@ -69,8 +63,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
|
|||
return thisp != thatp;
|
||||
|
||||
case SRE_AT_LOC_NON_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
|
@ -78,8 +70,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
|
|||
return thisp == thatp;
|
||||
|
||||
case SRE_AT_UNI_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
|
@ -87,8 +77,6 @@ SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
|
|||
return thisp != thatp;
|
||||
|
||||
case SRE_AT_UNI_NON_BOUNDARY:
|
||||
if (state->beginning == state->end)
|
||||
return 0;
|
||||
thatp = ((void*) ptr > state->beginning) ?
|
||||
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
|
||||
thisp = ((void*) ptr < state->end) ?
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue