mirror of
https://github.com/python/cpython.git
synced 2025-11-09 14:06:30 +00:00
#10713: Improve documentation for \b and \B and add a few tests. Initial patch and tests by Martin Pool.
This commit is contained in:
parent
0555cde98d
commit
38ae5b2392
2 changed files with 36 additions and 5 deletions
|
|
@@ -325,14 +325,19 @@ the second character. For example, ``\$`` matches the character ``'$'``.
|
||||||
Matches the empty string, but only at the beginning or end of a word. A word is
|
Matches the empty string, but only at the beginning or end of a word. A word is
|
||||||
defined as a sequence of alphanumeric or underscore characters, so the end of a
|
defined as a sequence of alphanumeric or underscore characters, so the end of a
|
||||||
word is indicated by whitespace or a non-alphanumeric, non-underscore character.
|
word is indicated by whitespace or a non-alphanumeric, non-underscore character.
|
||||||
Note that ``\b`` is defined as the boundary between ``\w`` and ``\W``, so the
|
Note that formally, ``\b`` is defined as the boundary between a ``\w`` and
|
||||||
precise set of characters deemed to be alphanumeric depends on the values of the
|
a ``\W`` character (or vice versa), or between ``\w`` and the beginning/end
|
||||||
``UNICODE`` and ``LOCALE`` flags. Inside a character range, ``\b`` represents
|
of the string, so the precise set of characters deemed to be alphanumeric
|
||||||
the backspace character, for compatibility with Python's string literals.
|
depends on the values of the ``UNICODE`` and ``LOCALE`` flags.
|
||||||
|
For example, ``r'\bfoo\b'`` matches ``'foo'``, ``'foo.'``, ``'(foo)'``,
|
||||||
|
``'bar foo baz'`` but not ``'foobar'`` or ``'foo3'``.
|
||||||
|
Inside a character range, ``\b`` represents the backspace character, for compatibility with Python's string literals.
|
||||||
|
|
||||||
``\B``
|
``\B``
|
||||||
Matches the empty string, but only when it is *not* at the beginning or end of a
|
Matches the empty string, but only when it is *not* at the beginning or end of a
|
||||||
word. This is just the opposite of ``\b``, so is also subject to the settings
|
word. This means that ``r'py\B'`` matches ``'python'``, ``'py3'``, ``'py2'``,
|
||||||
|
but not ``'py'``, ``'py.'``, or ``'py!'``.
|
||||||
|
``\B`` is just the opposite of ``\b``, so is also subject to the settings
|
||||||
of ``LOCALE`` and ``UNICODE``.
|
of ``LOCALE`` and ``UNICODE``.
|
||||||
|
|
||||||
``\d``
|
``\d``
|
||||||
|
|
|
||||||
|
|
@@ -373,6 +373,32 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.search(r"\d\D\w\W\s\S",
|
self.assertEqual(re.search(r"\d\D\w\W\s\S",
|
||||||
"1aa! a", re.UNICODE).group(0), "1aa! a")
|
"1aa! a", re.UNICODE).group(0), "1aa! a")
|
||||||
|
|
||||||
|
def test_string_boundaries(self):
|
||||||
|
# See http://bugs.python.org/issue10713
|
||||||
|
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
|
||||||
|
"abc")
|
||||||
|
# There's a word boundary at the start of a string.
|
||||||
|
self.assertTrue(re.match(r"\b", "abc"))
|
||||||
|
# A non-empty string includes a non-boundary zero-length match.
|
||||||
|
self.assertTrue(re.search(r"\B", "abc"))
|
||||||
|
# There is no non-boundary match at the start of a string.
|
||||||
|
self.assertFalse(re.match(r"\B", "abc"))
|
||||||
|
# However, an empty string contains no word boundaries, and also no
|
||||||
|
# non-boundaries.
|
||||||
|
self.assertEqual(re.search(r"\B", ""), None)
|
||||||
|
# This one is questionable and different from the perlre behavior,
|
||||||
|
# but describes current behavior.
|
||||||
|
self.assertEqual(re.search(r"\b", ""), None)
|
||||||
|
# A single word-character string has two boundaries, but no
|
||||||
|
# non-boundary gaps.
|
||||||
|
self.assertEqual(len(re.findall(r"\b", "a")), 2)
|
||||||
|
self.assertEqual(len(re.findall(r"\B", "a")), 0)
|
||||||
|
# If there are no words, there are no boundaries
|
||||||
|
self.assertEqual(len(re.findall(r"\b", " ")), 0)
|
||||||
|
self.assertEqual(len(re.findall(r"\b", "   ")), 0)
|
||||||
|
# Can match around the whitespace.
|
||||||
|
self.assertEqual(len(re.findall(r"\B", " ")), 2)
|
||||||
|
|
||||||
def test_bigcharset(self):
|
def test_bigcharset(self):
|
||||||
self.assertEqual(re.match(u"([\u2222\u2223])",
|
self.assertEqual(re.match(u"([\u2222\u2223])",
|
||||||
u"\u2222").group(1), u"\u2222")
|
u"\u2222").group(1), u"\u2222")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue