bpo-30397: Add re.Pattern and re.Match. (#1646)

This commit is contained in:
Serhiy Storchaka 2017-10-04 20:09:49 +03:00 committed by GitHub
parent 8d5a3aad2f
commit 0b5e61ddca
13 changed files with 120 additions and 107 deletions

View file

@ -402,7 +402,7 @@ should store the result in a variable for later use. ::
>>> m = p.match('tempo') >>> m = p.match('tempo')
>>> m #doctest: +ELLIPSIS >>> m #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(0, 5), match='tempo'> <re.Match object; span=(0, 5), match='tempo'>
Now you can query the :ref:`match object <match-objects>` for information Now you can query the :ref:`match object <match-objects>` for information
about the matching string. :ref:`match object <match-objects>` instances about the matching string. :ref:`match object <match-objects>` instances
@ -441,7 +441,7 @@ case. ::
>>> print(p.match('::: message')) >>> print(p.match('::: message'))
None None
>>> m = p.search('::: message'); print(m) #doctest: +ELLIPSIS >>> m = p.search('::: message'); print(m) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(4, 11), match='message'> <re.Match object; span=(4, 11), match='message'>
>>> m.group() >>> m.group()
'message' 'message'
>>> m.span() >>> m.span()
@ -493,7 +493,7 @@ the RE string added as the first argument, and still return either ``None`` or a
>>> print(re.match(r'From\s+', 'Fromage amk')) >>> print(re.match(r'From\s+', 'Fromage amk'))
None None
>>> re.match(r'From\s+', 'From amk Thu May 14 19:12:10 1998') #doctest: +ELLIPSIS >>> re.match(r'From\s+', 'From amk Thu May 14 19:12:10 1998') #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(0, 5), match='From '> <re.Match object; span=(0, 5), match='From '>
Under the hood, these functions simply create a pattern object for you Under the hood, these functions simply create a pattern object for you
and call the appropriate method on it. They also store the compiled and call the appropriate method on it. They also store the compiled
@ -685,7 +685,7 @@ given location, they can obviously be matched an infinite number of times.
line, the RE to use is ``^From``. :: line, the RE to use is ``^From``. ::
>>> print(re.search('^From', 'From Here to Eternity')) #doctest: +ELLIPSIS >>> print(re.search('^From', 'From Here to Eternity')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(0, 4), match='From'> <re.Match object; span=(0, 4), match='From'>
>>> print(re.search('^From', 'Reciting From Memory')) >>> print(re.search('^From', 'Reciting From Memory'))
None None
@ -697,11 +697,11 @@ given location, they can obviously be matched an infinite number of times.
or any location followed by a newline character. :: or any location followed by a newline character. ::
>>> print(re.search('}$', '{block}')) #doctest: +ELLIPSIS >>> print(re.search('}$', '{block}')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(6, 7), match='}'> <re.Match object; span=(6, 7), match='}'>
>>> print(re.search('}$', '{block} ')) >>> print(re.search('}$', '{block} '))
None None
>>> print(re.search('}$', '{block}\n')) #doctest: +ELLIPSIS >>> print(re.search('}$', '{block}\n')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(6, 7), match='}'> <re.Match object; span=(6, 7), match='}'>
To match a literal ``'$'``, use ``\$`` or enclose it inside a character class, To match a literal ``'$'``, use ``\$`` or enclose it inside a character class,
as in ``[$]``. as in ``[$]``.
@ -726,7 +726,7 @@ given location, they can obviously be matched an infinite number of times.
>>> p = re.compile(r'\bclass\b') >>> p = re.compile(r'\bclass\b')
>>> print(p.search('no class at all')) #doctest: +ELLIPSIS >>> print(p.search('no class at all')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(3, 8), match='class'> <re.Match object; span=(3, 8), match='class'>
>>> print(p.search('the declassified algorithm')) >>> print(p.search('the declassified algorithm'))
None None
>>> print(p.search('one subclass is')) >>> print(p.search('one subclass is'))
@ -744,7 +744,7 @@ given location, they can obviously be matched an infinite number of times.
>>> print(p.search('no class at all')) >>> print(p.search('no class at all'))
None None
>>> print(p.search('\b' + 'class' + '\b')) #doctest: +ELLIPSIS >>> print(p.search('\b' + 'class' + '\b')) #doctest: +ELLIPSIS
<_sre.SRE_Match object; span=(0, 7), match='\x08class\x08'> <re.Match object; span=(0, 7), match='\x08class\x08'>
Second, inside a character class, where there's no use for this assertion, Second, inside a character class, where there's no use for this assertion,
``\b`` represents the backspace character, for compatibility with Python's ``\b`` represents the backspace character, for compatibility with Python's

View file

@ -86,7 +86,7 @@ patterns.
'(?s:.*\\.txt)\\Z' '(?s:.*\\.txt)\\Z'
>>> reobj = re.compile(regex) >>> reobj = re.compile(regex)
>>> reobj.match('foobar.txt') >>> reobj.match('foobar.txt')
<_sre.SRE_Match object; span=(0, 10), match='foobar.txt'> <re.Match object; span=(0, 10), match='foobar.txt'>
.. seealso:: .. seealso::

View file

@ -492,7 +492,7 @@ form.
Compile a regular expression pattern into a :ref:`regular expression object Compile a regular expression pattern into a :ref:`regular expression object
<re-objects>`, which can be used for matching using its <re-objects>`, which can be used for matching using its
:func:`~regex.match`, :func:`~regex.search` and other methods, described :func:`~Pattern.match`, :func:`~Pattern.search` and other methods, described
below. below.
The expression's behaviour can be modified by specifying a *flags* value. The expression's behaviour can be modified by specifying a *flags* value.
@ -747,7 +747,7 @@ form.
>>> re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE) >>> re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE)
'Baked Beans & Spam' 'Baked Beans & Spam'
The pattern may be a string or an RE object. The pattern may be a string or a :class:`Pattern` object.
The optional argument *count* is the maximum number of pattern occurrences to be The optional argument *count* is the maximum number of pattern occurrences to be
replaced; *count* must be a non-negative integer. If omitted or zero, all replaced; *count* must be a non-negative integer. If omitted or zero, all
@ -861,7 +861,7 @@ Regular Expression Objects
Compiled regular expression objects support the following methods and Compiled regular expression objects support the following methods and
attributes: attributes:
.. method:: regex.search(string[, pos[, endpos]]) .. method:: Pattern.search(string[, pos[, endpos]])
Scan through *string* looking for the first location where this regular Scan through *string* looking for the first location where this regular
expression produces a match, and return a corresponding :ref:`match object expression produces a match, and return a corresponding :ref:`match object
@ -884,11 +884,11 @@ attributes:
>>> pattern = re.compile("d") >>> pattern = re.compile("d")
>>> pattern.search("dog") # Match at index 0 >>> pattern.search("dog") # Match at index 0
<_sre.SRE_Match object; span=(0, 1), match='d'> <re.Match object; span=(0, 1), match='d'>
>>> pattern.search("dog", 1) # No match; search doesn't include the "d" >>> pattern.search("dog", 1) # No match; search doesn't include the "d"
.. method:: regex.match(string[, pos[, endpos]]) .. method:: Pattern.match(string[, pos[, endpos]])
If zero or more characters at the *beginning* of *string* match this regular If zero or more characters at the *beginning* of *string* match this regular
expression, return a corresponding :ref:`match object <match-objects>`. expression, return a corresponding :ref:`match object <match-objects>`.
@ -896,86 +896,86 @@ attributes:
different from a zero-length match. different from a zero-length match.
The optional *pos* and *endpos* parameters have the same meaning as for the The optional *pos* and *endpos* parameters have the same meaning as for the
:meth:`~regex.search` method. :meth:`~Pattern.search` method.
>>> pattern = re.compile("o") >>> pattern = re.compile("o")
>>> pattern.match("dog") # No match as "o" is not at the start of "dog". >>> pattern.match("dog") # No match as "o" is not at the start of "dog".
>>> pattern.match("dog", 1) # Match as "o" is the 2nd character of "dog". >>> pattern.match("dog", 1) # Match as "o" is the 2nd character of "dog".
<_sre.SRE_Match object; span=(1, 2), match='o'> <re.Match object; span=(1, 2), match='o'>
If you want to locate a match anywhere in *string*, use If you want to locate a match anywhere in *string*, use
:meth:`~regex.search` instead (see also :ref:`search-vs-match`). :meth:`~Pattern.search` instead (see also :ref:`search-vs-match`).
.. method:: regex.fullmatch(string[, pos[, endpos]]) .. method:: Pattern.fullmatch(string[, pos[, endpos]])
If the whole *string* matches this regular expression, return a corresponding If the whole *string* matches this regular expression, return a corresponding
:ref:`match object <match-objects>`. Return ``None`` if the string does not :ref:`match object <match-objects>`. Return ``None`` if the string does not
match the pattern; note that this is different from a zero-length match. match the pattern; note that this is different from a zero-length match.
The optional *pos* and *endpos* parameters have the same meaning as for the The optional *pos* and *endpos* parameters have the same meaning as for the
:meth:`~regex.search` method. :meth:`~Pattern.search` method.
>>> pattern = re.compile("o[gh]") >>> pattern = re.compile("o[gh]")
>>> pattern.fullmatch("dog") # No match as "o" is not at the start of "dog". >>> pattern.fullmatch("dog") # No match as "o" is not at the start of "dog".
>>> pattern.fullmatch("ogre") # No match as not the full string matches. >>> pattern.fullmatch("ogre") # No match as not the full string matches.
>>> pattern.fullmatch("doggie", 1, 3) # Matches within given limits. >>> pattern.fullmatch("doggie", 1, 3) # Matches within given limits.
<_sre.SRE_Match object; span=(1, 3), match='og'> <re.Match object; span=(1, 3), match='og'>
.. versionadded:: 3.4 .. versionadded:: 3.4
.. method:: regex.split(string, maxsplit=0) .. method:: Pattern.split(string, maxsplit=0)
Identical to the :func:`split` function, using the compiled pattern. Identical to the :func:`split` function, using the compiled pattern.
.. method:: regex.findall(string[, pos[, endpos]]) .. method:: Pattern.findall(string[, pos[, endpos]])
Similar to the :func:`findall` function, using the compiled pattern, but Similar to the :func:`findall` function, using the compiled pattern, but
also accepts optional *pos* and *endpos* parameters that limit the search also accepts optional *pos* and *endpos* parameters that limit the search
region like for :meth:`match`. region like for :meth:`match`.
.. method:: regex.finditer(string[, pos[, endpos]]) .. method:: Pattern.finditer(string[, pos[, endpos]])
Similar to the :func:`finditer` function, using the compiled pattern, but Similar to the :func:`finditer` function, using the compiled pattern, but
also accepts optional *pos* and *endpos* parameters that limit the search also accepts optional *pos* and *endpos* parameters that limit the search
region like for :meth:`match`. region like for :meth:`match`.
.. method:: regex.sub(repl, string, count=0) .. method:: Pattern.sub(repl, string, count=0)
Identical to the :func:`sub` function, using the compiled pattern. Identical to the :func:`sub` function, using the compiled pattern.
.. method:: regex.subn(repl, string, count=0) .. method:: Pattern.subn(repl, string, count=0)
Identical to the :func:`subn` function, using the compiled pattern. Identical to the :func:`subn` function, using the compiled pattern.
.. attribute:: regex.flags .. attribute:: Pattern.flags
The regex matching flags. This is a combination of the flags given to The regex matching flags. This is a combination of the flags given to
:func:`.compile`, any ``(?...)`` inline flags in the pattern, and implicit :func:`.compile`, any ``(?...)`` inline flags in the pattern, and implicit
flags such as :data:`UNICODE` if the pattern is a Unicode string. flags such as :data:`UNICODE` if the pattern is a Unicode string.
.. attribute:: regex.groups .. attribute:: Pattern.groups
The number of capturing groups in the pattern. The number of capturing groups in the pattern.
.. attribute:: regex.groupindex .. attribute:: Pattern.groupindex
A dictionary mapping any symbolic group names defined by ``(?P<id>)`` to group A dictionary mapping any symbolic group names defined by ``(?P<id>)`` to group
numbers. The dictionary is empty if no symbolic groups were used in the numbers. The dictionary is empty if no symbolic groups were used in the
pattern. pattern.
.. attribute:: regex.pattern .. attribute:: Pattern.pattern
The pattern string from which the RE object was compiled. The pattern string from which the pattern object was compiled.
.. versionchanged:: 3.7 .. versionchanged:: 3.7
@ -989,7 +989,7 @@ Match Objects
------------- -------------
Match objects always have a boolean value of ``True``. Match objects always have a boolean value of ``True``.
Since :meth:`~regex.match` and :meth:`~regex.search` return ``None`` Since :meth:`~Pattern.match` and :meth:`~Pattern.search` return ``None``
when there is no match, you can test whether there was a match with a simple when there is no match, you can test whether there was a match with a simple
``if`` statement:: ``if`` statement::
@ -1000,10 +1000,10 @@ when there is no match, you can test whether there was a match with a simple
Match objects support the following methods and attributes: Match objects support the following methods and attributes:
.. method:: match.expand(template) .. method:: Match.expand(template)
Return the string obtained by doing backslash substitution on the template Return the string obtained by doing backslash substitution on the template
string *template*, as done by the :meth:`~regex.sub` method. string *template*, as done by the :meth:`~Pattern.sub` method.
Escapes such as ``\n`` are converted to the appropriate characters, Escapes such as ``\n`` are converted to the appropriate characters,
and numeric backreferences (``\1``, ``\2``) and named backreferences and numeric backreferences (``\1``, ``\2``) and named backreferences
(``\g<1>``, ``\g<name>``) are replaced by the contents of the (``\g<1>``, ``\g<name>``) are replaced by the contents of the
@ -1012,7 +1012,7 @@ Match objects support the following methods and attributes:
.. versionchanged:: 3.5 .. versionchanged:: 3.5
Unmatched groups are replaced with an empty string. Unmatched groups are replaced with an empty string.
.. method:: match.group([group1, ...]) .. method:: Match.group([group1, ...])
Returns one or more subgroups of the match. If there is a single argument, the Returns one or more subgroups of the match. If there is a single argument, the
result is a single string; if there are multiple arguments, the result is a result is a single string; if there are multiple arguments, the result is a
@ -1063,7 +1063,7 @@ Match objects support the following methods and attributes:
'c3' 'c3'
.. method:: match.__getitem__(g) .. method:: Match.__getitem__(g)
This is identical to ``m.group(g)``. This allows easier access to This is identical to ``m.group(g)``. This allows easier access to
an individual group from a match: an individual group from a match:
@ -1079,7 +1079,7 @@ Match objects support the following methods and attributes:
.. versionadded:: 3.6 .. versionadded:: 3.6
.. method:: match.groups(default=None) .. method:: Match.groups(default=None)
Return a tuple containing all the subgroups of the match, from 1 up to however Return a tuple containing all the subgroups of the match, from 1 up to however
many groups are in the pattern. The *default* argument is used for groups that many groups are in the pattern. The *default* argument is used for groups that
@ -1102,7 +1102,7 @@ Match objects support the following methods and attributes:
('24', '0') ('24', '0')
.. method:: match.groupdict(default=None) .. method:: Match.groupdict(default=None)
Return a dictionary containing all the *named* subgroups of the match, keyed by Return a dictionary containing all the *named* subgroups of the match, keyed by
the subgroup name. The *default* argument is used for groups that did not the subgroup name. The *default* argument is used for groups that did not
@ -1113,8 +1113,8 @@ Match objects support the following methods and attributes:
{'first_name': 'Malcolm', 'last_name': 'Reynolds'} {'first_name': 'Malcolm', 'last_name': 'Reynolds'}
.. method:: match.start([group]) .. method:: Match.start([group])
match.end([group]) Match.end([group])
Return the indices of the start and end of the substring matched by *group*; Return the indices of the start and end of the substring matched by *group*;
*group* defaults to zero (meaning the whole matched substring). Return ``-1`` if *group* defaults to zero (meaning the whole matched substring). Return ``-1`` if
@ -1137,28 +1137,28 @@ Match objects support the following methods and attributes:
'tony@tiger.net' 'tony@tiger.net'
.. method:: match.span([group]) .. method:: Match.span([group])
For a match *m*, return the 2-tuple ``(m.start(group), m.end(group))``. Note For a match *m*, return the 2-tuple ``(m.start(group), m.end(group))``. Note
that if *group* did not contribute to the match, this is ``(-1, -1)``. that if *group* did not contribute to the match, this is ``(-1, -1)``.
*group* defaults to zero, the entire match. *group* defaults to zero, the entire match.
.. attribute:: match.pos .. attribute:: Match.pos
The value of *pos* which was passed to the :meth:`~regex.search` or The value of *pos* which was passed to the :meth:`~Pattern.search` or
:meth:`~regex.match` method of a :ref:`regex object <re-objects>`. This is :meth:`~Pattern.match` method of a :ref:`regex object <re-objects>`. This is
the index into the string at which the RE engine started looking for a match. the index into the string at which the RE engine started looking for a match.
.. attribute:: match.endpos .. attribute:: Match.endpos
The value of *endpos* which was passed to the :meth:`~regex.search` or The value of *endpos* which was passed to the :meth:`~Pattern.search` or
:meth:`~regex.match` method of a :ref:`regex object <re-objects>`. This is :meth:`~Pattern.match` method of a :ref:`regex object <re-objects>`. This is
the index into the string beyond which the RE engine will not go. the index into the string beyond which the RE engine will not go.
.. attribute:: match.lastindex .. attribute:: Match.lastindex
The integer index of the last matched capturing group, or ``None`` if no group The integer index of the last matched capturing group, or ``None`` if no group
was matched at all. For example, the expressions ``(a)b``, ``((a)(b))``, and was matched at all. For example, the expressions ``(a)b``, ``((a)(b))``, and
@ -1167,21 +1167,21 @@ Match objects support the following methods and attributes:
string. string.
.. attribute:: match.lastgroup .. attribute:: Match.lastgroup
The name of the last matched capturing group, or ``None`` if the group didn't The name of the last matched capturing group, or ``None`` if the group didn't
have a name, or if no group was matched at all. have a name, or if no group was matched at all.
.. attribute:: match.re .. attribute:: Match.re
The regular expression object whose :meth:`~regex.match` or The regular expression object whose :meth:`~Pattern.match` or
:meth:`~regex.search` method produced this match instance. :meth:`~Pattern.search` method produced this match instance.
.. attribute:: match.string .. attribute:: Match.string
The string passed to :meth:`~regex.match` or :meth:`~regex.search`. The string passed to :meth:`~Pattern.match` or :meth:`~Pattern.search`.
.. versionchanged:: 3.7 .. versionchanged:: 3.7
@ -1234,7 +1234,7 @@ To match this with a regular expression, one could use backreferences as such:
"<Match: '354aa', groups=('a',)>" "<Match: '354aa', groups=('a',)>"
To find out what card the pair consists of, one could use the To find out what card the pair consists of, one could use the
:meth:`~match.group` method of the match object in the following manner: :meth:`~Match.group` method of the match object in the following manner:
.. doctest:: .. doctest::
@ -1314,7 +1314,7 @@ For example::
>>> re.match("c", "abcdef") # No match >>> re.match("c", "abcdef") # No match
>>> re.search("c", "abcdef") # Match >>> re.search("c", "abcdef") # Match
<_sre.SRE_Match object; span=(2, 3), match='c'> <re.Match object; span=(2, 3), match='c'>
Regular expressions beginning with ``'^'`` can be used with :func:`search` to Regular expressions beginning with ``'^'`` can be used with :func:`search` to
restrict the match at the beginning of the string:: restrict the match at the beginning of the string::
@ -1322,7 +1322,7 @@ restrict the match at the beginning of the string::
>>> re.match("c", "abcdef") # No match >>> re.match("c", "abcdef") # No match
>>> re.search("^c", "abcdef") # No match >>> re.search("^c", "abcdef") # No match
>>> re.search("^a", "abcdef") # Match >>> re.search("^a", "abcdef") # Match
<_sre.SRE_Match object; span=(0, 1), match='a'> <re.Match object; span=(0, 1), match='a'>
Note however that in :const:`MULTILINE` mode :func:`match` only matches at the Note however that in :const:`MULTILINE` mode :func:`match` only matches at the
beginning of the string, whereas using :func:`search` with a regular expression beginning of the string, whereas using :func:`search` with a regular expression
@ -1330,7 +1330,7 @@ beginning with ``'^'`` will match at the beginning of each line.
>>> re.match('X', 'A\nB\nX', re.MULTILINE) # No match >>> re.match('X', 'A\nB\nX', re.MULTILINE) # No match
>>> re.search('^X', 'A\nB\nX', re.MULTILINE) # Match >>> re.search('^X', 'A\nB\nX', re.MULTILINE) # Match
<_sre.SRE_Match object; span=(4, 5), match='X'> <re.Match object; span=(4, 5), match='X'>
Making a Phonebook Making a Phonebook
@ -1449,9 +1449,9 @@ another one to escape it. For example, the two following lines of code are
functionally identical: functionally identical:
>>> re.match(r"\W(.)\1\W", " ff ") >>> re.match(r"\W(.)\1\W", " ff ")
<_sre.SRE_Match object; span=(0, 4), match=' ff '> <re.Match object; span=(0, 4), match=' ff '>
>>> re.match("\\W(.)\\1\\W", " ff ") >>> re.match("\\W(.)\\1\\W", " ff ")
<_sre.SRE_Match object; span=(0, 4), match=' ff '> <re.Match object; span=(0, 4), match=' ff '>
When one wants to match a literal backslash, it must be escaped in the regular When one wants to match a literal backslash, it must be escaped in the regular
expression. With raw string notation, this means ``r"\\"``. Without raw string expression. With raw string notation, this means ``r"\\"``. Without raw string
@ -1459,9 +1459,9 @@ notation, one must use ``"\\\\"``, making the following lines of code
functionally identical: functionally identical:
>>> re.match(r"\\", r"\\") >>> re.match(r"\\", r"\\")
<_sre.SRE_Match object; span=(0, 1), match='\\'> <re.Match object; span=(0, 1), match='\\'>
>>> re.match("\\\\", r"\\") >>> re.match("\\\\", r"\\")
<_sre.SRE_Match object; span=(0, 1), match='\\'> <re.Match object; span=(0, 1), match='\\'>
Writing a Tokenizer Writing a Tokenizer

View file

@ -74,7 +74,7 @@ class Get_signatureTest(unittest.TestCase):
non-overlapping occurrences of the pattern in string by the non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable; replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is if a string, backslash escapes in it are processed. If it is
a callable, it's passed the match object and must return''') a callable, it's passed the Match object and must return''')
gtest(p.sub, '''(repl, string, count=0)\nReturn the string obtained by replacing the leftmost non-overlapping occurrences o...''') gtest(p.sub, '''(repl, string, count=0)\nReturn the string obtained by replacing the leftmost non-overlapping occurrences o...''')
def test_signature_wrap(self): def test_signature_wrap(self):

View file

@ -92,8 +92,8 @@ This module exports the following functions:
subn Same as sub, but also return the number of substitutions made. subn Same as sub, but also return the number of substitutions made.
split Split a string by the occurrences of a pattern. split Split a string by the occurrences of a pattern.
findall Find all occurrences of a pattern in a string. findall Find all occurrences of a pattern in a string.
finditer Return an iterator yielding a match object for each match. finditer Return an iterator yielding a Match object for each match.
compile Compile a pattern into a RegexObject. compile Compile a pattern into a Pattern object.
purge Clear the regular expression cache. purge Clear the regular expression cache.
escape Backslash all non-alphanumerics in a string. escape Backslash all non-alphanumerics in a string.
@ -139,7 +139,7 @@ except ImportError:
__all__ = [ __all__ = [
"match", "fullmatch", "search", "sub", "subn", "split", "match", "fullmatch", "search", "sub", "subn", "split",
"findall", "finditer", "compile", "purge", "template", "escape", "findall", "finditer", "compile", "purge", "template", "escape",
"error", "A", "I", "L", "M", "S", "X", "U", "error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U",
"ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
"UNICODE", "UNICODE",
] ]
@ -175,17 +175,17 @@ error = sre_compile.error
def match(pattern, string, flags=0): def match(pattern, string, flags=0):
"""Try to apply the pattern at the start of the string, returning """Try to apply the pattern at the start of the string, returning
a match object, or None if no match was found.""" a Match object, or None if no match was found."""
return _compile(pattern, flags).match(string) return _compile(pattern, flags).match(string)
def fullmatch(pattern, string, flags=0): def fullmatch(pattern, string, flags=0):
"""Try to apply the pattern to all of the string, returning """Try to apply the pattern to all of the string, returning
a match object, or None if no match was found.""" a Match object, or None if no match was found."""
return _compile(pattern, flags).fullmatch(string) return _compile(pattern, flags).fullmatch(string)
def search(pattern, string, flags=0): def search(pattern, string, flags=0):
"""Scan through string looking for a match to the pattern, returning """Scan through string looking for a match to the pattern, returning
a match object, or None if no match was found.""" a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string) return _compile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0, flags=0): def sub(pattern, repl, string, count=0, flags=0):
@ -193,7 +193,7 @@ def sub(pattern, repl, string, count=0, flags=0):
non-overlapping occurrences of the pattern in string by the non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable; replacement repl. repl can be either a string or a callable;
if a string, backslash escapes in it are processed. If it is if a string, backslash escapes in it are processed. If it is
a callable, it's passed the match object and must return a callable, it's passed the Match object and must return
a replacement string to be used.""" a replacement string to be used."""
return _compile(pattern, flags).sub(repl, string, count) return _compile(pattern, flags).sub(repl, string, count)
@ -204,7 +204,7 @@ def subn(pattern, repl, string, count=0, flags=0):
string by the replacement repl. number is the number of string by the replacement repl. number is the number of
substitutions that were made. repl can be either a string or a substitutions that were made. repl can be either a string or a
callable; if a string, backslash escapes in it are processed. callable; if a string, backslash escapes in it are processed.
If it is a callable, it's passed the match object and must If it is a callable, it's passed the Match object and must
return a replacement string to be used.""" return a replacement string to be used."""
return _compile(pattern, flags).subn(repl, string, count) return _compile(pattern, flags).subn(repl, string, count)
@ -230,13 +230,13 @@ def findall(pattern, string, flags=0):
def finditer(pattern, string, flags=0): def finditer(pattern, string, flags=0):
"""Return an iterator over all non-overlapping matches in the """Return an iterator over all non-overlapping matches in the
string. For each match, the iterator returns a match object. string. For each match, the iterator returns a Match object.
Empty matches are included in the result.""" Empty matches are included in the result."""
return _compile(pattern, flags).finditer(string) return _compile(pattern, flags).finditer(string)
def compile(pattern, flags=0): def compile(pattern, flags=0):
"Compile a regular expression pattern, returning a pattern object." "Compile a regular expression pattern, returning a Pattern object."
return _compile(pattern, flags) return _compile(pattern, flags)
def purge(): def purge():
@ -245,7 +245,7 @@ def purge():
_compile_repl.cache_clear() _compile_repl.cache_clear()
def template(pattern, flags=0): def template(pattern, flags=0):
"Compile a template pattern, returning a pattern object" "Compile a template pattern, returning a Pattern object"
return _compile(pattern, flags|T) return _compile(pattern, flags|T)
# SPECIAL_CHARS # SPECIAL_CHARS
@ -264,13 +264,14 @@ def escape(pattern):
pattern = str(pattern, 'latin1') pattern = str(pattern, 'latin1')
return pattern.translate(_special_chars_map).encode('latin1') return pattern.translate(_special_chars_map).encode('latin1')
Pattern = type(sre_compile.compile('', 0))
Match = type(sre_compile.compile('', 0).match(''))
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# internals # internals
_cache = OrderedDict() _cache = OrderedDict()
_pattern_type = type(sre_compile.compile("", 0))
_MAXCACHE = 512 _MAXCACHE = 512
def _compile(pattern, flags): def _compile(pattern, flags):
# internal: compile pattern # internal: compile pattern
@ -278,7 +279,7 @@ def _compile(pattern, flags):
return _cache[type(pattern), pattern, flags] return _cache[type(pattern), pattern, flags]
except KeyError: except KeyError:
pass pass
if isinstance(pattern, _pattern_type): if isinstance(pattern, Pattern):
if flags: if flags:
raise ValueError( raise ValueError(
"cannot process flags argument with a compiled pattern") "cannot process flags argument with a compiled pattern")
@ -301,12 +302,12 @@ def _compile_repl(repl, pattern):
return sre_parse.parse_template(repl, pattern) return sre_parse.parse_template(repl, pattern)
def _expand(pattern, match, template): def _expand(pattern, match, template):
# internal: match.expand implementation hook # internal: Match.expand implementation hook
template = sre_parse.parse_template(template, pattern) template = sre_parse.parse_template(template, pattern)
return sre_parse.expand_template(template, match) return sre_parse.expand_template(template, match)
def _subx(pattern, template): def _subx(pattern, template):
# internal: pattern.sub/subn implementation helper # internal: Pattern.sub/subn implementation helper
template = _compile_repl(template, pattern) template = _compile_repl(template, pattern)
if not template[0] and len(template[1]) == 1: if not template[0] and len(template[1]) == 1:
# literal replacement # literal replacement
@ -322,7 +323,7 @@ import copyreg
def _pickle(p): def _pickle(p):
return _compile, (p.pattern, p.flags) return _compile, (p.pattern, p.flags)
copyreg.pickle(_pattern_type, _pickle, _compile) copyreg.pickle(Pattern, _pickle, _compile)
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# experimental stuff (see python-dev discussions for details) # experimental stuff (see python-dev discussions for details)

View file

@ -32,6 +32,8 @@ class error(Exception):
colno: The column corresponding to pos (may be None) colno: The column corresponding to pos (may be None)
""" """
__module__ = 're'
def __init__(self, msg, pattern=None, pos=None): def __init__(self, msg, pattern=None, pos=None):
self.msg = msg self.msg = msg
self.pattern = pattern self.pattern = pattern

View file

@ -585,12 +585,12 @@ class Telnet:
"""Read until one from a list of a regular expressions matches. """Read until one from a list of a regular expressions matches.
The first argument is a list of regular expressions, either The first argument is a list of regular expressions, either
compiled (re.RegexObject instances) or uncompiled (strings). compiled (re.Pattern instances) or uncompiled (strings).
The optional second argument is a timeout, in seconds; default The optional second argument is a timeout, in seconds; default
is no timeout. is no timeout.
Return a tuple of three items: the index in the list of the Return a tuple of three items: the index in the list of the
first regular expression that matches; the match object first regular expression that matches; the re.Match object
returned; and the text read up till and including the match. returned; and the text read up till and including the match.
If EOF is read and no text was read, raise EOFError. If EOF is read and no text was read, raise EOFError.

View file

@ -24,8 +24,6 @@ from optparse import make_option, Option, \
from optparse import _match_abbrev from optparse import _match_abbrev
from optparse import _parse_num from optparse import _parse_num
retype = type(re.compile(''))
class InterceptedError(Exception): class InterceptedError(Exception):
def __init__(self, def __init__(self,
error_message=None, error_message=None,
@ -107,7 +105,7 @@ Args were %(args)s.""" % locals ())
func(*args, **kwargs) func(*args, **kwargs)
except expected_exception as err: except expected_exception as err:
actual_message = str(err) actual_message = str(err)
if isinstance(expected_message, retype): if isinstance(expected_message, re.Pattern):
self.assertTrue(expected_message.search(actual_message), self.assertTrue(expected_message.search(actual_message),
"""\ """\
expected exception message pattern: expected exception message pattern:

View file

@ -1596,9 +1596,9 @@ class ReTests(unittest.TestCase):
def test_compile(self): def test_compile(self):
# Test return value when given string and pattern as parameter # Test return value when given string and pattern as parameter
pattern = re.compile('random pattern') pattern = re.compile('random pattern')
self.assertIsInstance(pattern, re._pattern_type) self.assertIsInstance(pattern, re.Pattern)
same_pattern = re.compile(pattern) same_pattern = re.compile(pattern)
self.assertIsInstance(same_pattern, re._pattern_type) self.assertIsInstance(same_pattern, re.Pattern)
self.assertIs(same_pattern, pattern) self.assertIs(same_pattern, pattern)
# Test behaviour when not given a string or pattern as parameter # Test behaviour when not given a string or pattern as parameter
self.assertRaises(TypeError, re.compile, 0) self.assertRaises(TypeError, re.compile, 0)

View file

@ -1273,7 +1273,7 @@ class TestCase(object):
Args: Args:
expected_exception: Exception class expected to be raised. expected_exception: Exception class expected to be raised.
expected_regex: Regex (re pattern object or string) expected expected_regex: Regex (re.Pattern object or string) expected
to be found in error message. to be found in error message.
args: Function to be called and extra positional args. args: Function to be called and extra positional args.
kwargs: Extra kwargs. kwargs: Extra kwargs.
@ -1292,7 +1292,7 @@ class TestCase(object):
Args: Args:
expected_warning: Warning class expected to be triggered. expected_warning: Warning class expected to be triggered.
expected_regex: Regex (re pattern object or string) expected expected_regex: Regex (re.Pattern object or string) expected
to be found in error message. to be found in error message.
args: Function to be called and extra positional args. args: Function to be called and extra positional args.
kwargs: Extra kwargs. kwargs: Extra kwargs.

View file

@ -0,0 +1,3 @@
The types of compiled regular expression objects and match objects are now
exposed as ``re.Pattern`` and ``re.Match``. This adds information in pydoc
output for the re module.

View file

@ -630,13 +630,13 @@ _sre.SRE_Pattern.fullmatch
pos: Py_ssize_t = 0 pos: Py_ssize_t = 0
endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
Matches against all of the string Matches against all of the string.
[clinic start generated code]*/ [clinic start generated code]*/
static PyObject * static PyObject *
_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string, _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
Py_ssize_t pos, Py_ssize_t endpos) Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=5833c47782a35f4a input=a6f640614aaefceb]*/ /*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/
{ {
SRE_STATE state; SRE_STATE state;
Py_ssize_t status; Py_ssize_t status;
@ -1341,7 +1341,7 @@ done:
return result; return result;
} }
PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); PyDoc_STRVAR(pattern_doc, "Compiled regular expression object.");
/* PatternObject's 'groupindex' method. */ /* PatternObject's 'groupindex' method. */
static PyObject * static PyObject *
@ -2221,12 +2221,12 @@ _sre.SRE_Match.span
group: object(c_default="NULL") = 0 group: object(c_default="NULL") = 0
/ /
For MatchObject m, return the 2-tuple (m.start(group), m.end(group)). For match object m, return the 2-tuple (m.start(group), m.end(group)).
[clinic start generated code]*/ [clinic start generated code]*/
static PyObject * static PyObject *
_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group) _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/ /*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/
{ {
Py_ssize_t index = match_getindex(self, group); Py_ssize_t index = match_getindex(self, group);
@ -2625,15 +2625,18 @@ static PyGetSetDef pattern_getset[] = {
#define PAT_OFF(x) offsetof(PatternObject, x) #define PAT_OFF(x) offsetof(PatternObject, x)
static PyMemberDef pattern_members[] = { static PyMemberDef pattern_members[] = {
{"pattern", T_OBJECT, PAT_OFF(pattern), READONLY}, {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY,
{"flags", T_INT, PAT_OFF(flags), READONLY}, "The pattern string from which the RE object was compiled."},
{"groups", T_PYSSIZET, PAT_OFF(groups), READONLY}, {"flags", T_INT, PAT_OFF(flags), READONLY,
"The regex matching flags."},
{"groups", T_PYSSIZET, PAT_OFF(groups), READONLY,
"The number of capturing groups in the pattern."},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
}; };
static PyTypeObject Pattern_Type = { static PyTypeObject Pattern_Type = {
PyVarObject_HEAD_INIT(NULL, 0) PyVarObject_HEAD_INIT(NULL, 0)
"_" SRE_MODULE ".SRE_Pattern", "re.Pattern",
sizeof(PatternObject), sizeof(SRE_CODE), sizeof(PatternObject), sizeof(SRE_CODE),
(destructor)pattern_dealloc, /* tp_dealloc */ (destructor)pattern_dealloc, /* tp_dealloc */
0, /* tp_print */ 0, /* tp_print */
@ -2685,18 +2688,24 @@ static PyMethodDef match_methods[] = {
}; };
static PyGetSetDef match_getset[] = { static PyGetSetDef match_getset[] = {
{"lastindex", (getter)match_lastindex_get, (setter)NULL}, {"lastindex", (getter)match_lastindex_get, (setter)NULL,
{"lastgroup", (getter)match_lastgroup_get, (setter)NULL}, "The integer index of the last matched capturing group."},
{"lastgroup", (getter)match_lastgroup_get, (setter)NULL,
"The name of the last matched capturing group."},
{"regs", (getter)match_regs_get, (setter)NULL}, {"regs", (getter)match_regs_get, (setter)NULL},
{NULL} {NULL}
}; };
#define MATCH_OFF(x) offsetof(MatchObject, x) #define MATCH_OFF(x) offsetof(MatchObject, x)
static PyMemberDef match_members[] = { static PyMemberDef match_members[] = {
{"string", T_OBJECT, MATCH_OFF(string), READONLY}, {"string", T_OBJECT, MATCH_OFF(string), READONLY,
{"re", T_OBJECT, MATCH_OFF(pattern), READONLY}, "The string passed to match() or search()."},
{"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY}, {"re", T_OBJECT, MATCH_OFF(pattern), READONLY,
{"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY}, "The regular expression object."},
{"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY,
"The index into the string at which the RE engine started looking for a match."},
{"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY,
"The index into the string beyond which the RE engine will not go."},
{NULL} {NULL}
}; };
@ -2705,7 +2714,7 @@ static PyMemberDef match_members[] = {
static PyTypeObject Match_Type = { static PyTypeObject Match_Type = {
PyVarObject_HEAD_INIT(NULL,0) PyVarObject_HEAD_INIT(NULL,0)
"_" SRE_MODULE ".SRE_Match", "re.Match",
sizeof(MatchObject), sizeof(Py_ssize_t), sizeof(MatchObject), sizeof(Py_ssize_t),
(destructor)match_dealloc, /* tp_dealloc */ (destructor)match_dealloc, /* tp_dealloc */
0, /* tp_print */ 0, /* tp_print */

View file

@ -190,7 +190,7 @@ PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__,
"fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n" "fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n"
"--\n" "--\n"
"\n" "\n"
"Matches against all of the string"); "Matches against all of the string.");
#define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF \ #define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF \
{"fullmatch", (PyCFunction)_sre_SRE_Pattern_fullmatch, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__}, {"fullmatch", (PyCFunction)_sre_SRE_Pattern_fullmatch, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__},
@ -682,7 +682,7 @@ PyDoc_STRVAR(_sre_SRE_Match_span__doc__,
"span($self, group=0, /)\n" "span($self, group=0, /)\n"
"--\n" "--\n"
"\n" "\n"
"For MatchObject m, return the 2-tuple (m.start(group), m.end(group))."); "For match object m, return the 2-tuple (m.start(group), m.end(group)).");
#define _SRE_SRE_MATCH_SPAN_METHODDEF \ #define _SRE_SRE_MATCH_SPAN_METHODDEF \
{"span", (PyCFunction)_sre_SRE_Match_span, METH_FASTCALL, _sre_SRE_Match_span__doc__}, {"span", (PyCFunction)_sre_SRE_Match_span, METH_FASTCALL, _sre_SRE_Match_span__doc__},
@ -765,4 +765,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
{ {
return _sre_SRE_Scanner_search_impl(self); return _sre_SRE_Scanner_search_impl(self);
} }
/*[clinic end generated code: output=6e3fb17fef1be436 input=a9049054013a1b77]*/ /*[clinic end generated code: output=1e6a1be31302df09 input=a9049054013a1b77]*/