mirror of
https://github.com/python/cpython.git
synced 2025-08-04 08:59:19 +00:00
gh-104719: Restore Tokenize module constants (#104722)
This commit is contained in:
parent
93923793f6
commit
ffe47cb623
1 changed files with 101 additions and 0 deletions
101
Lib/tokenize.py
101
Lib/tokenize.py
|
@ -56,6 +56,107 @@ class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line'
|
||||||
else:
|
else:
|
||||||
return self.type
|
return self.type
|
||||||
|
|
||||||
|
def group(*choices): return '(' + '|'.join(choices) + ')'
|
||||||
|
def any(*choices): return group(*choices) + '*'
|
||||||
|
def maybe(*choices): return group(*choices) + '?'
|
||||||
|
|
||||||
|
# Note: we use unicode matching for names ("\w") but ascii matching for
|
||||||
|
# number literals.
|
||||||
|
Whitespace = r'[ \f\t]*'
|
||||||
|
Comment = r'#[^\r\n]*'
|
||||||
|
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
|
||||||
|
Name = r'\w+'
|
||||||
|
|
||||||
|
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
|
||||||
|
Binnumber = r'0[bB](?:_?[01])+'
|
||||||
|
Octnumber = r'0[oO](?:_?[0-7])+'
|
||||||
|
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
|
||||||
|
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
|
||||||
|
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
|
||||||
|
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
|
||||||
|
r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
|
||||||
|
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
|
||||||
|
Floatnumber = group(Pointfloat, Expfloat)
|
||||||
|
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
|
||||||
|
Number = group(Imagnumber, Floatnumber, Intnumber)
|
||||||
|
|
||||||
|
# Return the empty string, plus all of the valid string prefixes.
|
||||||
|
def _all_string_prefixes():
|
||||||
|
# The valid string prefixes. Only contain the lower case versions,
|
||||||
|
# and don't contain any permutations (include 'fr', but not
|
||||||
|
# 'rf'). The various permutations will be generated.
|
||||||
|
_valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
|
||||||
|
# if we add binary f-strings, add: ['fb', 'fbr']
|
||||||
|
result = {''}
|
||||||
|
for prefix in _valid_string_prefixes:
|
||||||
|
for t in _itertools.permutations(prefix):
|
||||||
|
# create a list with upper and lower versions of each
|
||||||
|
# character
|
||||||
|
for u in _itertools.product(*[(c, c.upper()) for c in t]):
|
||||||
|
result.add(''.join(u))
|
||||||
|
return result
|
||||||
|
|
||||||
|
@functools.lru_cache
|
||||||
|
def _compile(expr):
|
||||||
|
return re.compile(expr, re.UNICODE)
|
||||||
|
|
||||||
|
# Note that since _all_string_prefixes includes the empty string,
|
||||||
|
# StringPrefix can be the empty string (making it optional).
|
||||||
|
StringPrefix = group(*_all_string_prefixes())
|
||||||
|
|
||||||
|
# Tail end of ' string.
|
||||||
|
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
|
||||||
|
# Tail end of " string.
|
||||||
|
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
|
||||||
|
# Tail end of ''' string.
|
||||||
|
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
|
||||||
|
# Tail end of """ string.
|
||||||
|
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
|
||||||
|
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
|
||||||
|
# Single-line ' or " string.
|
||||||
|
String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
|
||||||
|
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
|
||||||
|
|
||||||
|
# Sorting in reverse order puts the long operators before their prefixes.
|
||||||
|
# Otherwise if = came before ==, == would get recognized as two instances
|
||||||
|
# of =.
|
||||||
|
Special = group(*map(re.escape, sorted(EXACT_TOKEN_TYPES, reverse=True)))
|
||||||
|
Funny = group(r'\r?\n', Special)
|
||||||
|
|
||||||
|
PlainToken = group(Number, Funny, String, Name)
|
||||||
|
Token = Ignore + PlainToken
|
||||||
|
|
||||||
|
# First (or only) line of ' or " string.
|
||||||
|
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
|
||||||
|
group("'", r'\\\r?\n'),
|
||||||
|
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
|
||||||
|
group('"', r'\\\r?\n'))
|
||||||
|
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
|
||||||
|
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
|
||||||
|
|
||||||
|
# For a given string prefix plus quotes, endpats maps it to a regex
|
||||||
|
# to match the remainder of that string. _prefix can be empty, for
|
||||||
|
# a normal single or triple quoted string (with no prefix).
|
||||||
|
endpats = {}
|
||||||
|
for _prefix in _all_string_prefixes():
|
||||||
|
endpats[_prefix + "'"] = Single
|
||||||
|
endpats[_prefix + '"'] = Double
|
||||||
|
endpats[_prefix + "'''"] = Single3
|
||||||
|
endpats[_prefix + '"""'] = Double3
|
||||||
|
del _prefix
|
||||||
|
|
||||||
|
# A set of all of the single and triple quoted string prefixes,
|
||||||
|
# including the opening quotes.
|
||||||
|
single_quoted = set()
|
||||||
|
triple_quoted = set()
|
||||||
|
for t in _all_string_prefixes():
|
||||||
|
for u in (t + '"', t + "'"):
|
||||||
|
single_quoted.add(u)
|
||||||
|
for u in (t + '"""', t + "'''"):
|
||||||
|
triple_quoted.add(u)
|
||||||
|
del t, u
|
||||||
|
|
||||||
|
tabsize = 8
|
||||||
|
|
||||||
class TokenError(Exception): pass
|
class TokenError(Exception): pass
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue