mirror of
https://github.com/python/cpython.git
synced 2025-09-27 10:50:04 +00:00
gh-134752: Improve speed of test_tokenize.StringPrefixTest.test_prefixes. (#134766)
This commit is contained in:
parent
737b4ba020
commit
579686d9fb
1 changed file with 43 additions and 25 deletions
|
@ -3241,39 +3241,40 @@ class CommandLineTest(unittest.TestCase):
|
||||||
|
|
||||||
|
|
||||||
class StringPrefixTest(unittest.TestCase):
|
class StringPrefixTest(unittest.TestCase):
|
||||||
def test_prefixes(self):
|
@staticmethod
|
||||||
# Get the list of defined string prefixes. I don't see an
|
def determine_valid_prefixes():
|
||||||
# obvious documented way of doing this, but probably the best
|
# Try all lengths until we find a length that has zero valid
|
||||||
# thing is to split apart tokenize.StringPrefix.
|
# prefixes. This will miss the case where for example there
|
||||||
|
# are no valid 3 character prefixes, but there are valid 4
|
||||||
|
# character prefixes. That seems unlikely.
|
||||||
|
|
||||||
# Make sure StringPrefix begins and ends in parens.
|
single_char_valid_prefixes = set()
|
||||||
self.assertEqual(tokenize.StringPrefix[0], '(')
|
|
||||||
self.assertEqual(tokenize.StringPrefix[-1], ')')
|
|
||||||
|
|
||||||
# Then split apart everything else by '|'.
|
# Find all of the single character string prefixes. Just get
|
||||||
defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
|
# the lowercase version, we'll deal with combinations of upper
|
||||||
|
# and lower case later. I'm using this logic just in case
|
||||||
# Now compute the actual string prefixes, by exec-ing all
|
# some uppercase-only prefix is added.
|
||||||
# valid prefix combinations, followed by an empty string.
|
for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase):
|
||||||
|
try:
|
||||||
# Try all prefix lengths until we find a length that has zero
|
eval(f'{letter}""')
|
||||||
# valid prefixes. This will miss the case where for example
|
single_char_valid_prefixes.add(letter.lower())
|
||||||
# there are no valid 3 character prefixes, but there are valid
|
except SyntaxError:
|
||||||
# 4 character prefixes. That seems extremely unlikely.
|
pass
|
||||||
|
|
||||||
# Note that the empty prefix is being included, because length
|
|
||||||
# starts at 0. That's expected, since StringPrefix includes
|
|
||||||
# the empty prefix.
|
|
||||||
|
|
||||||
|
# This logic assumes that all combinations of valid prefixes only use
|
||||||
|
# the characters that are valid single character prefixes. That seems
|
||||||
|
# like a valid assumption, but if it ever changes this will need
|
||||||
|
# adjusting.
|
||||||
valid_prefixes = set()
|
valid_prefixes = set()
|
||||||
for length in itertools.count():
|
for length in itertools.count():
|
||||||
num_at_this_length = 0
|
num_at_this_length = 0
|
||||||
for prefix in (
|
for prefix in (
|
||||||
"".join(l) for l in list(itertools.combinations(string.ascii_lowercase, length))
|
"".join(l)
|
||||||
|
for l in itertools.combinations(single_char_valid_prefixes, length)
|
||||||
):
|
):
|
||||||
for t in itertools.permutations(prefix):
|
for t in itertools.permutations(prefix):
|
||||||
for u in itertools.product(*[(c, c.upper()) for c in t]):
|
for u in itertools.product(*[(c, c.upper()) for c in t]):
|
||||||
p = ''.join(u)
|
p = "".join(u)
|
||||||
if p == "not":
|
if p == "not":
|
||||||
# 'not' can never be a string prefix,
|
# 'not' can never be a string prefix,
|
||||||
# because it's a valid expression: not ""
|
# because it's a valid expression: not ""
|
||||||
|
@ -3289,9 +3290,26 @@ class StringPrefixTest(unittest.TestCase):
|
||||||
except SyntaxError:
|
except SyntaxError:
|
||||||
pass
|
pass
|
||||||
if num_at_this_length == 0:
|
if num_at_this_length == 0:
|
||||||
break
|
return valid_prefixes
|
||||||
|
|
||||||
self.assertEqual(defined_prefixes, valid_prefixes)
|
|
||||||
|
def test_prefixes(self):
|
||||||
|
# Get the list of defined string prefixes. I don't see an
|
||||||
|
# obvious documented way of doing this, but probably the best
|
||||||
|
# thing is to split apart tokenize.StringPrefix.
|
||||||
|
|
||||||
|
# Make sure StringPrefix begins and ends in parens. We're
|
||||||
|
# assuming it's of the form "(a|b|ab)", if a, b, and ab are
|
||||||
|
# valid string prefixes.
|
||||||
|
self.assertEqual(tokenize.StringPrefix[0], '(')
|
||||||
|
self.assertEqual(tokenize.StringPrefix[-1], ')')
|
||||||
|
|
||||||
|
# Then split apart everything else by '|'.
|
||||||
|
defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
|
||||||
|
|
||||||
|
# Now compute the actual allowed string prefixes and compare
|
||||||
|
# to what is defined in the tokenize module.
|
||||||
|
self.assertEqual(defined_prefixes, self.determine_valid_prefixes())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue