mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00
gh-134675: Add t-string prefixes to tokenizer module, lexical analysis doc, and add a test to make sure we catch this error in the future. (#134734)
* Add t-string prefixes to _all_string_prefixes, and add a test to make sure we catch this error in the future. * Update lexical analysis docs for t-string prefixes.
This commit is contained in:
parent
c60f39ada6
commit
08c78e02fa
3 changed files with 59 additions and 2 deletions
|
@ -489,8 +489,9 @@ String literals are described by the following lexical definitions:
|
||||||
|
|
||||||
.. productionlist:: python-grammar
|
.. productionlist:: python-grammar
|
||||||
stringliteral: [`stringprefix`](`shortstring` | `longstring`)
|
stringliteral: [`stringprefix`](`shortstring` | `longstring`)
|
||||||
stringprefix: "r" | "u" | "R" | "U" | "f" | "F"
|
stringprefix: "r" | "u" | "R" | "U" | "f" | "F" | "t" | "T"
|
||||||
: | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF"
|
: | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF"
|
||||||
|
: | "tr" | "Tr" | "tR" | "TR" | "rt" | "rT" | "Rt" | "RT"
|
||||||
shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
|
shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
|
||||||
longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
|
longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
|
||||||
shortstringitem: `shortstringchar` | `stringescapeseq`
|
shortstringitem: `shortstringchar` | `stringescapeseq`
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import itertools
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import string
|
||||||
import tempfile
|
import tempfile
|
||||||
import token
|
import token
|
||||||
import tokenize
|
import tokenize
|
||||||
|
@ -3238,5 +3240,59 @@ class CommandLineTest(unittest.TestCase):
|
||||||
self.check_output(source, expect, flag)
|
self.check_output(source, expect, flag)
|
||||||
|
|
||||||
|
|
||||||
|
class StringPrefixTest(unittest.TestCase):
    """Check tokenize.StringPrefix against the prefixes Python accepts."""

    @staticmethod
    def _accepted_prefixes(length):
        """Return the set of valid string prefixes made of *length* letters.

        Every selection of *length* distinct lowercase ASCII letters is
        tried in every order and in every upper/lower-case mix.  A
        candidate counts as a valid prefix when evaluating it followed
        by an empty string literal does not raise SyntaxError.
        """
        accepted = set()
        for letters in itertools.combinations(string.ascii_lowercase, length):
            for ordering in itertools.permutations(letters):
                case_choices = [(c, c.upper()) for c in ordering]
                for cased in itertools.product(*case_choices):
                    candidate = ''.join(cased)
                    if candidate == "not":
                        # 'not' can never be a string prefix,
                        # because it's a valid expression: not ""
                        continue
                    try:
                        # The evaluated text is generated right here
                        # (ASCII letters plus an empty literal), never
                        # external input.
                        eval(f'{candidate}""')
                    except SyntaxError:
                        continue
                    # No syntax error, so candidate is a valid string
                    # prefix.
                    accepted.add(candidate)
        return accepted

    def test_prefixes(self):
        # Get the list of defined string prefixes. I don't see an
        # obvious documented way of doing this, but probably the best
        # thing is to split apart tokenize.StringPrefix.

        # Make sure StringPrefix begins and ends in parens.
        self.assertEqual(tokenize.StringPrefix[0], '(')
        self.assertEqual(tokenize.StringPrefix[-1], ')')

        # Then split apart everything else by '|'.
        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))

        # Now compute the actual string prefixes, by evaluating all
        # candidate prefix combinations, followed by an empty string.

        # Try all prefix lengths until we find a length that has zero
        # valid prefixes. This will miss the case where for example
        # there are no valid 3 character prefixes, but there are valid
        # 4 character prefixes. That seems extremely unlikely.

        # Note that the empty prefix is being included, because length
        # starts at 0. That's expected, since StringPrefix includes
        # the empty prefix.
        valid_prefixes = set()
        for length in itertools.count():
            found = self._accepted_prefixes(length)
            if not found:
                break
            valid_prefixes |= found

        self.assertEqual(defined_prefixes, valid_prefixes)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -86,7 +86,7 @@ def _all_string_prefixes():
|
||||||
# The valid string prefixes. Only contain the lower case versions,
|
# The valid string prefixes. Only contain the lower case versions,
|
||||||
# and don't contain any permutations (include 'fr', but not
|
# and don't contain any permutations (include 'fr', but not
|
||||||
# 'rf'). The various permutations will be generated.
|
# 'rf'). The various permutations will be generated.
|
||||||
_valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
|
_valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr']
|
||||||
# if we add binary f-strings, add: ['fb', 'fbr']
|
# if we add binary f-strings, add: ['fb', 'fbr']
|
||||||
result = {''}
|
result = {''}
|
||||||
for prefix in _valid_string_prefixes:
|
for prefix in _valid_string_prefixes:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue