gh-131178: Add tests for tokenize command-line interface (#131274)

This commit is contained in:
Semyon Moroz 2025-03-27 20:04:16 +04:00 committed by GitHub
parent 8614f86b71
commit 9b83670f0f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 83 additions and 3 deletions

View file

@@ -1,5 +1,7 @@
import contextlib
import os
import re
import tempfile
import token
import tokenize
import unittest
@@ -3178,5 +3180,83 @@ class CTokenizerBufferTests(unittest.TestCase):
run_test_script(file_name)
class CommandLineTest(unittest.TestCase):
def setUp(self):
self.filename = tempfile.mktemp()
self.addCleanup(os_helper.unlink, self.filename)
@staticmethod
def text_normalize(string):
"""Dedent *string* and strip it from its surrounding whitespaces.
This method is used by the other utility functions so that any
string to write or to match against can be freely indented.
"""
return re.sub(r'\s+', ' ', string).strip()
def set_source(self, content):
with open(self.filename, 'w') as fp:
fp.write(content)
def invoke_tokenize(self, *flags):
output = StringIO()
with contextlib.redirect_stdout(output):
tokenize._main(args=[*flags, self.filename])
return self.text_normalize(output.getvalue())
def check_output(self, source, expect, *flags):
with self.subTest(source=source, flags=flags):
self.set_source(source)
res = self.invoke_tokenize(*flags)
expect = self.text_normalize(expect)
self.assertListEqual(res.splitlines(), expect.splitlines())
def test_invocation(self):
# test various combinations of parameters
base_flags = ('-e', '--exact')
self.set_source('''
def f():
print(x)
return None
''')
for flag in base_flags:
with self.subTest(args=flag):
_ = self.invoke_tokenize(flag)
with self.assertRaises(SystemExit):
# suppress argparse error message
with contextlib.redirect_stderr(StringIO()):
_ = self.invoke_tokenize('--unknown')
def test_without_flag(self):
# test 'python -m tokenize source.py'
source = 'a = 1'
expect = '''
0,0-0,0: ENCODING 'utf-8'
1,0-1,1: NAME 'a'
1,2-1,3: OP '='
1,4-1,5: NUMBER '1'
1,5-1,6: NEWLINE ''
2,0-2,0: ENDMARKER ''
'''
self.check_output(source, expect)
def test_exact_flag(self):
# test 'python -m tokenize -e/--exact source.py'
source = 'a = 1'
expect = '''
0,0-0,0: ENCODING 'utf-8'
1,0-1,1: NAME 'a'
1,2-1,3: EQUAL '='
1,4-1,5: NUMBER '1'
1,5-1,6: NEWLINE ''
2,0-2,0: ENDMARKER ''
'''
for flag in ['-e', '--exact']:
self.check_output(source, expect, flag)
# Allow running this test module directly (outside the regrtest driver).
if __name__ == "__main__":
    unittest.main()

View file

@@ -499,7 +499,7 @@ def generate_tokens(readline):
"""
return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
def main():
def _main(args=None):
import argparse
# Helper error handling routines
@@ -524,7 +524,7 @@ def main():
help='the file to tokenize; defaults to stdin')
parser.add_argument('-e', '--exact', dest='exact', action='store_true',
help='display token names using the exact type')
args = parser.parse_args()
args = parser.parse_args(args)
try:
# Tokenize the input
@@ -589,4 +589,4 @@ def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False)
if __name__ == "__main__":
main()
_main()