Mirror of https://github.com/python/cpython.git (synced 2025-08-28 12:45:07 +00:00)
gh-131178: Add tests for tokenize command-line interface (#131274)

parent 8614f86b71
commit 9b83670f0f
2 changed files with 83 additions and 3 deletions
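For context, the interface under test is the module's CLI, `python -m tokenize [-e | --exact] [filename]`, which prints one line per token. The sketch below is illustrative only (not part of the commit): it produces roughly the same listing programmatically for the source 'a = 1', which is the format the expected strings in the new tests are normalized against.

import io
import token
import tokenize

# Tokenize the same one-line source used in the tests and print each token's
# position range, token name, and string, roughly as the CLI does.
source = b'a = 1'
for tok in tokenize.tokenize(io.BytesIO(source).readline):
    start = '%d,%d' % tok.start
    end = '%d,%d' % tok.end
    # tok.type reports operators as OP; tok.exact_type would report EQUAL,
    # which is what the -e/--exact flag selects.
    print('%-20s%-15s%r' % (start + '-' + end + ':', token.tok_name[tok.type], tok.string))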
Lib/test/test_tokenize.py

@@ -1,5 +1,7 @@
+import contextlib
 import os
 import re
+import tempfile
 import token
 import tokenize
 import unittest

@@ -3178,5 +3180,83 @@ class CTokenizerBufferTests(unittest.TestCase):
         run_test_script(file_name)


+class CommandLineTest(unittest.TestCase):
+    def setUp(self):
+        self.filename = tempfile.mktemp()
+        self.addCleanup(os_helper.unlink, self.filename)
+
+    @staticmethod
+    def text_normalize(string):
+        """Dedent *string* and strip it from its surrounding whitespaces.
+
+        This method is used by the other utility functions so that any
+        string to write or to match against can be freely indented.
+        """
+        return re.sub(r'\s+', ' ', string).strip()
+
+    def set_source(self, content):
+        with open(self.filename, 'w') as fp:
+            fp.write(content)
+
+    def invoke_tokenize(self, *flags):
+        output = StringIO()
+        with contextlib.redirect_stdout(output):
+            tokenize._main(args=[*flags, self.filename])
+        return self.text_normalize(output.getvalue())
+
+    def check_output(self, source, expect, *flags):
+        with self.subTest(source=source, flags=flags):
+            self.set_source(source)
+            res = self.invoke_tokenize(*flags)
+            expect = self.text_normalize(expect)
+            self.assertListEqual(res.splitlines(), expect.splitlines())
+
+    def test_invocation(self):
+        # test various combinations of parameters
+        base_flags = ('-e', '--exact')
+
+        self.set_source('''
+            def f():
+                print(x)
+                return None
+        ''')
+
+        for flag in base_flags:
+            with self.subTest(args=flag):
+                _ = self.invoke_tokenize(flag)
+
+        with self.assertRaises(SystemExit):
+            # suppress argparse error message
+            with contextlib.redirect_stderr(StringIO()):
+                _ = self.invoke_tokenize('--unknown')
+
+    def test_without_flag(self):
+        # test 'python -m tokenize source.py'
+        source = 'a = 1'
+        expect = '''
+            0,0-0,0:    ENCODING    'utf-8'
+            1,0-1,1:    NAME        'a'
+            1,2-1,3:    OP          '='
+            1,4-1,5:    NUMBER      '1'
+            1,5-1,6:    NEWLINE     ''
+            2,0-2,0:    ENDMARKER   ''
+        '''
+        self.check_output(source, expect)
+
+    def test_exact_flag(self):
+        # test 'python -m tokenize -e/--exact source.py'
+        source = 'a = 1'
+        expect = '''
+            0,0-0,0:    ENCODING    'utf-8'
+            1,0-1,1:    NAME        'a'
+            1,2-1,3:    EQUAL       '='
+            1,4-1,5:    NUMBER      '1'
+            1,5-1,6:    NEWLINE     ''
+            2,0-2,0:    ENDMARKER   ''
+        '''
+        for flag in ['-e', '--exact']:
+            self.check_output(source, expect, flag)
+
+
 if __name__ == "__main__":
     unittest.main()
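To run just the new test class against an interpreter built with this change, one option is to use unittest's loader directly, as sketched below (`./python -m test test_tokenize` from a CPython checkout also runs the whole file).

import unittest
from test import test_tokenize

# Load only the CommandLineTest class added above and run it verbosely.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(test_tokenize.CommandLineTest)
unittest.TextTestRunner(verbosity=2).run(suite)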
Lib/tokenize.py

@@ -499,7 +499,7 @@ def generate_tokens(readline):
     """
     return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)

-def main():
+def _main(args=None):
     import argparse

     # Helper error handling routines

@@ -524,7 +524,7 @@ def main():
                         help='the file to tokenize; defaults to stdin')
     parser.add_argument('-e', '--exact', dest='exact', action='store_true',
                         help='display token names using the exact type')
-    args = parser.parse_args()
+    args = parser.parse_args(args)

     try:
         # Tokenize the input

@@ -589,4 +589,4 @@ def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False):


 if __name__ == "__main__":
-    main()
+    _main()
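The only change to tokenize.py itself is that the renamed `_main()` now accepts an optional argument list and forwards it to `parse_args()`. With the default `args=None`, argparse still falls back to `sys.argv[1:]`, so `python -m tokenize` behaves exactly as before, while the tests can drive the CLI in-process. A minimal sketch of that in-process use (the file name `example.py` is a placeholder):

import contextlib
import io
import tokenize

# Capture what the CLI would print for 'example.py' with exact token names,
# the same technique CommandLineTest.invoke_tokenize uses.
buf = io.StringIO()
with contextlib.redirect_stdout(buf):
    tokenize._main(args=['--exact', 'example.py'])
print(buf.getvalue())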