gh-131178: Add tests for tokenize command-line interface (#131274)

This commit is contained in:
Semyon Moroz 2025-03-27 20:04:16 +04:00 committed by GitHub
parent 8614f86b71
commit 9b83670f0f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 83 additions and 3 deletions

View file

@@ -1,5 +1,7 @@
import contextlib
import os
import re
import tempfile
import token
import tokenize
import unittest
@@ -3178,5 +3180,83 @@ class CTokenizerBufferTests(unittest.TestCase):
run_test_script(file_name)
class CommandLineTest(unittest.TestCase):
def setUp(self):
self.filename = tempfile.mktemp()
self.addCleanup(os_helper.unlink, self.filename)
@staticmethod
def text_normalize(string):
"""Dedent *string* and strip it from its surrounding whitespaces.
This method is used by the other utility functions so that any
string to write or to match against can be freely indented.
"""
return re.sub(r'\s+', ' ', string).strip()
def set_source(self, content):
with open(self.filename, 'w') as fp:
fp.write(content)
def invoke_tokenize(self, *flags):
output = StringIO()
with contextlib.redirect_stdout(output):
tokenize._main(args=[*flags, self.filename])
return self.text_normalize(output.getvalue())
def check_output(self, source, expect, *flags):
with self.subTest(source=source, flags=flags):
self.set_source(source)
res = self.invoke_tokenize(*flags)
expect = self.text_normalize(expect)
self.assertListEqual(res.splitlines(), expect.splitlines())
def test_invocation(self):
# test various combinations of parameters
base_flags = ('-e', '--exact')
self.set_source('''
def f():
print(x)
return None
''')
for flag in base_flags:
with self.subTest(args=flag):
_ = self.invoke_tokenize(flag)
with self.assertRaises(SystemExit):
# suppress argparse error message
with contextlib.redirect_stderr(StringIO()):
_ = self.invoke_tokenize('--unknown')
def test_without_flag(self):
# test 'python -m tokenize source.py'
source = 'a = 1'
expect = '''
0,0-0,0: ENCODING 'utf-8'
1,0-1,1: NAME 'a'
1,2-1,3: OP '='
1,4-1,5: NUMBER '1'
1,5-1,6: NEWLINE ''
2,0-2,0: ENDMARKER ''
'''
self.check_output(source, expect)
def test_exact_flag(self):
# test 'python -m tokenize -e/--exact source.py'
source = 'a = 1'
expect = '''
0,0-0,0: ENCODING 'utf-8'
1,0-1,1: NAME 'a'
1,2-1,3: EQUAL '='
1,4-1,5: NUMBER '1'
1,5-1,6: NEWLINE ''
2,0-2,0: ENDMARKER ''
'''
for flag in ['-e', '--exact']:
self.check_output(source, expect, flag)
# Allow running this test module directly (outside the regrtest driver).
if __name__ == "__main__":
    unittest.main()

View file

@@ -499,7 +499,7 @@ def generate_tokens(readline):
"""
return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
def main():
def _main(args=None):
import argparse
# Helper error handling routines
@@ -524,7 +524,7 @@ def main():
help='the file to tokenize; defaults to stdin')
parser.add_argument('-e', '--exact', dest='exact', action='store_true',
help='display token names using the exact type')
args = parser.parse_args()
args = parser.parse_args(args)
try:
# Tokenize the input
@@ -589,4 +589,4 @@ def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False)
if __name__ == "__main__":
main()
_main()