mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Issue #20387: Merge patch and test
This commit is contained in:
commit
80c29ac1ea
3 changed files with 40 additions and 1 deletions
|
@@ -5,6 +5,8 @@ The tests can be really simple. Given a small fragment of source
|
||||||
code, print out a table with tokens. The ENDMARKER is omitted for
|
code, print out a table with tokens. The ENDMARKER is omitted for
|
||||||
brevity.
|
brevity.
|
||||||
|
|
||||||
|
>>> import glob
|
||||||
|
|
||||||
>>> dump_tokens("1 + 1")
|
>>> dump_tokens("1 + 1")
|
||||||
ENCODING 'utf-8' (0, 0) (0, 0)
|
ENCODING 'utf-8' (0, 0) (0, 0)
|
||||||
NUMBER '1' (1, 0) (1, 1)
|
NUMBER '1' (1, 0) (1, 1)
|
||||||
|
@@ -647,7 +649,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
|
||||||
open as tokenize_open, Untokenizer)
|
open as tokenize_open, Untokenizer)
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from unittest import TestCase, mock
|
from unittest import TestCase, mock
|
||||||
import os, sys, glob
|
import os
|
||||||
import token
|
import token
|
||||||
|
|
||||||
def dump_tokens(s):
|
def dump_tokens(s):
|
||||||
|
@@ -1227,6 +1229,22 @@ class UntokenizeTest(TestCase):
|
||||||
self.assertEqual(untokenize(iter(tokens)), b'Hello ')
|
self.assertEqual(untokenize(iter(tokens)), b'Hello ')
|
||||||
|
|
||||||
|
|
||||||
|
class TestRoundtrip(TestCase):
|
||||||
|
def roundtrip(self, code):
|
||||||
|
if isinstance(code, str):
|
||||||
|
code = code.encode('utf-8')
|
||||||
|
return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
|
||||||
|
|
||||||
|
def test_indentation_semantics_retained(self):
|
||||||
|
"""
|
||||||
|
Ensure that although whitespace might be mutated in a roundtrip,
|
||||||
|
the semantic meaning of the indentation remains consistent.
|
||||||
|
"""
|
||||||
|
code = "if False:\n\tx=3\n\tx=3\n"
|
||||||
|
codelines = self.roundtrip(code).split('\n')
|
||||||
|
self.assertEqual(codelines[1], codelines[2])
|
||||||
|
|
||||||
|
|
||||||
__test__ = {"doctests" : doctests, 'decistmt': decistmt}
|
__test__ = {"doctests" : doctests, 'decistmt': decistmt}
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
|
@@ -1237,6 +1255,7 @@ def test_main():
|
||||||
support.run_unittest(TestDetectEncoding)
|
support.run_unittest(TestDetectEncoding)
|
||||||
support.run_unittest(TestTokenize)
|
support.run_unittest(TestTokenize)
|
||||||
support.run_unittest(UntokenizeTest)
|
support.run_unittest(UntokenizeTest)
|
||||||
|
support.run_unittest(TestRoundtrip)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test_main()
|
test_main()
|
||||||
|
|
|
@@ -244,6 +244,8 @@ class Untokenizer:
|
||||||
|
|
||||||
def untokenize(self, iterable):
|
def untokenize(self, iterable):
|
||||||
it = iter(iterable)
|
it = iter(iterable)
|
||||||
|
indents = []
|
||||||
|
startline = False
|
||||||
for t in it:
|
for t in it:
|
||||||
if len(t) == 2:
|
if len(t) == 2:
|
||||||
self.compat(t, it)
|
self.compat(t, it)
|
||||||
|
@@ -254,6 +256,21 @@ class Untokenizer:
|
||||||
continue
|
continue
|
||||||
if tok_type == ENDMARKER:
|
if tok_type == ENDMARKER:
|
||||||
break
|
break
|
||||||
|
if tok_type == INDENT:
|
||||||
|
indents.append(token)
|
||||||
|
continue
|
||||||
|
elif tok_type == DEDENT:
|
||||||
|
indents.pop()
|
||||||
|
self.prev_row, self.prev_col = end
|
||||||
|
continue
|
||||||
|
elif tok_type in (NEWLINE, NL):
|
||||||
|
startline = True
|
||||||
|
elif startline and indents:
|
||||||
|
indent = indents[-1]
|
||||||
|
if start[1] >= len(indent):
|
||||||
|
self.tokens.append(indent)
|
||||||
|
self.prev_col = len(indent)
|
||||||
|
startline = False
|
||||||
self.add_whitespace(start)
|
self.add_whitespace(start)
|
||||||
self.tokens.append(token)
|
self.tokens.append(token)
|
||||||
self.prev_row, self.prev_col = end
|
self.prev_row, self.prev_col = end
|
||||||
|
|
|
@@ -60,6 +60,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize
|
||||||
|
for tab-indented blocks.
|
||||||
|
|
||||||
- Issue #24336: The contextmanager decorator now works with functions with
|
- Issue #24336: The contextmanager decorator now works with functions with
|
||||||
keyword arguments called "func" and "self". Patch by Martin Panter.
|
keyword arguments called "func" and "self". Patch by Martin Panter.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue