gh-71299: Fix __all__ in tokenize (#105907)

Co-authored-by: Unit03
This commit is contained in:
Lysandros Nikolaou 2023-06-19 13:31:57 +02:00 committed by GitHub
parent 581619941e
commit ab3823a97b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 137 additions and 135 deletions

View file

@ -1,20 +1,16 @@
from test import support
from test.support import os_helper
from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
open as tokenize_open, Untokenizer, generate_tokens,
NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo,
TokenError)
from io import BytesIO, StringIO
import os
import token
import tokenize
import unittest
from io import BytesIO, StringIO
from textwrap import dedent
from unittest import TestCase, mock
from test import support
from test.support import os_helper
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
INVALID_UNDERSCORE_LITERALS)
from test.support import os_helper
from test.support.script_helper import run_test_script, make_script, run_python_until_end
import os
import token
# Converts a source string into a list of textual representations
# of the tokens such as:
@ -26,12 +22,12 @@ def stringify_tokens_from_source(token_generator, source_string):
missing_trailing_nl = source_string[-1] not in '\r\n'
for type, token, start, end, line in token_generator:
if type == ENDMARKER:
if type == tokenize.ENDMARKER:
break
# Ignore the new line on the last line if the input lacks one
if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
if missing_trailing_nl and type == tokenize.NEWLINE and end[0] == num_lines:
continue
type = tok_name[type]
type = tokenize.tok_name[type]
result.append(f" {type:10} {token!r:13} {start} {end}")
return result
@ -47,7 +43,7 @@ class TokenizeTest(TestCase):
# Format the tokens in s in a table format.
# The ENDMARKER and final NEWLINE are omitted.
f = BytesIO(s.encode('utf-8'))
result = stringify_tokens_from_source(tokenize(f.readline), s)
result = stringify_tokens_from_source(tokenize.tokenize(f.readline), s)
self.assertEqual(result,
[" ENCODING 'utf-8' (0, 0) (0, 0)"] +
expected.rstrip().splitlines())
@ -57,27 +53,27 @@ class TokenizeTest(TestCase):
yield "sdfosdg"
yield "sdfosdg"
with self.assertRaises(TypeError):
list(tokenize(gen().__next__))
list(tokenize.tokenize(gen().__next__))
def gen():
yield b"sdfosdg"
yield b"sdfosdg"
with self.assertRaises(TypeError):
list(generate_tokens(gen().__next__))
list(tokenize.generate_tokens(gen().__next__))
def gen():
yield "sdfosdg"
1/0
with self.assertRaises(ZeroDivisionError):
list(generate_tokens(gen().__next__))
list(tokenize.generate_tokens(gen().__next__))
def test_implicit_newline(self):
# Make sure that the tokenizer puts in an implicit NEWLINE
# when the input lacks a trailing new line.
f = BytesIO("x".encode('utf-8'))
tokens = list(tokenize(f.readline))
self.assertEqual(tokens[-2].type, NEWLINE)
self.assertEqual(tokens[-1].type, ENDMARKER)
tokens = list(tokenize.tokenize(f.readline))
self.assertEqual(tokens[-2].type, tokenize.NEWLINE)
self.assertEqual(tokens[-1].type, tokenize.ENDMARKER)
def test_basic(self):
self.check_tokenize("1 + 1", """\
@ -139,7 +135,7 @@ def k(x):
with self.assertRaisesRegex(IndentationError,
"unindent does not match any "
"outer indentation level") as e:
for tok in tokenize(readline):
for tok in tokenize.tokenize(readline):
pass
self.assertEqual(e.exception.lineno, 3)
self.assertEqual(e.exception.filename, '<string>')
@ -275,8 +271,8 @@ def k(x):
def test_underscore_literals(self):
def number_token(s):
f = BytesIO(s.encode('utf-8'))
for toktype, token, start, end, line in tokenize(f.readline):
if toktype == NUMBER:
for toktype, token, start, end, line in tokenize.tokenize(f.readline):
if toktype == tokenize.NUMBER:
return token
return 'invalid token'
for lit in VALID_UNDERSCORE_LITERALS:
@ -292,7 +288,7 @@ def k(x):
continue
try:
number_token(lit)
except TokenError:
except tokenize.TokenError:
continue
self.assertNotEqual(number_token(lit), lit)
@ -1150,24 +1146,24 @@ class GenerateTokensTest(TokenizeTest):
# Format the tokens in s in a table format.
# The ENDMARKER and final NEWLINE are omitted.
f = StringIO(s)
result = stringify_tokens_from_source(generate_tokens(f.readline), s)
result = stringify_tokens_from_source(tokenize.generate_tokens(f.readline), s)
self.assertEqual(result, expected.rstrip().splitlines())
def decistmt(s):
result = []
g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
g = tokenize.tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
for toknum, tokval, _, _, _ in g:
if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
if toknum == tokenize.NUMBER and '.' in tokval: # replace NUMBER tokens
result.extend([
(NAME, 'Decimal'),
(OP, '('),
(STRING, repr(tokval)),
(OP, ')')
(tokenize.NAME, 'Decimal'),
(tokenize.OP, '('),
(tokenize.STRING, repr(tokval)),
(tokenize.OP, ')')
])
else:
result.append((toknum, tokval))
return untokenize(result).decode('utf-8').strip()
return tokenize.untokenize(result).decode('utf-8').strip()
class TestMisc(TestCase):
@ -1191,6 +1187,13 @@ class TestMisc(TestCase):
self.assertEqual(eval(decistmt(s)),
Decimal('-3.217160342717258261933904529E-7'))
def test___all__(self):
expected = token.__all__ + [
"TokenInfo", "TokenError", "generate_tokens",
"detect_encoding", "untokenize", "open", "tokenize",
]
self.assertCountEqual(tokenize.__all__, expected)
class TestTokenizerAdheresToPep0263(TestCase):
"""
@ -1245,9 +1248,10 @@ class Test_Tokenize(TestCase):
yield b''
# skip the initial encoding token and the end tokens
tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
extra_tokens=True))[:-2]
expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
tokens = list(tokenize._generate_tokens_from_c_tokenizer(readline().__next__,
encoding='utf-8',
extra_tokens=True))[:-2]
expected_tokens = [tokenize.TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
self.assertEqual(tokens, expected_tokens,
"bytes not decoded with encoding")
@ -1271,7 +1275,7 @@ class TestDetectEncoding(TestCase):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8')
self.assertEqual(consumed_lines, list(lines[:2]))
@ -1281,7 +1285,7 @@ class TestDetectEncoding(TestCase):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines,
[b'# something\n', b'print(something)\n'])
@ -1292,7 +1296,7 @@ class TestDetectEncoding(TestCase):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'iso-8859-1')
self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
@ -1302,7 +1306,7 @@ class TestDetectEncoding(TestCase):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
@ -1313,7 +1317,7 @@ class TestDetectEncoding(TestCase):
b'do_something(else)\n'
)
readline = self.get_readline(lines)
self.assertRaises(SyntaxError, detect_encoding, readline)
self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
def test_cookie_second_line_no_bom(self):
lines = (
@ -1322,7 +1326,7 @@ class TestDetectEncoding(TestCase):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'ascii')
expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
self.assertEqual(consumed_lines, expected)
@ -1334,7 +1338,7 @@ class TestDetectEncoding(TestCase):
b'print(something)\n',
b'do_something(else)\n'
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines,
[b'#! something\n', b'f# coding=utf-8\n'])
@ -1347,7 +1351,7 @@ class TestDetectEncoding(TestCase):
b'do_something(else)\n'
)
readline = self.get_readline(lines)
self.assertRaises(SyntaxError, detect_encoding, readline)
self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
def test_cookie_second_line_noncommented_first_line(self):
lines = (
@ -1355,7 +1359,7 @@ class TestDetectEncoding(TestCase):
b'# vim: set fileencoding=iso8859-15 :\n',
b"print('\xe2\x82\xac')\n"
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'utf-8')
expected = [b"print('\xc2\xa3')\n"]
self.assertEqual(consumed_lines, expected)
@ -1366,7 +1370,7 @@ class TestDetectEncoding(TestCase):
b'# vim: set fileencoding=iso8859-15 :\n',
b"print('\xe2\x82\xac')\n"
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'iso8859-15')
expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
self.assertEqual(consumed_lines, expected)
@ -1377,7 +1381,7 @@ class TestDetectEncoding(TestCase):
b'# vim: set fileencoding=iso8859-15 :\n',
b"print('\xe2\x82\xac')\n"
)
encoding, consumed_lines = detect_encoding(self.get_readline(lines))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
self.assertEqual(encoding, 'iso8859-15')
expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
self.assertEqual(consumed_lines, expected)
@ -1394,7 +1398,7 @@ class TestDetectEncoding(TestCase):
b"print(things)\n",
b"do_something += 4\n")
rl = self.get_readline(lines)
found, consumed_lines = detect_encoding(rl)
found, consumed_lines = tokenize.detect_encoding(rl)
self.assertEqual(found, "iso-8859-1")
def test_syntaxerror_latin1(self):
@ -1404,7 +1408,7 @@ class TestDetectEncoding(TestCase):
b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
)
readline = self.get_readline(lines)
self.assertRaises(SyntaxError, detect_encoding, readline)
self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
def test_utf8_normalization(self):
@ -1417,36 +1421,36 @@ class TestDetectEncoding(TestCase):
b"# coding: " + enc.encode("ascii") + b"\n",
b"1 + 3\n")
rl = self.get_readline(lines)
found, consumed_lines = detect_encoding(rl)
found, consumed_lines = tokenize.detect_encoding(rl)
self.assertEqual(found, "utf-8")
def test_short_files(self):
readline = self.get_readline((b'print(something)\n',))
encoding, consumed_lines = detect_encoding(readline)
encoding, consumed_lines = tokenize.detect_encoding(readline)
self.assertEqual(encoding, 'utf-8')
self.assertEqual(consumed_lines, [b'print(something)\n'])
encoding, consumed_lines = detect_encoding(self.get_readline(()))
encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(()))
self.assertEqual(encoding, 'utf-8')
self.assertEqual(consumed_lines, [])
readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
encoding, consumed_lines = detect_encoding(readline)
encoding, consumed_lines = tokenize.detect_encoding(readline)
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines, [b'print(something)\n'])
readline = self.get_readline((b'\xef\xbb\xbf',))
encoding, consumed_lines = detect_encoding(readline)
encoding, consumed_lines = tokenize.detect_encoding(readline)
self.assertEqual(encoding, 'utf-8-sig')
self.assertEqual(consumed_lines, [])
readline = self.get_readline((b'# coding: bad\n',))
self.assertRaises(SyntaxError, detect_encoding, readline)
self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
def test_false_encoding(self):
# Issue 18873: "Encoding" detected in non-comment lines
readline = self.get_readline((b'print("#coding=fake")',))
encoding, consumed_lines = detect_encoding(readline)
encoding, consumed_lines = tokenize.detect_encoding(readline)
self.assertEqual(encoding, 'utf-8')
self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
@ -1459,14 +1463,14 @@ class TestDetectEncoding(TestCase):
with open(filename, 'w', encoding=encoding) as fp:
print("# coding: %s" % encoding, file=fp)
print("print('euro:\u20ac')", file=fp)
with tokenize_open(filename) as fp:
with tokenize.open(filename) as fp:
self.assertEqual(fp.encoding, encoding)
self.assertEqual(fp.mode, 'r')
# test BOM (no coding cookie)
with open(filename, 'w', encoding='utf-8-sig') as fp:
print("print('euro:\u20ac')", file=fp)
with tokenize_open(filename) as fp:
with tokenize.open(filename) as fp:
self.assertEqual(fp.encoding, 'utf-8-sig')
self.assertEqual(fp.mode, 'r')
@ -1493,16 +1497,16 @@ class TestDetectEncoding(TestCase):
ins = Bunk(lines, path)
# Make sure lacking a name isn't an issue.
del ins.name
detect_encoding(ins.readline)
tokenize.detect_encoding(ins.readline)
with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
ins = Bunk(lines, path)
detect_encoding(ins.readline)
tokenize.detect_encoding(ins.readline)
def test_open_error(self):
# Issue #23840: open() must close the binary file on error
m = BytesIO(b'#coding:xxx')
with mock.patch('tokenize._builtin_open', return_value=m):
self.assertRaises(SyntaxError, tokenize_open, 'foobar')
self.assertRaises(SyntaxError, tokenize.open, 'foobar')
self.assertTrue(m.closed)
@ -1542,7 +1546,7 @@ class TestTokenize(TestCase):
tokenize_module.detect_encoding = mock_detect_encoding
tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
try:
results = tokenize(mock_readline)
results = tokenize.tokenize(mock_readline)
self.assertEqual(list(results)[1:],
[b'first', b'second', b'1', b'2', b'3', b'4'])
finally:
@ -1559,23 +1563,23 @@ class TestTokenize(TestCase):
buf = '\n'.join(buf)
# Test that 500 consecutive, one-line defs are OK
toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
toks = list(tokenize.tokenize(BytesIO(buf.encode('utf-8')).readline))
self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER
# [-2] is always NEWLINE
def assertExactTypeEqual(self, opstr, *optypes):
tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
tokens = list(tokenize.tokenize(BytesIO(opstr.encode('utf-8')).readline))
num_optypes = len(optypes)
self.assertEqual(len(tokens), 3 + num_optypes)
self.assertEqual(tok_name[tokens[0].exact_type],
tok_name[ENCODING])
self.assertEqual(tokenize.tok_name[tokens[0].exact_type],
tokenize.tok_name[tokenize.ENCODING])
for i in range(num_optypes):
self.assertEqual(tok_name[tokens[i + 1].exact_type],
tok_name[optypes[i]])
self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type],
tok_name[token.NEWLINE])
self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type],
tok_name[token.ENDMARKER])
self.assertEqual(tokenize.tok_name[tokens[i + 1].exact_type],
tokenize.tok_name[optypes[i]])
self.assertEqual(tokenize.tok_name[tokens[1 + num_optypes].exact_type],
tokenize.tok_name[token.NEWLINE])
self.assertEqual(tokenize.tok_name[tokens[2 + num_optypes].exact_type],
tokenize.tok_name[token.ENDMARKER])
def test_exact_type(self):
self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
@ -1625,11 +1629,11 @@ class TestTokenize(TestCase):
self.assertExactTypeEqual('@=', token.ATEQUAL)
self.assertExactTypeEqual('a**2+b**2==c**2',
NAME, token.DOUBLESTAR, NUMBER,
tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER,
token.PLUS,
NAME, token.DOUBLESTAR, NUMBER,
tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER,
token.EQEQUAL,
NAME, token.DOUBLESTAR, NUMBER)
tokenize.NAME, token.DOUBLESTAR, tokenize.NUMBER)
self.assertExactTypeEqual('{1, 2, 3}',
token.LBRACE,
token.NUMBER, token.COMMA,
@ -1650,32 +1654,32 @@ class TestTokenize(TestCase):
# See http://bugs.python.org/issue44667
source = 'b = 1\n\n#test'
expected_tokens = [
TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
tokenize.TokenInfo(type=token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
tokenize.TokenInfo(type=token.NAME, string='b', start=(1, 0), end=(1, 1), line='b = 1\n'),
tokenize.TokenInfo(type=token.OP, string='=', start=(1, 2), end=(1, 3), line='b = 1\n'),
tokenize.TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
tokenize.TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
tokenize.TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
tokenize.TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
tokenize.TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
]
tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline))
self.assertEqual(tokens, expected_tokens)
def test_newline_and_space_at_the_end_of_the_source_without_newline(self):
# See https://github.com/python/cpython/issues/105435
source = 'a\n '
expected_tokens = [
TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
tokenize.TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
tokenize.TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
tokenize.TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
tokenize.TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
tokenize.TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
]
tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
tokens = list(tokenize.tokenize(BytesIO(source.encode('utf-8')).readline))
self.assertEqual(tokens, expected_tokens)
def test_invalid_character_in_fstring_middle(self):
@ -1695,7 +1699,7 @@ class UntokenizeTest(TestCase):
def test_bad_input_order(self):
# raise if previous row
u = Untokenizer()
u = tokenize.Untokenizer()
u.prev_row = 2
u.prev_col = 2
with self.assertRaises(ValueError) as cm:
@ -1707,7 +1711,7 @@ class UntokenizeTest(TestCase):
def test_backslash_continuation(self):
# The problem is that <whitespace>\<newline> leaves no token
u = Untokenizer()
u = tokenize.Untokenizer()
u.prev_row = 1
u.prev_col = 1
u.tokens = []
@ -1719,17 +1723,17 @@ class UntokenizeTest(TestCase):
TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
def test_iter_compat(self):
u = Untokenizer()
token = (NAME, 'Hello')
tokens = [(ENCODING, 'utf-8'), token]
u = tokenize.Untokenizer()
token = (tokenize.NAME, 'Hello')
tokens = [(tokenize.ENCODING, 'utf-8'), token]
u.compat(token, iter([]))
self.assertEqual(u.tokens, ["Hello "])
u = Untokenizer()
u = tokenize.Untokenizer()
self.assertEqual(u.untokenize(iter([token])), 'Hello ')
u = Untokenizer()
u = tokenize.Untokenizer()
self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
self.assertEqual(u.encoding, 'utf-8')
self.assertEqual(untokenize(iter(tokens)), b'Hello ')
self.assertEqual(tokenize.untokenize(iter(tokens)), b'Hello ')
class TestRoundtrip(TestCase):
@ -1752,17 +1756,17 @@ class TestRoundtrip(TestCase):
else:
code = f.read()
readline = iter(code.splitlines(keepends=True)).__next__
tokens5 = list(tokenize(readline))
tokens5 = list(tokenize.tokenize(readline))
tokens2 = [tok[:2] for tok in tokens5]
# Reproduce tokens2 from pairs
bytes_from2 = untokenize(tokens2)
bytes_from2 = tokenize.untokenize(tokens2)
readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
tokens2_from2 = [tok[:2] for tok in tokenize.tokenize(readline2)]
self.assertEqual(tokens2_from2, tokens2)
# Reproduce tokens2 from 5-tuples
bytes_from5 = untokenize(tokens5)
bytes_from5 = tokenize.untokenize(tokens5)
readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
tokens2_from5 = [tok[:2] for tok in tokenize.tokenize(readline5)]
self.assertEqual(tokens2_from5, tokens2)
def check_line_extraction(self, f):
@ -1771,8 +1775,8 @@ class TestRoundtrip(TestCase):
else:
code = f.read()
readline = iter(code.splitlines(keepends=True)).__next__
for tok in tokenize(readline):
if tok.type in {ENCODING, ENDMARKER}:
for tok in tokenize.tokenize(readline):
if tok.type in {tokenize.ENCODING, tokenize.ENDMARKER}:
continue
self.assertEqual(tok.string, tok.line[tok.start[1]: tok.end[1]])
@ -1878,7 +1882,7 @@ class TestRoundtrip(TestCase):
def roundtrip(self, code):
if isinstance(code, str):
code = code.encode('utf-8')
return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
return tokenize.untokenize(tokenize.tokenize(BytesIO(code).readline)).decode('utf-8')
def test_indentation_semantics_retained(self):
"""
@ -1896,27 +1900,27 @@ class InvalidPythonTests(TestCase):
# See issue #gh-105549
source = "2sin(x)"
expected_tokens = [
TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
tokenize.TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
tokenize.TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
tokenize.TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
tokenize.TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
tokenize.TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
]
tokens = list(generate_tokens(StringIO(source).readline))
tokens = list(tokenize.generate_tokens(StringIO(source).readline))
self.assertEqual(tokens, expected_tokens)
def test_number_starting_with_zero(self):
source = "01234"
expected_tokens = [
TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
tokenize.TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
tokenize.TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
tokenize.TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
]
tokens = list(generate_tokens(StringIO(source).readline))
tokens = list(tokenize.generate_tokens(StringIO(source).readline))
self.assertEqual(tokens, expected_tokens)
class CTokenizeTest(TestCase):
@ -1926,7 +1930,7 @@ class CTokenizeTest(TestCase):
f = StringIO(s)
with self.subTest(source=s):
result = stringify_tokens_from_source(
_generate_tokens_from_c_tokenizer(f.readline), s
tokenize._generate_tokens_from_c_tokenizer(f.readline), s
)
self.assertEqual(result, expected.rstrip().splitlines())
@ -1935,15 +1939,15 @@ class CTokenizeTest(TestCase):
yield "1+1".encode(encoding)
expected = [
TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
tokenize.TokenInfo(type=tokenize.OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
tokenize.TokenInfo(type=tokenize.NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
tokenize.TokenInfo(type=tokenize.NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
tokenize.TokenInfo(type=tokenize.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
]
for encoding in ["utf-8", "latin-1", "utf-16"]:
with self.subTest(encoding=encoding):
tokens = list(_generate_tokens_from_c_tokenizer(
tokens = list(tokenize._generate_tokens_from_c_tokenizer(
readline(encoding).__next__,
extra_tokens=True,
encoding=encoding,
@ -2796,7 +2800,7 @@ async def f():
def test_invalid_syntax(self):
def get_tokens(string):
the_string = StringIO(string)
return list(_generate_tokens_from_c_tokenizer(the_string.readline))
return list(tokenize._generate_tokens_from_c_tokenizer(the_string.readline))
for case in [
"(1+2]",
@ -2832,7 +2836,7 @@ async def f():
"]",
]:
with self.subTest(case=case):
self.assertRaises(TokenError, get_tokens, case)
self.assertRaises(tokenize.TokenError, get_tokens, case)
def test_max_indent(self):
MAXINDENT = 100
@ -2844,14 +2848,14 @@ async def f():
valid = generate_source(MAXINDENT - 1)
the_input = StringIO(valid)
tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
self.assertEqual(tokens[-2].type, DEDENT)
self.assertEqual(tokens[-1].type, ENDMARKER)
tokens = list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline))
self.assertEqual(tokens[-2].type, tokenize.DEDENT)
self.assertEqual(tokens[-1].type, tokenize.ENDMARKER)
compile(valid, "<string>", "exec")
invalid = generate_source(MAXINDENT)
the_input = StringIO(invalid)
self.assertRaises(IndentationError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
self.assertRaises(IndentationError, lambda: list(tokenize._generate_tokens_from_c_tokenizer(the_input.readline)))
self.assertRaises(
IndentationError, compile, invalid, "<string>", "exec"
)
@ -2860,7 +2864,7 @@ async def f():
def get_tokens(string):
the_string = StringIO(string)
return [(kind, string) for (kind, string, *_)
in _generate_tokens_from_c_tokenizer(the_string.readline)]
in tokenize._generate_tokens_from_c_tokenizer(the_string.readline)]
code = dedent("""
def fib(n):

View file

@ -41,7 +41,7 @@ blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
import token
__all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
"untokenize", "TokenInfo"]
"untokenize", "TokenInfo", "open", "TokenError"]
del token
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
@ -162,8 +162,6 @@ tabsize = 8
class TokenError(Exception): pass
class StopTokenizing(Exception): pass
class Untokenizer:
def __init__(self):