Mirror of https://github.com/python/cpython.git (synced 2025-08-24 10:45:53 +00:00)
gh-105564: Don't include artificial newlines in the line attribute of tokens (#105565)
parent 1dd267af64
commit d7f46bcd98
4 changed files with 30 additions and 25 deletions
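
Before this change, the tokenizer appended an artificial newline to source that does not end with one, and that newline leaked into the ``line`` attribute of the tokens it produced (e.g. line='1+1\n' for the input 1+1). Below is a minimal sketch of the fixed behaviour, assuming a build that includes this commit and using the public tokenize.generate_tokens API (backed by the C tokenizer in 3.12+); the output described in the comments is illustrative:

import io
import tokenize

# Source that does not end with a newline; the tokenizer inserts one internally.
source = "1+1"

for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    # Before this commit every token on the last physical line reported
    # line='1+1\n'; with the fix the artificial newline is not included,
    # so line is '1+1' (and '' for the ENDMARKER token).
    print(tok.type, repr(tok.string), tok.start, tok.end, repr(tok.line))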
@@ -552,14 +552,14 @@ class TestPegen(unittest.TestCase):
 string="D",
 start=(1, 0),
 end=(1, 1),
-line="D A C A E\n",
+line="D A C A E",
 ),
 TokenInfo(
 type=NAME,
 string="A",
 start=(1, 2),
 end=(1, 3),
-line="D A C A E\n",
+line="D A C A E",
 ),
 ],
 TokenInfo(
@@ -567,7 +567,7 @@ class TestPegen(unittest.TestCase):
 string="C",
 start=(1, 4),
 end=(1, 5),
-line="D A C A E\n",
+line="D A C A E",
 ),
 ],
 TokenInfo(
@@ -575,11 +575,11 @@ class TestPegen(unittest.TestCase):
 string="A",
 start=(1, 6),
 end=(1, 7),
-line="D A C A E\n",
+line="D A C A E",
 ),
 ],
 TokenInfo(
-type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E\n"
+type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
 ),
 ],
 )
@@ -594,22 +594,22 @@ class TestPegen(unittest.TestCase):
 string="B",
 start=(1, 0),
 end=(1, 1),
-line="B C A E\n",
+line="B C A E",
 ),
 TokenInfo(
 type=NAME,
 string="C",
 start=(1, 2),
 end=(1, 3),
-line="B C A E\n",
+line="B C A E",
 ),
 ],
 TokenInfo(
-type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E\n"
+type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
 ),
 ],
 TokenInfo(
-type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E\n"
+type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
 ),
 ],
 )
@@ -655,10 +655,10 @@ class TestPegen(unittest.TestCase):
 node,
 [
 TokenInfo(
-NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .\n"
+NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
 ),
 TokenInfo(
-OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 .\n"
+OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
 ),
 [
 TokenInfo(
@@ -666,7 +666,7 @@ class TestPegen(unittest.TestCase):
 string="12",
 start=(1, 6),
 end=(1, 8),
-line="foo = 12 + 12 .\n",
+line="foo = 12 + 12 .",
 ),
 [
 [
@@ -675,14 +675,14 @@ class TestPegen(unittest.TestCase):
 string="+",
 start=(1, 9),
 end=(1, 10),
-line="foo = 12 + 12 .\n",
+line="foo = 12 + 12 .",
 ),
 TokenInfo(
 NUMBER,
 string="12",
 start=(1, 11),
 end=(1, 13),
-line="foo = 12 + 12 .\n",
+line="foo = 12 + 12 .",
 ),
 ]
 ],
@@ -734,9 +734,9 @@ class TestPegen(unittest.TestCase):
 self.assertEqual(
 node,
 [
-TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)\n"),
-TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)\n"),
-TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)\n"),
+TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
+TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
+TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
 ],
 )

@@ -1229,7 +1229,7 @@ class Test_Tokenize(TestCase):
 # skip the initial encoding token and the end tokens
 tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
 extra_tokens=True))[:-2]
-expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
+expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
 self.assertEqual(tokens, expected_tokens,
 "bytes not decoded with encoding")

@@ -1638,8 +1638,8 @@ class TestTokenize(TestCase):
 TokenInfo(type=token.NUMBER, string='1', start=(1, 4), end=(1, 5), line='b = 1\n'),
 TokenInfo(type=token.NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='b = 1\n'),
 TokenInfo(type=token.NL, string='\n', start=(2, 0), end=(2, 1), line='\n'),
-TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test\n'),
-TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test\n'),
+TokenInfo(type=token.COMMENT, string='#test', start=(3, 0), end=(3, 5), line='#test'),
+TokenInfo(type=token.NL, string='', start=(3, 5), end=(3, 6), line='#test'),
 TokenInfo(type=token.ENDMARKER, string='', start=(4, 0), end=(4, 0), line='')
 ]

@@ -1653,7 +1653,7 @@ class TestTokenize(TestCase):
 TokenInfo(token.ENCODING, string='utf-8', start=(0, 0), end=(0, 0), line=''),
 TokenInfo(token.NAME, string='a', start=(1, 0), end=(1, 1), line='a\n'),
 TokenInfo(token.NEWLINE, string='\n', start=(1, 1), end=(1, 2), line='a\n'),
-TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' \n'),
+TokenInfo(token.NL, string='', start=(2, 1), end=(2, 2), line=' '),
 TokenInfo(token.ENDMARKER, string='', start=(3, 0), end=(3, 0), line='')
 ]

@@ -1889,10 +1889,10 @@ class CTokenizeTest(TestCase):
 yield "1+1".encode(encoding)

 expected = [
-TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
-TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
-TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
-TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1\n'),
+TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1'),
+TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1'),
+TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1'),
+TokenInfo(type=NEWLINE, string='', start=(1, 3), end=(1, 4), line='1+1'),
 TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
 ]
 for encoding in ["utf-8", "latin-1", "utf-16"]:
@@ -0,0 +1,2 @@
+Don't include artificial newlines in the ``line`` attribute of tokens in the
+APIs of the :mod:`tokenize` module. Patch by Pablo Galindo
@@ -206,6 +206,9 @@ tokenizeriter_next(tokenizeriterobject *it)
 line = PyUnicode_FromString("");
 } else {
 Py_ssize_t size = it->tok->inp - line_start;
+if (size >= 1 && it->tok->implicit_newline) {
+    size -= 1;
+}
 line = PyUnicode_DecodeUTF8(line_start, size, "replace");
 }
 if (line == NULL) {
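
The guard on it->tok->implicit_newline above means only the newline that the tokenizer itself inserted gets trimmed; a newline that is really present in the source is still reported. A small sketch of that distinction through the Python-level API, assuming a build that includes this fix (the outputs in the comments are illustrative):

import io
import tokenize

def token_lines(source):
    # Collect the `line` attribute of every token produced for `source`.
    readline = io.StringIO(source).readline
    return [tok.line for tok in tokenize.generate_tokens(readline)]

# The trailing '\n' is real, so it is kept in the reported line.
print(token_lines("(1)\n"))  # e.g. ['(1)\n', '(1)\n', '(1)\n', '(1)\n', '']

# No trailing newline: the tokenizer adds one internally (implicit_newline),
# but after this change it no longer appears in the reported line.
print(token_lines("(1)"))    # e.g. ['(1)', '(1)', '(1)', '(1)', '']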