mirror of
https://github.com/python/cpython.git
synced 2025-08-27 04:05:34 +00:00
gh-124188: Fix PyErr_ProgramTextObject() (GH-124189)
* Detect source file encoding. * Use the "replace" error handler even for UTF-8 (default) encoding. * Remove the BOM. * Fix detection of too long lines if they contain NUL. * Return the head rather than the tail for truncated long lines.
This commit is contained in:
parent
3c83f9958c
commit
e2f710792b
6 changed files with 328 additions and 117 deletions
|
@ -7,6 +7,7 @@ import unittest
|
|||
import pickle
|
||||
import weakref
|
||||
import errno
|
||||
from codecs import BOM_UTF8
|
||||
from textwrap import dedent
|
||||
|
||||
from test.support import (captured_stderr, check_impl_detail,
|
||||
|
@ -2011,16 +2012,20 @@ class ImportErrorTests(unittest.TestCase):
|
|||
self.assertEqual(exc.path, orig.path)
|
||||
|
||||
|
||||
def run_script(source):
|
||||
if isinstance(source, str):
|
||||
with open(TESTFN, 'w', encoding='utf-8') as testfile:
|
||||
testfile.write(dedent(source))
|
||||
else:
|
||||
with open(TESTFN, 'wb') as testfile:
|
||||
testfile.write(source)
|
||||
_rc, _out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
|
||||
return err.decode('utf-8').splitlines()
|
||||
|
||||
class AssertionErrorTests(unittest.TestCase):
|
||||
def tearDown(self):
|
||||
unlink(TESTFN)
|
||||
|
||||
def write_source(self, source):
|
||||
with open(TESTFN, 'w') as testfile:
|
||||
testfile.write(dedent(source))
|
||||
_rc, _out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
|
||||
return err.decode('utf-8').splitlines()
|
||||
|
||||
@force_not_colorized
|
||||
def test_assertion_error_location(self):
|
||||
cases = [
|
||||
|
@ -2052,11 +2057,32 @@ class AssertionErrorTests(unittest.TestCase):
|
|||
'AssertionError',
|
||||
],
|
||||
),
|
||||
('assert 1 > 2, "message"',
|
||||
('assert 1 > 2, "messäge"',
|
||||
[
|
||||
' assert 1 > 2, "message"',
|
||||
' assert 1 > 2, "messäge"',
|
||||
' ^^^^^',
|
||||
'AssertionError: message',
|
||||
'AssertionError: messäge',
|
||||
],
|
||||
),
|
||||
('assert 1 > 2, "messäge"'.encode(),
|
||||
[
|
||||
' assert 1 > 2, "messäge"',
|
||||
' ^^^^^',
|
||||
'AssertionError: messäge',
|
||||
],
|
||||
),
|
||||
('# coding: latin1\nassert 1 > 2, "messäge"'.encode('latin1'),
|
||||
[
|
||||
' assert 1 > 2, "messäge"',
|
||||
' ^^^^^',
|
||||
'AssertionError: messäge',
|
||||
],
|
||||
),
|
||||
(BOM_UTF8 + 'assert 1 > 2, "messäge"'.encode(),
|
||||
[
|
||||
' assert 1 > 2, "messäge"',
|
||||
' ^^^^^',
|
||||
'AssertionError: messäge',
|
||||
],
|
||||
),
|
||||
|
||||
|
@ -2094,8 +2120,8 @@ class AssertionErrorTests(unittest.TestCase):
|
|||
),
|
||||
]
|
||||
for source, expected in cases:
|
||||
with self.subTest(source):
|
||||
result = self.write_source(source)
|
||||
with self.subTest(source=source):
|
||||
result = run_script(source)
|
||||
self.assertEqual(result[-3:], expected)
|
||||
|
||||
@force_not_colorized
|
||||
|
@ -2125,12 +2151,14 @@ class AssertionErrorTests(unittest.TestCase):
|
|||
),
|
||||
]
|
||||
for source, expected in cases:
|
||||
with self.subTest(source):
|
||||
result = self.write_source(source)
|
||||
with self.subTest(source=source):
|
||||
result = run_script(source)
|
||||
self.assertEqual(result[-len(expected):], expected)
|
||||
|
||||
|
||||
class SyntaxErrorTests(unittest.TestCase):
|
||||
maxDiff = None
|
||||
|
||||
@force_not_colorized
|
||||
def test_range_of_offsets(self):
|
||||
cases = [
|
||||
|
@ -2223,45 +2251,106 @@ class SyntaxErrorTests(unittest.TestCase):
|
|||
the_exception = exc
|
||||
|
||||
def test_encodings(self):
|
||||
self.addCleanup(unlink, TESTFN)
|
||||
source = (
|
||||
'# -*- coding: cp437 -*-\n'
|
||||
'"¢¢¢¢¢¢" + f(4, x for x in range(1))\n'
|
||||
)
|
||||
try:
|
||||
with open(TESTFN, 'w', encoding='cp437') as testfile:
|
||||
testfile.write(source)
|
||||
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
|
||||
err = err.decode('utf-8').splitlines()
|
||||
|
||||
self.assertEqual(err[-3], ' "¢¢¢¢¢¢" + f(4, x for x in range(1))')
|
||||
self.assertEqual(err[-2], ' ^^^^^^^^^^^^^^^^^^^')
|
||||
finally:
|
||||
unlink(TESTFN)
|
||||
err = run_script(source.encode('cp437'))
|
||||
self.assertEqual(err[-3], ' "¢¢¢¢¢¢" + f(4, x for x in range(1))')
|
||||
self.assertEqual(err[-2], ' ^^^^^^^^^^^^^^^^^^^')
|
||||
|
||||
# Check backwards tokenizer errors
|
||||
source = '# -*- coding: ascii -*-\n\n(\n'
|
||||
try:
|
||||
with open(TESTFN, 'w', encoding='ascii') as testfile:
|
||||
testfile.write(source)
|
||||
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
|
||||
err = err.decode('utf-8').splitlines()
|
||||
|
||||
self.assertEqual(err[-3], ' (')
|
||||
self.assertEqual(err[-2], ' ^')
|
||||
finally:
|
||||
unlink(TESTFN)
|
||||
err = run_script(source)
|
||||
self.assertEqual(err[-3], ' (')
|
||||
self.assertEqual(err[-2], ' ^')
|
||||
|
||||
def test_non_utf8(self):
|
||||
# Check non utf-8 characters
|
||||
try:
|
||||
with open(TESTFN, 'bw') as testfile:
|
||||
testfile.write(b"\x89")
|
||||
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
|
||||
err = err.decode('utf-8').splitlines()
|
||||
self.addCleanup(unlink, TESTFN)
|
||||
err = run_script(b"\x89")
|
||||
self.assertIn("SyntaxError: Non-UTF-8 code starting with '\\x89' in file", err[-1])
|
||||
|
||||
self.assertIn("SyntaxError: Non-UTF-8 code starting with '\\x89' in file", err[-1])
|
||||
finally:
|
||||
unlink(TESTFN)
|
||||
def test_string_source(self):
|
||||
def try_compile(source):
|
||||
with self.assertRaises(SyntaxError) as cm:
|
||||
compile(source, '<string>', 'exec')
|
||||
return cm.exception
|
||||
|
||||
exc = try_compile('return "ä"')
|
||||
self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
|
||||
self.assertIsNone(exc.text)
|
||||
self.assertEqual(exc.offset, 1)
|
||||
self.assertEqual(exc.end_offset, 12)
|
||||
|
||||
exc = try_compile('return "ä"'.encode())
|
||||
self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
|
||||
self.assertIsNone(exc.text)
|
||||
self.assertEqual(exc.offset, 1)
|
||||
self.assertEqual(exc.end_offset, 12)
|
||||
|
||||
exc = try_compile(BOM_UTF8 + 'return "ä"'.encode())
|
||||
self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
|
||||
self.assertIsNone(exc.text)
|
||||
self.assertEqual(exc.offset, 1)
|
||||
self.assertEqual(exc.end_offset, 12)
|
||||
|
||||
exc = try_compile('# coding: latin1\nreturn "ä"'.encode('latin1'))
|
||||
self.assertEqual(str(exc), "'return' outside function (<string>, line 2)")
|
||||
self.assertIsNone(exc.text)
|
||||
self.assertEqual(exc.offset, 1)
|
||||
self.assertEqual(exc.end_offset, 12)
|
||||
|
||||
exc = try_compile('return "ä" #' + 'ä'*1000)
|
||||
self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
|
||||
self.assertIsNone(exc.text)
|
||||
self.assertEqual(exc.offset, 1)
|
||||
self.assertEqual(exc.end_offset, 12)
|
||||
|
||||
exc = try_compile('return "ä" # ' + 'ä'*1000)
|
||||
self.assertEqual(str(exc), "'return' outside function (<string>, line 1)")
|
||||
self.assertIsNone(exc.text)
|
||||
self.assertEqual(exc.offset, 1)
|
||||
self.assertEqual(exc.end_offset, 12)
|
||||
|
||||
def test_file_source(self):
|
||||
self.addCleanup(unlink, TESTFN)
|
||||
err = run_script('return "ä"')
|
||||
self.assertEqual(err[-3:], [
|
||||
' return "ä"',
|
||||
' ^^^^^^^^^^',
|
||||
"SyntaxError: 'return' outside function"])
|
||||
|
||||
err = run_script('return "ä"'.encode())
|
||||
self.assertEqual(err[-3:], [
|
||||
' return "ä"',
|
||||
' ^^^^^^^^^^',
|
||||
"SyntaxError: 'return' outside function"])
|
||||
|
||||
err = run_script(BOM_UTF8 + 'return "ä"'.encode())
|
||||
self.assertEqual(err[-3:], [
|
||||
' return "ä"',
|
||||
' ^^^^^^^^^^',
|
||||
"SyntaxError: 'return' outside function"])
|
||||
|
||||
err = run_script('# coding: latin1\nreturn "ä"'.encode('latin1'))
|
||||
self.assertEqual(err[-3:], [
|
||||
' return "ä"',
|
||||
' ^^^^^^^^^^',
|
||||
"SyntaxError: 'return' outside function"])
|
||||
|
||||
err = run_script('return "ä" #' + 'ä'*1000)
|
||||
self.assertEqual(err[-2:], [
|
||||
' ^^^^^^^^^^^',
|
||||
"SyntaxError: 'return' outside function"])
|
||||
self.assertEqual(err[-3][:100], ' return "ä" #' + 'ä'*84)
|
||||
|
||||
err = run_script('return "ä" # ' + 'ä'*1000)
|
||||
self.assertEqual(err[-2:], [
|
||||
' ^^^^^^^^^^^',
|
||||
"SyntaxError: 'return' outside function"])
|
||||
self.assertEqual(err[-3][:100], ' return "ä" # ' + 'ä'*83)
|
||||
|
||||
def test_attributes_new_constructor(self):
|
||||
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue