mirror of
https://github.com/python/cpython.git
synced 2025-08-03 00:23:06 +00:00
[3.10] bpo-45848: Allow the parser to get error lines from encoded files (GH-29646) (GH-29661)
(cherry picked from commit fdcc46d955)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
This commit is contained in:
parent
bbe3c57c86
commit
904af3de2b
6 changed files with 49 additions and 11 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
@ -134,3 +134,9 @@ Tools/ssl/win32
|
|||
# Ignore ./python binary on Unix but still look into ./Python/ directory.
|
||||
/python
|
||||
!/Python/
|
||||
|
||||
# Artifacts generated by 3.11 lying around when switching branches:
|
||||
/_bootstrap_python
|
||||
/Programs/_freeze_module
|
||||
/Python/deepfreeze/
|
||||
/Python/frozen_modules/
|
|
@ -185,6 +185,12 @@ Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create(
|
|||
Py_ssize_t end,
|
||||
const char *reason /* UTF-8 encoded string */
|
||||
);
|
||||
|
||||
PyAPI_FUNC(PyObject *) _PyErr_ProgramDecodedTextObject(
|
||||
PyObject *filename,
|
||||
int lineno,
|
||||
const char* encoding);
|
||||
|
||||
PyAPI_FUNC(PyObject *) _PyUnicodeTranslateError_Create(
|
||||
PyObject *object,
|
||||
Py_ssize_t start,
|
||||
|
|
|
@ -2352,6 +2352,19 @@ class SyntaxErrorTests(unittest.TestCase):
|
|||
finally:
|
||||
unlink(TESTFN)
|
||||
|
||||
# Check backwards tokenizer errors
|
||||
source = '# -*- coding: ascii -*-\n\n(\n'
|
||||
try:
|
||||
with open(TESTFN, 'w', encoding='ascii') as testfile:
|
||||
testfile.write(source)
|
||||
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
|
||||
err = err.decode('utf-8').splitlines()
|
||||
|
||||
self.assertEqual(err[-3], ' (')
|
||||
self.assertEqual(err[-2], ' ^')
|
||||
finally:
|
||||
unlink(TESTFN)
|
||||
|
||||
def test_attributes_new_constructor(self):
|
||||
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
|
||||
the_exception = SyntaxError("bad bad", args)
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Allow the parser to obtain error lines directly from encoded files. Patch by
|
||||
Pablo Galindo.
|
|
@ -480,14 +480,12 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
|||
goto error;
|
||||
}
|
||||
|
||||
// PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
|
||||
// with an arbitrary encoding or otherwise we could get some badly decoded text.
|
||||
int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
|
||||
if (p->tok->fp_interactive) {
|
||||
error_line = get_error_line(p, lineno);
|
||||
}
|
||||
else if (uses_utf8_codec && p->start_rule == Py_file_input) {
|
||||
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
|
||||
else if (p->start_rule == Py_file_input) {
|
||||
error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
|
||||
(int) lineno, p->tok->encoding);
|
||||
}
|
||||
|
||||
if (!error_line) {
|
||||
|
@ -498,15 +496,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
|||
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
|
||||
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
|
||||
does not physically exist */
|
||||
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
|
||||
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
|
||||
|
||||
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
|
||||
Py_ssize_t size = p->tok->inp - p->tok->buf;
|
||||
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
|
||||
}
|
||||
else {
|
||||
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
|
||||
error_line = get_error_line(p, lineno);
|
||||
}
|
||||
else {
|
||||
error_line = PyUnicode_FromStringAndSize("", 0);
|
||||
}
|
||||
if (!error_line) {
|
||||
goto error;
|
||||
}
|
||||
|
|
|
@ -1724,7 +1724,7 @@ PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset)
|
|||
functionality in tb_displayline() in traceback.c. */
|
||||
|
||||
static PyObject *
|
||||
err_programtext(PyThreadState *tstate, FILE *fp, int lineno)
|
||||
err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char* encoding)
|
||||
{
|
||||
int i;
|
||||
char linebuf[1000];
|
||||
|
@ -1752,7 +1752,11 @@ after_loop:
|
|||
fclose(fp);
|
||||
if (i == lineno) {
|
||||
PyObject *res;
|
||||
if (encoding != NULL) {
|
||||
res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding, "replace");
|
||||
} else {
|
||||
res = PyUnicode_FromString(linebuf);
|
||||
}
|
||||
if (res == NULL)
|
||||
_PyErr_Clear(tstate);
|
||||
return res;
|
||||
|
@ -1778,7 +1782,7 @@ PyErr_ProgramText(const char *filename, int lineno)
|
|||
}
|
||||
|
||||
PyObject *
|
||||
PyErr_ProgramTextObject(PyObject *filename, int lineno)
|
||||
_PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char* encoding)
|
||||
{
|
||||
if (filename == NULL || lineno <= 0) {
|
||||
return NULL;
|
||||
|
@ -1790,7 +1794,13 @@ PyErr_ProgramTextObject(PyObject *filename, int lineno)
|
|||
_PyErr_Clear(tstate);
|
||||
return NULL;
|
||||
}
|
||||
return err_programtext(tstate, fp, lineno);
|
||||
return err_programtext(tstate, fp, lineno, encoding);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyErr_ProgramTextObject(PyObject *filename, int lineno)
|
||||
{
|
||||
return _PyErr_ProgramDecodedTextObject(filename, lineno, NULL);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue