bpo-45848: Allow the parser to get error lines from encoded files (GH-29646)

This commit is contained in:
Pablo Galindo Salgado 2021-11-20 14:36:07 +00:00 committed by GitHub
parent 6d430ef5ab
commit fdcc46d955
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 42 additions and 11 deletions

View file

@@ -482,14 +482,12 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
goto error;
}
-// PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
-// with an arbitrary encoding or otherwise we could get some badly decoded text.
-int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
if (p->tok->fp_interactive) {
error_line = get_error_line(p, lineno);
}
-else if (uses_utf8_codec && p->start_rule == Py_file_input) {
-error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
+else if (p->start_rule == Py_file_input) {
+error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
+(int) lineno, p->tok->encoding);
}
if (!error_line) {
@@ -500,15 +498,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
does not physically exist */
-assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
+assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
Py_ssize_t size = p->tok->inp - p->tok->buf;
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
}
-else {
+else if (p->tok->fp == NULL || p->tok->fp == stdin) {
error_line = get_error_line(p, lineno);
}
+else {
+error_line = PyUnicode_FromStringAndSize("", 0);
+}
if (!error_line) {
goto error;
}