mirror of
https://github.com/python/cpython.git
synced 2025-07-19 17:25:54 +00:00
[3.13] gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer (GH-116049) (#130066)
(cherry picked from commit 56eda25633)
(cherry picked from commit 369704b428)
This commit is contained in:
parent
4c2a59b7b8
commit
8d1d36b742
5 changed files with 82 additions and 18 deletions
|
@ -660,7 +660,7 @@ class CmdLineTest(unittest.TestCase):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
stderr.splitlines()[-3:],
|
stderr.splitlines()[-3:],
|
||||||
[ b' foo = """\\q"""',
|
[ b' foo = """\\q"""',
|
||||||
b' ^^^^^^^^',
|
b' ^^',
|
||||||
b'SyntaxError: invalid escape sequence \'\\q\''
|
b'SyntaxError: invalid escape sequence \'\\q\''
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
|
@ -118,7 +118,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertEqual(len(w), 1)
|
self.assertEqual(len(w), 1)
|
||||||
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
|
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
|
||||||
self.assertEqual(w[0].filename, '<string>')
|
self.assertEqual(w[0].filename, '<string>')
|
||||||
self.assertEqual(w[0].lineno, 1)
|
self.assertEqual(w[0].lineno, 2)
|
||||||
|
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
warnings.simplefilter('error', category=SyntaxWarning)
|
warnings.simplefilter('error', category=SyntaxWarning)
|
||||||
|
@ -128,7 +128,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertEqual(w, [])
|
self.assertEqual(w, [])
|
||||||
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
|
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
|
||||||
self.assertEqual(exc.filename, '<string>')
|
self.assertEqual(exc.filename, '<string>')
|
||||||
self.assertEqual(exc.lineno, 1)
|
self.assertEqual(exc.lineno, 2)
|
||||||
self.assertEqual(exc.offset, 1)
|
self.assertEqual(exc.offset, 1)
|
||||||
|
|
||||||
# Check that the warning is raised only once if there are syntax errors
|
# Check that the warning is raised only once if there are syntax errors
|
||||||
|
@ -155,7 +155,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertEqual(str(w[0].message),
|
self.assertEqual(str(w[0].message),
|
||||||
r"invalid octal escape sequence '\407'")
|
r"invalid octal escape sequence '\407'")
|
||||||
self.assertEqual(w[0].filename, '<string>')
|
self.assertEqual(w[0].filename, '<string>')
|
||||||
self.assertEqual(w[0].lineno, 1)
|
self.assertEqual(w[0].lineno, 2)
|
||||||
|
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
warnings.simplefilter('error', category=SyntaxWarning)
|
warnings.simplefilter('error', category=SyntaxWarning)
|
||||||
|
@ -165,9 +165,32 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertEqual(w, [])
|
self.assertEqual(w, [])
|
||||||
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
|
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
|
||||||
self.assertEqual(exc.filename, '<string>')
|
self.assertEqual(exc.filename, '<string>')
|
||||||
self.assertEqual(exc.lineno, 1)
|
self.assertEqual(exc.lineno, 2)
|
||||||
self.assertEqual(exc.offset, 1)
|
self.assertEqual(exc.offset, 1)
|
||||||
|
|
||||||
|
def test_invalid_escape_locations_with_offset(self):
|
||||||
|
with warnings.catch_warnings(record=True) as w:
|
||||||
|
warnings.simplefilter('error', category=SyntaxWarning)
|
||||||
|
with self.assertRaises(SyntaxError) as cm:
|
||||||
|
eval("\"'''''''''''''''''''''invalid\ Escape\"")
|
||||||
|
exc = cm.exception
|
||||||
|
self.assertEqual(w, [])
|
||||||
|
self.assertEqual(exc.msg, r"invalid escape sequence '\ '")
|
||||||
|
self.assertEqual(exc.filename, '<string>')
|
||||||
|
self.assertEqual(exc.lineno, 1)
|
||||||
|
self.assertEqual(exc.offset, 30)
|
||||||
|
|
||||||
|
with warnings.catch_warnings(record=True) as w:
|
||||||
|
warnings.simplefilter('error', category=SyntaxWarning)
|
||||||
|
with self.assertRaises(SyntaxError) as cm:
|
||||||
|
eval("\"''Incorrect \ logic?\"")
|
||||||
|
exc = cm.exception
|
||||||
|
self.assertEqual(w, [])
|
||||||
|
self.assertEqual(exc.msg, r"invalid escape sequence '\ '")
|
||||||
|
self.assertEqual(exc.filename, '<string>')
|
||||||
|
self.assertEqual(exc.lineno, 1)
|
||||||
|
self.assertEqual(exc.offset, 14)
|
||||||
|
|
||||||
def test_eval_str_raw(self):
|
def test_eval_str_raw(self):
|
||||||
self.assertEqual(eval(""" r'x' """), 'x')
|
self.assertEqual(eval(""" r'x' """), 'x')
|
||||||
self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
|
self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
|
||||||
|
@ -207,7 +230,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertEqual(len(w), 1)
|
self.assertEqual(len(w), 1)
|
||||||
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
|
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
|
||||||
self.assertEqual(w[0].filename, '<string>')
|
self.assertEqual(w[0].filename, '<string>')
|
||||||
self.assertEqual(w[0].lineno, 1)
|
self.assertEqual(w[0].lineno, 2)
|
||||||
|
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
warnings.simplefilter('error', category=SyntaxWarning)
|
warnings.simplefilter('error', category=SyntaxWarning)
|
||||||
|
@ -217,7 +240,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertEqual(w, [])
|
self.assertEqual(w, [])
|
||||||
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
|
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
|
||||||
self.assertEqual(exc.filename, '<string>')
|
self.assertEqual(exc.filename, '<string>')
|
||||||
self.assertEqual(exc.lineno, 1)
|
self.assertEqual(exc.lineno, 2)
|
||||||
|
|
||||||
def test_eval_bytes_invalid_octal_escape(self):
|
def test_eval_bytes_invalid_octal_escape(self):
|
||||||
for i in range(0o400, 0o1000):
|
for i in range(0o400, 0o1000):
|
||||||
|
@ -231,7 +254,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertEqual(str(w[0].message),
|
self.assertEqual(str(w[0].message),
|
||||||
r"invalid octal escape sequence '\407'")
|
r"invalid octal escape sequence '\407'")
|
||||||
self.assertEqual(w[0].filename, '<string>')
|
self.assertEqual(w[0].filename, '<string>')
|
||||||
self.assertEqual(w[0].lineno, 1)
|
self.assertEqual(w[0].lineno, 2)
|
||||||
|
|
||||||
with warnings.catch_warnings(record=True) as w:
|
with warnings.catch_warnings(record=True) as w:
|
||||||
warnings.simplefilter('error', category=SyntaxWarning)
|
warnings.simplefilter('error', category=SyntaxWarning)
|
||||||
|
@ -241,7 +264,7 @@ class TestLiterals(unittest.TestCase):
|
||||||
self.assertEqual(w, [])
|
self.assertEqual(w, [])
|
||||||
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
|
self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'")
|
||||||
self.assertEqual(exc.filename, '<string>')
|
self.assertEqual(exc.filename, '<string>')
|
||||||
self.assertEqual(exc.lineno, 1)
|
self.assertEqual(exc.lineno, 2)
|
||||||
|
|
||||||
def test_eval_bytes_raw(self):
|
def test_eval_bytes_raw(self):
|
||||||
self.assertEqual(eval(""" br'x' """), b'x')
|
self.assertEqual(eval(""" br'x' """), b'x')
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fix location for SyntaxErrors of invalid escapes in the tokenizer. Patch by
|
||||||
|
Pablo Galindo
|
|
@ -352,8 +352,8 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
||||||
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
|
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
|
||||||
|
|
||||||
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
|
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
|
||||||
Py_ssize_t size = p->tok->inp - p->tok->buf;
|
Py_ssize_t size = p->tok->inp - p->tok->line_start;
|
||||||
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
|
error_line = PyUnicode_DecodeUTF8(p->tok->line_start, size, "replace");
|
||||||
}
|
}
|
||||||
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
|
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
|
||||||
error_line = get_error_line_from_tokenizer_buffers(p, lineno);
|
error_line = get_error_line_from_tokenizer_buffers(p, lineno);
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
//// STRING HANDLING FUNCTIONS ////
|
//// STRING HANDLING FUNCTIONS ////
|
||||||
|
|
||||||
static int
|
static int
|
||||||
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
|
warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
|
||||||
{
|
{
|
||||||
if (p->call_invalid_rules) {
|
if (p->call_invalid_rules) {
|
||||||
// Do not report warnings if we are in the second pass of the parser
|
// Do not report warnings if we are in the second pass of the parser
|
||||||
|
@ -41,8 +41,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
|
||||||
else {
|
else {
|
||||||
category = PyExc_DeprecationWarning;
|
category = PyExc_DeprecationWarning;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate the lineno and the col_offset of the invalid escape sequence
|
||||||
|
const char *start = buffer;
|
||||||
|
const char *end = first_invalid_escape;
|
||||||
|
int lineno = t->lineno;
|
||||||
|
int col_offset = t->col_offset;
|
||||||
|
while (start < end) {
|
||||||
|
if (*start == '\n') {
|
||||||
|
lineno++;
|
||||||
|
col_offset = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
col_offset++;
|
||||||
|
}
|
||||||
|
start++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count the number of quotes in the token
|
||||||
|
char first_quote = 0;
|
||||||
|
if (lineno == t->lineno) {
|
||||||
|
int quote_count = 0;
|
||||||
|
char* tok = PyBytes_AsString(t->bytes);
|
||||||
|
for (int i = 0; i < PyBytes_Size(t->bytes); i++) {
|
||||||
|
if (tok[i] == '\'' || tok[i] == '\"') {
|
||||||
|
if (quote_count == 0) {
|
||||||
|
first_quote = tok[i];
|
||||||
|
}
|
||||||
|
if (tok[i] == first_quote) {
|
||||||
|
quote_count++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
col_offset += quote_count;
|
||||||
|
}
|
||||||
|
|
||||||
if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
|
if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
|
||||||
t->lineno, NULL, NULL) < 0) {
|
lineno, NULL, NULL) < 0) {
|
||||||
if (PyErr_ExceptionMatches(category)) {
|
if (PyErr_ExceptionMatches(category)) {
|
||||||
/* Replace the Syntax/DeprecationWarning exception with a SyntaxError
|
/* Replace the Syntax/DeprecationWarning exception with a SyntaxError
|
||||||
to get a more accurate error report */
|
to get a more accurate error report */
|
||||||
|
@ -53,11 +91,12 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
|
||||||
error location, if p->known_err_token is not set. */
|
error location, if p->known_err_token is not set. */
|
||||||
p->known_err_token = t;
|
p->known_err_token = t;
|
||||||
if (octal) {
|
if (octal) {
|
||||||
RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
|
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
|
||||||
first_invalid_escape);
|
"invalid octal escape sequence '\\%.3s'", first_invalid_escape);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
|
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
|
||||||
|
"invalid escape sequence '\\%c'", c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Py_DECREF(msg);
|
Py_DECREF(msg);
|
||||||
|
@ -151,7 +190,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
|
||||||
// HACK: later we can simply pass the line no, since we don't preserve the tokens
|
// HACK: later we can simply pass the line no, since we don't preserve the tokens
|
||||||
// when we are decoding the string but we preserve the line numbers.
|
// when we are decoding the string but we preserve the line numbers.
|
||||||
if (v != NULL && first_invalid_escape != NULL && t != NULL) {
|
if (v != NULL && first_invalid_escape != NULL && t != NULL) {
|
||||||
if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
|
if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
|
||||||
/* We have not decref u before because first_invalid_escape points
|
/* We have not decref u before because first_invalid_escape points
|
||||||
inside u. */
|
inside u. */
|
||||||
Py_XDECREF(u);
|
Py_XDECREF(u);
|
||||||
|
@ -173,7 +212,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (first_invalid_escape != NULL) {
|
if (first_invalid_escape != NULL) {
|
||||||
if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
|
if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
|
||||||
Py_DECREF(result);
|
Py_DECREF(result);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue