mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
[3.12] gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer (GH-116049) (#130065)
(cherry picked from commit 56eda25633)
This commit is contained in:
parent
719d08cccf
commit
5e8a9eb13d
5 changed files with 82 additions and 18 deletions
|
@ -9,7 +9,7 @@
|
|||
//// STRING HANDLING FUNCTIONS ////
|
||||
|
||||
static int
|
||||
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
|
||||
warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
|
||||
{
|
||||
if (p->call_invalid_rules) {
|
||||
// Do not report warnings if we are in the second pass of the parser
|
||||
|
@ -38,8 +38,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
|
|||
else {
|
||||
category = PyExc_DeprecationWarning;
|
||||
}
|
||||
|
||||
// Calculate the lineno and the col_offset of the invalid escape sequence
|
||||
const char *start = buffer;
|
||||
const char *end = first_invalid_escape;
|
||||
int lineno = t->lineno;
|
||||
int col_offset = t->col_offset;
|
||||
while (start < end) {
|
||||
if (*start == '\n') {
|
||||
lineno++;
|
||||
col_offset = 0;
|
||||
}
|
||||
else {
|
||||
col_offset++;
|
||||
}
|
||||
start++;
|
||||
}
|
||||
|
||||
// Count the number of quotes in the token
|
||||
char first_quote = 0;
|
||||
if (lineno == t->lineno) {
|
||||
int quote_count = 0;
|
||||
char* tok = PyBytes_AsString(t->bytes);
|
||||
for (int i = 0; i < PyBytes_Size(t->bytes); i++) {
|
||||
if (tok[i] == '\'' || tok[i] == '\"') {
|
||||
if (quote_count == 0) {
|
||||
first_quote = tok[i];
|
||||
}
|
||||
if (tok[i] == first_quote) {
|
||||
quote_count++;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
col_offset += quote_count;
|
||||
}
|
||||
|
||||
if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
|
||||
t->lineno, NULL, NULL) < 0) {
|
||||
lineno, NULL, NULL) < 0) {
|
||||
if (PyErr_ExceptionMatches(category)) {
|
||||
/* Replace the Syntax/DeprecationWarning exception with a SyntaxError
|
||||
to get a more accurate error report */
|
||||
|
@ -50,11 +88,12 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
|
|||
error location, if p->known_err_token is not set. */
|
||||
p->known_err_token = t;
|
||||
if (octal) {
|
||||
RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
|
||||
first_invalid_escape);
|
||||
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
|
||||
"invalid octal escape sequence '\\%.3s'", first_invalid_escape);
|
||||
}
|
||||
else {
|
||||
RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
|
||||
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
|
||||
"invalid escape sequence '\\%c'", c);
|
||||
}
|
||||
}
|
||||
Py_DECREF(msg);
|
||||
|
@ -148,7 +187,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
|
|||
// HACK: later we can simply pass the line no, since we don't preserve the tokens
|
||||
// when we are decoding the string but we preserve the line numbers.
|
||||
if (v != NULL && first_invalid_escape != NULL && t != NULL) {
|
||||
if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
|
||||
if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
|
||||
/* We have not decref u before because first_invalid_escape points
|
||||
inside u. */
|
||||
Py_XDECREF(u);
|
||||
|
@ -170,7 +209,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
|
|||
}
|
||||
|
||||
if (first_invalid_escape != NULL) {
|
||||
if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
|
||||
if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue