bpo-45494: Fix parser crash when reporting errors involving invalid continuation characters (GH-28993)

There are two errors that this commit fixes:

* The parser was not correctly computing the offset and the string
  source for E_LINECONT errors due to the incorrect usage of strtok().
* The parser was not correctly unwinding the call stack when a tokenizer
  exception happened in rules involving optionals ('?', [...]), because we
  always make them return valid results by using the comma operator. We
  need to check first that no error has occurred before continuing.
This commit is contained in:
Pablo Galindo Salgado 2021-10-19 20:24:12 +01:00 committed by GitHub
parent bda69abe84
commit a106343f63
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 137 additions and 123 deletions

View file

@ -350,10 +350,18 @@ tokenizer_error(Parser *p)
errtype = PyExc_IndentationError;
msg = "too many levels of indentation";
break;
case E_LINECONT:
col_offset = strlen(strtok(p->tok->buf, "\n")) - 1;
case E_LINECONT: {
char* loc = strrchr(p->tok->buf, '\n');
const char* last_char = p->tok->cur - 1;
if (loc != NULL && loc != last_char) {
col_offset = p->tok->cur - loc - 1;
p->tok->buf = loc;
} else {
col_offset = last_char - p->tok->buf - 1;
}
msg = "unexpected character after line continuation character";
break;
}
default:
msg = "unknown parsing error";
}