gh-107450: Check for overflow in the tokenizer and fix overflow test (#110832)

Co-authored-by: Filipe Laíns <lains@riseup.net>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Lysandros Nikolaou 2023-10-16 16:42:49 +02:00 committed by GitHub
parent b3c9faf056
commit a1ac5590e0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 40 additions and 22 deletions

View file

@ -59,6 +59,10 @@ tok_nextc(struct tok_state *tok)
int rc;
for (;;) {
if (tok->cur != tok->inp) {
if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
tok->done = E_COLUMNOVERFLOW;
return EOF;
}
tok->col_offset++;
return Py_CHARMASK(*tok->cur++); /* Fast path */
}

View file

@ -68,6 +68,7 @@ _Pypegen_tokenizer_error(Parser *p)
const char *msg = NULL;
PyObject* errtype = PyExc_SyntaxError;
Py_ssize_t col_offset = -1;
p->error_indicator = 1;
switch (p->tok->done) {
case E_TOKEN:
msg = "invalid token";
@ -103,6 +104,10 @@ _Pypegen_tokenizer_error(Parser *p)
msg = "unexpected character after line continuation character";
break;
}
case E_COLUMNOVERFLOW:
PyErr_SetString(PyExc_OverflowError,
"Parser column offset overflow - source line is too big");
return -1;
default:
msg = "unknown parsing error";
}