[3.12] gh-107450: Check for overflow in the tokenizer and fix overflow test (GH-110832) (#110931)

(cherry picked from commit a1ac5590e0)

Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
Co-authored-by: Filipe Laíns <lains@riseup.net>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Lysandros Nikolaou 2023-10-16 18:59:18 +02:00 committed by GitHub
parent b8e5b1b28a
commit 3b87e520fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 40 additions and 29 deletions

View file

@@ -66,6 +66,7 @@ _Pypegen_tokenizer_error(Parser *p)
const char *msg = NULL;
PyObject* errtype = PyExc_SyntaxError;
Py_ssize_t col_offset = -1;
p->error_indicator = 1;
switch (p->tok->done) {
case E_TOKEN:
msg = "invalid token";
@@ -101,6 +102,10 @@ _Pypegen_tokenizer_error(Parser *p)
msg = "unexpected character after line continuation character";
break;
}
case E_COLUMNOVERFLOW:
PyErr_SetString(PyExc_OverflowError,
"Parser column offset overflow - source line is too big");
return -1;
default:
msg = "unknown parsing error";
}
@@ -233,12 +238,6 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *err
col_offset = 0;
} else {
const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf;
if (p->tok->cur - start > INT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"Parser column offset overflow - source line is too big");
p->error_indicator = 1;
return NULL;
}
col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
}
} else {