Mirror of https://github.com/python/cpython.git (synced 2025-08-10 11:58:39 +00:00)
[3.12] gh-107450: Check for overflow in the tokenizer and fix overflow test (GH-110832) (#110931)
(cherry picked from commit a1ac5590e0)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
Co-authored-by: Filipe Laíns <lains@riseup.net>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent b8e5b1b28a
commit 3b87e520fc
4 changed files with 40 additions and 29 deletions
|
@ -4,7 +4,6 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* Error codes passed around between file input, tokenizer, parser and
|
/* Error codes passed around between file input, tokenizer, parser and
|
||||||
interpreter. This is necessary so we can turn them into Python
|
interpreter. This is necessary so we can turn them into Python
|
||||||
exceptions at a higher level. Note that some errors have a
|
exceptions at a higher level. Note that some errors have a
|
||||||
|
@ -13,24 +12,25 @@ extern "C" {
|
||||||
the parser only returns E_EOF when it hits EOF immediately, and it
|
the parser only returns E_EOF when it hits EOF immediately, and it
|
||||||
never returns E_OK. */
|
never returns E_OK. */
|
||||||
|
|
||||||
#define E_OK 10 /* No error */
|
#define E_OK 10 /* No error */
|
||||||
#define E_EOF 11 /* End Of File */
|
#define E_EOF 11 /* End Of File */
|
||||||
#define E_INTR 12 /* Interrupted */
|
#define E_INTR 12 /* Interrupted */
|
||||||
#define E_TOKEN 13 /* Bad token */
|
#define E_TOKEN 13 /* Bad token */
|
||||||
#define E_SYNTAX 14 /* Syntax error */
|
#define E_SYNTAX 14 /* Syntax error */
|
||||||
#define E_NOMEM 15 /* Ran out of memory */
|
#define E_NOMEM 15 /* Ran out of memory */
|
||||||
#define E_DONE 16 /* Parsing complete */
|
#define E_DONE 16 /* Parsing complete */
|
||||||
#define E_ERROR 17 /* Execution error */
|
#define E_ERROR 17 /* Execution error */
|
||||||
#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */
|
#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */
|
||||||
#define E_OVERFLOW 19 /* Node had too many children */
|
#define E_OVERFLOW 19 /* Node had too many children */
|
||||||
#define E_TOODEEP 20 /* Too many indentation levels */
|
#define E_TOODEEP 20 /* Too many indentation levels */
|
||||||
#define E_DEDENT 21 /* No matching outer block for dedent */
|
#define E_DEDENT 21 /* No matching outer block for dedent */
|
||||||
#define E_DECODE 22 /* Error in decoding into Unicode */
|
#define E_DECODE 22 /* Error in decoding into Unicode */
|
||||||
#define E_EOFS 23 /* EOF in triple-quoted string */
|
#define E_EOFS 23 /* EOF in triple-quoted string */
|
||||||
#define E_EOLS 24 /* EOL in single-quoted string */
|
#define E_EOLS 24 /* EOL in single-quoted string */
|
||||||
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
|
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
|
||||||
#define E_BADSINGLE 27 /* Ill-formed single statement input */
|
#define E_BADSINGLE 27 /* Ill-formed single statement input */
|
||||||
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
|
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
|
||||||
|
#define E_COLUMNOVERFLOW 29 /* Column offset overflow */
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,12 @@ from test.support.os_helper import TESTFN, unlink
|
||||||
from test.support.warnings_helper import check_warnings
|
from test.support.warnings_helper import check_warnings
|
||||||
from test import support
|
from test import support
|
||||||
|
|
||||||
|
try:
|
||||||
|
from _testcapi import INT_MAX
|
||||||
|
except ImportError:
|
||||||
|
INT_MAX = 2**31 - 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class NaiveException(Exception):
|
class NaiveException(Exception):
|
||||||
def __init__(self, x):
|
def __init__(self, x):
|
||||||
|
@ -318,11 +324,13 @@ class ExceptionTests(unittest.TestCase):
|
||||||
check('(yield i) = 2', 1, 2)
|
check('(yield i) = 2', 1, 2)
|
||||||
check('def f(*):\n pass', 1, 7)
|
check('def f(*):\n pass', 1, 7)
|
||||||
|
|
||||||
|
@unittest.skipIf(INT_MAX >= sys.maxsize, "Downcasting to int is safe for col_offset")
|
||||||
@support.requires_resource('cpu')
|
@support.requires_resource('cpu')
|
||||||
@support.bigmemtest(support._2G, memuse=1.5)
|
@support.bigmemtest(INT_MAX, memuse=2, dry_run=False)
|
||||||
def testMemoryErrorBigSource(self, _size):
|
def testMemoryErrorBigSource(self, size):
|
||||||
with self.assertRaises(OverflowError):
|
src = b"if True:\n%*s" % (size, b"pass")
|
||||||
exec(f"if True:\n {' ' * 2**31}print('hello world')")
|
with self.assertRaisesRegex(OverflowError, "Parser column offset overflow"):
|
||||||
|
compile(src, '<fragment>', 'exec')
|
||||||
|
|
||||||
@cpython_only
|
@cpython_only
|
||||||
def testSettingException(self):
|
def testSettingException(self):
|
||||||
|
|
|
@ -66,6 +66,7 @@ _Pypegen_tokenizer_error(Parser *p)
|
||||||
const char *msg = NULL;
|
const char *msg = NULL;
|
||||||
PyObject* errtype = PyExc_SyntaxError;
|
PyObject* errtype = PyExc_SyntaxError;
|
||||||
Py_ssize_t col_offset = -1;
|
Py_ssize_t col_offset = -1;
|
||||||
|
p->error_indicator = 1;
|
||||||
switch (p->tok->done) {
|
switch (p->tok->done) {
|
||||||
case E_TOKEN:
|
case E_TOKEN:
|
||||||
msg = "invalid token";
|
msg = "invalid token";
|
||||||
|
@ -101,6 +102,10 @@ _Pypegen_tokenizer_error(Parser *p)
|
||||||
msg = "unexpected character after line continuation character";
|
msg = "unexpected character after line continuation character";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case E_COLUMNOVERFLOW:
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"Parser column offset overflow - source line is too big");
|
||||||
|
return -1;
|
||||||
default:
|
default:
|
||||||
msg = "unknown parsing error";
|
msg = "unknown parsing error";
|
||||||
}
|
}
|
||||||
|
@ -233,12 +238,6 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *err
|
||||||
col_offset = 0;
|
col_offset = 0;
|
||||||
} else {
|
} else {
|
||||||
const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf;
|
const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf;
|
||||||
if (p->tok->cur - start > INT_MAX) {
|
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
|
||||||
"Parser column offset overflow - source line is too big");
|
|
||||||
p->error_indicator = 1;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
|
col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -1366,6 +1366,10 @@ tok_nextc(struct tok_state *tok)
|
||||||
int rc;
|
int rc;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (tok->cur != tok->inp) {
|
if (tok->cur != tok->inp) {
|
||||||
|
if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
|
||||||
|
tok->done = E_COLUMNOVERFLOW;
|
||||||
|
return EOF;
|
||||||
|
}
|
||||||
tok->col_offset++;
|
tok->col_offset++;
|
||||||
return Py_CHARMASK(*tok->cur++); /* Fast path */
|
return Py_CHARMASK(*tok->cur++); /* Fast path */
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue