mirror of
https://github.com/python/cpython.git
synced 2025-07-19 09:15:34 +00:00
gh-96670: Raise SyntaxError when parsing NULL bytes (#97594)
This commit is contained in:
parent
dd53b79de0
commit
aab01e3524
10 changed files with 65 additions and 21 deletions
|
@ -86,6 +86,12 @@ Other Language Changes
|
||||||
* :class:`memoryview` now supports the half-float type (the "e" format code).
|
* :class:`memoryview` now supports the half-float type (the "e" format code).
|
||||||
(Contributed by Dong-hee Na and Antoine Pitrou in :gh:`90751`.)
|
(Contributed by Dong-hee Na and Antoine Pitrou in :gh:`90751`.)
|
||||||
|
|
||||||
|
* The parser now raises :exc:`SyntaxError` when parsing source code containing
|
||||||
|
null bytes. (Contributed by Pablo Galindo in :gh:`96670`.)
|
||||||
|
|
||||||
|
* :func:`ast.parse` now raises :exc:`SyntaxError` instead of :exc:`ValueError`
|
||||||
|
when parsing source code containing null bytes. (Contributed by Pablo Galindo
|
||||||
|
in :gh:`96670`.)
|
||||||
|
|
||||||
New Modules
|
New Modules
|
||||||
===========
|
===========
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *);
|
PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *);
|
||||||
|
PyAPI_FUNC(char *) _Py_UniversalNewlineFgetsWithSize(char *, int, FILE*, PyObject *, size_t*);
|
||||||
|
|
||||||
/* The std printer acts as a preliminary sys.stderr until the new io
|
/* The std printer acts as a preliminary sys.stderr until the new io
|
||||||
infrastructure is in place. */
|
infrastructure is in place. */
|
||||||
|
|
|
@ -844,6 +844,10 @@ class AST_Tests(unittest.TestCase):
|
||||||
check_limit("a", "[0]")
|
check_limit("a", "[0]")
|
||||||
check_limit("a", "*a")
|
check_limit("a", "*a")
|
||||||
|
|
||||||
|
def test_null_bytes(self):
|
||||||
|
with self.assertRaises(SyntaxError,
|
||||||
|
msg="source code string cannot contain null bytes"):
|
||||||
|
ast.parse("a\0b")
|
||||||
|
|
||||||
class ASTHelpers_Test(unittest.TestCase):
|
class ASTHelpers_Test(unittest.TestCase):
|
||||||
maxDiff = None
|
maxDiff = None
|
||||||
|
|
|
@ -334,11 +334,10 @@ class BuiltinTest(unittest.TestCase):
|
||||||
self.assertRaises(TypeError, compile)
|
self.assertRaises(TypeError, compile)
|
||||||
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'badmode')
|
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'badmode')
|
||||||
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'single', 0xff)
|
self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'single', 0xff)
|
||||||
self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
|
|
||||||
self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
|
self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
|
||||||
mode='eval', source='0', filename='tmp')
|
mode='eval', source='0', filename='tmp')
|
||||||
compile('print("\xe5")\n', '', 'exec')
|
compile('print("\xe5")\n', '', 'exec')
|
||||||
self.assertRaises(ValueError, compile, chr(0), 'f', 'exec')
|
self.assertRaises(SyntaxError, compile, chr(0), 'f', 'exec')
|
||||||
self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')
|
self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')
|
||||||
|
|
||||||
# test the optimize argument
|
# test the optimize argument
|
||||||
|
|
|
@ -657,6 +657,18 @@ class CmdLineTest(unittest.TestCase):
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_syntaxerror_null_bytes(self):
|
||||||
|
script = "x = '\0' nothing to see here\n';import os;os.system('echo pwnd')\n"
|
||||||
|
with os_helper.temp_dir() as script_dir:
|
||||||
|
script_name = _make_test_script(script_dir, 'script', script)
|
||||||
|
exitcode, stdout, stderr = assert_python_failure(script_name)
|
||||||
|
self.assertEqual(
|
||||||
|
stderr.splitlines()[-2:],
|
||||||
|
[ b" x = '",
|
||||||
|
b'SyntaxError: source code cannot contain null bytes'
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
def test_consistent_sys_path_for_direct_execution(self):
|
def test_consistent_sys_path_for_direct_execution(self):
|
||||||
# This test case ensures that the following all give the same
|
# This test case ensures that the following all give the same
|
||||||
# sys.path configuration:
|
# sys.path configuration:
|
||||||
|
|
|
@ -544,7 +544,7 @@ if 1:
|
||||||
with open(fn, "wb") as fp:
|
with open(fn, "wb") as fp:
|
||||||
fp.write(src)
|
fp.write(src)
|
||||||
res = script_helper.run_python_until_end(fn)[0]
|
res = script_helper.run_python_until_end(fn)[0]
|
||||||
self.assertIn(b"Non-UTF-8", res.err)
|
self.assertIn(b"source code cannot contain null bytes", res.err)
|
||||||
|
|
||||||
def test_yet_more_evil_still_undecodable(self):
|
def test_yet_more_evil_still_undecodable(self):
|
||||||
# Issue #25388
|
# Issue #25388
|
||||||
|
@ -554,7 +554,7 @@ if 1:
|
||||||
with open(fn, "wb") as fp:
|
with open(fn, "wb") as fp:
|
||||||
fp.write(src)
|
fp.write(src)
|
||||||
res = script_helper.run_python_until_end(fn)[0]
|
res = script_helper.run_python_until_end(fn)[0]
|
||||||
self.assertIn(b"Non-UTF-8", res.err)
|
self.assertIn(b"source code cannot contain null bytes", res.err)
|
||||||
|
|
||||||
@support.cpython_only
|
@support.cpython_only
|
||||||
@unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI")
|
@unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI")
|
||||||
|
@ -591,9 +591,9 @@ if 1:
|
||||||
def test_null_terminated(self):
|
def test_null_terminated(self):
|
||||||
# The source code is null-terminated internally, but bytes-like
|
# The source code is null-terminated internally, but bytes-like
|
||||||
# objects are accepted, which could be not terminated.
|
# objects are accepted, which could be not terminated.
|
||||||
with self.assertRaisesRegex(ValueError, "cannot contain null"):
|
with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
|
||||||
compile("123\x00", "<dummy>", "eval")
|
compile("123\x00", "<dummy>", "eval")
|
||||||
with self.assertRaisesRegex(ValueError, "cannot contain null"):
|
with self.assertRaisesRegex(SyntaxError, "cannot contain null"):
|
||||||
compile(memoryview(b"123\x00"), "<dummy>", "eval")
|
compile(memoryview(b"123\x00"), "<dummy>", "eval")
|
||||||
code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval")
|
code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval")
|
||||||
self.assertEqual(eval(code), 23)
|
self.assertEqual(eval(code), 23)
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
The parser now raises :exc:`SyntaxError` when parsing source code containing
|
||||||
|
null bytes. Patch by Pablo Galindo.
|
|
@ -230,16 +230,8 @@ _PyLong_FileDescriptor_Converter(PyObject *o, void *ptr)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
** Py_UniversalNewlineFgets is an fgets variation that understands
|
|
||||||
** all of \r, \n and \r\n conventions.
|
|
||||||
** The stream should be opened in binary mode.
|
|
||||||
** The fobj parameter exists solely for legacy reasons and must be NULL.
|
|
||||||
** Note that we need no error handling: fgets() treats error and eof
|
|
||||||
** identically.
|
|
||||||
*/
|
|
||||||
char *
|
char *
|
||||||
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
|
_Py_UniversalNewlineFgetsWithSize(char *buf, int n, FILE *stream, PyObject *fobj, size_t* size)
|
||||||
{
|
{
|
||||||
char *p = buf;
|
char *p = buf;
|
||||||
int c;
|
int c;
|
||||||
|
@ -265,11 +257,28 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
|
||||||
}
|
}
|
||||||
FUNLOCKFILE(stream);
|
FUNLOCKFILE(stream);
|
||||||
*p = '\0';
|
*p = '\0';
|
||||||
if (p == buf)
|
if (p == buf) {
|
||||||
return NULL;
|
return NULL;
|
||||||
|
}
|
||||||
|
*size = p - buf;
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
** Py_UniversalNewlineFgets is an fgets variation that understands
|
||||||
|
** all of \r, \n and \r\n conventions.
|
||||||
|
** The stream should be opened in binary mode.
|
||||||
|
** The fobj parameter exists solely for legacy reasons and must be NULL.
|
||||||
|
** Note that we need no error handling: fgets() treats error and eof
|
||||||
|
** identically.
|
||||||
|
*/
|
||||||
|
|
||||||
|
char *
|
||||||
|
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) {
|
||||||
|
size_t size;
|
||||||
|
return _Py_UniversalNewlineFgetsWithSize(buf, n, stream, fobj, &size);
|
||||||
|
}
|
||||||
|
|
||||||
/* **************************** std printer ****************************
|
/* **************************** std printer ****************************
|
||||||
* The stdprinter is used during the boot strapping phase as a preliminary
|
* The stdprinter is used during the boot strapping phase as a preliminary
|
||||||
* file like object for sys.stderr.
|
* file like object for sys.stderr.
|
||||||
|
|
|
@ -378,6 +378,11 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
contains_null_bytes(const char* str, size_t size) {
|
||||||
|
return memchr(str, 0, size) != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
tok_readline_recode(struct tok_state *tok) {
|
tok_readline_recode(struct tok_state *tok) {
|
||||||
PyObject *line;
|
PyObject *line;
|
||||||
|
@ -829,9 +834,9 @@ tok_readline_raw(struct tok_state *tok)
|
||||||
if (!tok_reserve_buf(tok, BUFSIZ)) {
|
if (!tok_reserve_buf(tok, BUFSIZ)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
char *line = Py_UniversalNewlineFgets(tok->inp,
|
int n_chars = (int)(tok->end - tok->inp);
|
||||||
(int)(tok->end - tok->inp),
|
size_t line_size = 0;
|
||||||
tok->fp, NULL);
|
char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size);
|
||||||
if (line == NULL) {
|
if (line == NULL) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -839,7 +844,7 @@ tok_readline_raw(struct tok_state *tok)
|
||||||
tok_concatenate_interactive_new_line(tok, line) == -1) {
|
tok_concatenate_interactive_new_line(tok, line) == -1) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
tok->inp = strchr(tok->inp, '\0');
|
tok->inp += line_size;
|
||||||
if (tok->inp == tok->buf) {
|
if (tok->inp == tok->buf) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1075,6 +1080,12 @@ tok_nextc(struct tok_state *tok)
|
||||||
return EOF;
|
return EOF;
|
||||||
}
|
}
|
||||||
tok->line_start = tok->cur;
|
tok->line_start = tok->cur;
|
||||||
|
|
||||||
|
if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
|
||||||
|
syntaxerror(tok, "source code cannot contain null bytes");
|
||||||
|
tok->cur = tok->inp;
|
||||||
|
return EOF;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Py_UNREACHABLE();
|
Py_UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1858,7 +1858,7 @@ _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyComp
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strlen(str) != (size_t)size) {
|
if (strlen(str) != (size_t)size) {
|
||||||
PyErr_SetString(PyExc_ValueError,
|
PyErr_SetString(PyExc_SyntaxError,
|
||||||
"source code string cannot contain null bytes");
|
"source code string cannot contain null bytes");
|
||||||
Py_CLEAR(*cmd_copy);
|
Py_CLEAR(*cmd_copy);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue