bpo-41180: Replace marshal code.__new__ audit event with marshal.load[s] and marshal.dumps (GH-26961)

This commit is contained in:
Steve Dower 2021-06-30 17:21:37 +01:00 committed by GitHub
parent 86eeeb4259
commit 139de04518
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 65 additions and 10 deletions

View file

@ -66,6 +66,8 @@ The module defines these functions:
The *version* argument indicates the data format that ``dump`` should use
(see below).
.. audit-event:: marshal.dumps value,version marshal.dump
.. function:: load(file)
@ -74,6 +76,8 @@ The module defines these functions:
format), raise :exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`. The
file must be a readable :term:`binary file`.
.. audit-event:: marshal.load "" marshal.load
.. note::
If an object containing an unsupported type was marshalled with :func:`dump`,
@ -89,6 +93,8 @@ The module defines these functions:
The *version* argument indicates the data format that ``dumps`` should use
(see below).
.. audit-event:: marshal.dumps value,version marshal.dump
.. function:: loads(bytes)
@ -96,6 +102,8 @@ The module defines these functions:
:exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`. Extra bytes in the
input are ignored.
.. audit-event:: marshal.loads bytes marshal.load
In addition, the following constants are defined:

View file

@ -6,6 +6,7 @@ module with arguments identifying each test.
"""
import contextlib
import os
import sys
@ -106,6 +107,32 @@ def test_block_add_hook_baseexception():
pass
def test_marshal():
    """Check the audit events raised by marshal round trips.

    While a TestHook is installed, one in-memory round trip
    (``dumps``/``loads``) and one file round trip (``dump``/``load``) are
    performed. Afterwards the recorded events must show:

    * ``marshal.dumps`` twice, each with ``(value, marshal.version)``;
    * ``marshal.loads`` once, carrying the serialized payload;
    * ``marshal.load`` once, for the file-based read.
    """
    import marshal

    sample = ("a", "b", "c", 1, 2, 3)
    # Serialized before the hook is installed, so this call is not recorded.
    payload = marshal.dumps(sample)
    scratch_name = "test-marshal.bin"

    with TestHook() as hook:
        # In-memory round trip.
        round_tripped = marshal.loads(marshal.dumps(sample))
        assertEqual(sample, round_tripped)

        # File round trip; the scratch file is removed even on failure.
        try:
            with open(scratch_name, "wb") as sink:
                marshal.dump(sample, sink)
            with open(scratch_name, "rb") as source:
                assertEqual(sample, marshal.load(source))
        finally:
            os.unlink(scratch_name)

    def recorded_args(event_name):
        # Argument tuples of every recorded event with the given name.
        return [args for event, args in hook.seen if event == event_name]

    dumps_calls = [(args[0], args[1]) for args in recorded_args("marshal.dumps")]
    expected_dump = (sample, marshal.version)
    assertSequenceEqual(dumps_calls, [expected_dump, expected_dump])

    loads_payloads = [args[0] for args in recorded_args("marshal.loads")]
    assertSequenceEqual(loads_payloads, [payload])

    load_events = [event for event, _ in hook.seen if event == "marshal.load"]
    assertSequenceEqual(load_events, ["marshal.load"])
def test_pickle():
import pickle

View file

@ -54,6 +54,11 @@ class AuditTest(unittest.TestCase):
def test_block_add_hook_baseexception(self):
self.do_test("test_block_add_hook_baseexception")
def test_marshal(self):
# Import marshal through import_helper before running anything; presumably
# this skips the test when the module is unavailable (confirm against
# import_helper.import_module).
import_helper.import_module("marshal")
# Delegate to do_test, passing the name that identifies the test to run;
# presumably the identically named function in the audit test script.
self.do_test("test_marshal")
def test_pickle(self):
import_helper.import_module("pickle")

View file

@ -0,0 +1,5 @@
Add auditing events to the :mod:`marshal` module, and stop raising
``code.__new__`` events for every unmarshalled code object. Directly
instantiated code objects will continue to raise an event, and audit event
handlers should inspect or collect the raw marshal data. This reduces a
significant performance overhead when loading from ``.pyc`` files.

View file

@ -596,14 +596,18 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
{
char buf[BUFSIZ];
WFILE wf;
if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
return; /* caller must check PyErr_Occurred() */
}
memset(&wf, 0, sizeof(wf));
wf.fp = fp;
wf.ptr = wf.buf = buf;
wf.end = wf.ptr + sizeof(buf);
wf.error = WFERR_OK;
wf.version = version;
if (w_init_refs(&wf, version))
return; /* caller mush check PyErr_Occurred() */
if (w_init_refs(&wf, version)) {
return; /* caller must check PyErr_Occurred() */
}
w_object(x, &wf);
w_clear_refs(&wf);
w_flush(&wf);
@ -1368,12 +1372,6 @@ r_object(RFILE *p)
goto code_error;
Py_ssize_t nlocalsplus = PyTuple_GET_SIZE(localsplusnames);
if (PySys_Audit("code.__new__", "OOOiiiiii",
code, filename, name, argcount, posonlyargcount,
kwonlyargcount, nlocalsplus, stacksize,
flags) < 0) {
goto code_error;
}
struct _PyCodeConstructor con = {
.filename = filename,
@ -1460,6 +1458,15 @@ read_object(RFILE *p)
fprintf(stderr, "XXX readobject called with exception set\n");
return NULL;
}
if (p->ptr && p->end) {
if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
return NULL;
}
} else if (p->fp || p->readable) {
if (PySys_Audit("marshal.load", NULL) < 0) {
return NULL;
}
}
v = r_object(p);
if (v == NULL && !PyErr_Occurred())
PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
@ -1556,7 +1563,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp)
rf.refs = PyList_New(0);
if (rf.refs == NULL)
return NULL;
result = r_object(&rf);
result = read_object(&rf);
Py_DECREF(rf.refs);
if (rf.buf != NULL)
PyMem_Free(rf.buf);
@ -1577,7 +1584,7 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
rf.refs = PyList_New(0);
if (rf.refs == NULL)
return NULL;
result = r_object(&rf);
result = read_object(&rf);
Py_DECREF(rf.refs);
if (rf.buf != NULL)
PyMem_Free(rf.buf);
@ -1589,6 +1596,9 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
{
WFILE wf;
if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
return NULL;
}
memset(&wf, 0, sizeof(wf));
wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
if (wf.str == NULL)