Issue #6137: The pickle module now translates module names when loading

or dumping pickles with a 2.x-compatible protocol, in order to make data
sharing and migration easier. This behaviour can be disabled using the
new `fix_imports` optional argument.
This commit is contained in:
Antoine Pitrou 2009-06-04 20:32:06 +00:00
parent 751899a59f
commit d9dfaa9487
8 changed files with 532 additions and 157 deletions

View file

@ -103,25 +103,33 @@ enum {
/* Exception classes for pickle. These should override the ones defined in
pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError;
static PyObject *PicklingError;
static PyObject *UnpicklingError;
static PyObject *PickleError = NULL;
static PyObject *PicklingError = NULL;
static PyObject *UnpicklingError = NULL;
/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table;
static PyObject *dispatch_table = NULL;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry;
static PyObject *extension_registry = NULL;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry;
static PyObject *inverted_registry = NULL;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache;
static PyObject *extension_cache = NULL;
/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
static PyObject *name_mapping_2to3 = NULL;
/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
static PyObject *import_mapping_2to3 = NULL;
/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
static PyObject *name_mapping_3to2 = NULL;
static PyObject *import_mapping_3to2 = NULL;
/* XXX: Are these really nescessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple;
static PyObject *empty_tuple = NULL;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple;
static PyObject *two_tuple = NULL;
static int
stack_underflow(void)
@ -315,6 +323,8 @@ typedef struct PicklerObject {
should not be used if with self-referential
objects. */
int fast_nesting;
int fix_imports; /* Indicate whether Pickler should fix
the name of globals for Python 2.x. */
PyObject *fast_memo;
} PicklerObject;
@ -340,6 +350,9 @@ typedef struct UnpicklerObject {
objects. */
Py_ssize_t num_marks; /* Number of marks in the mark stack. */
Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
int proto; /* Protocol of the pickle loaded. */
int fix_imports; /* Indicate whether Unpickler should fix
the name of globals pickled by Python 2.x. */
} UnpicklerObject;
/* Forward declarations */
@ -1972,6 +1985,63 @@ save_global(PicklerObject *self, PyObject *obj, PyObject *name)
unicode_encoder = PyUnicode_AsASCIIString;
}
/* For protocol < 3 and if the user didn't request against doing so,
we convert module names to the old 2.x module names. */
if (self->fix_imports) {
PyObject *key;
PyObject *item;
key = PyTuple_Pack(2, module_name, global_name);
if (key == NULL)
goto error;
item = PyDict_GetItemWithError(name_mapping_3to2, key);
Py_DECREF(key);
if (item) {
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.REVERSE_NAME_MAPPING values "
"should be 2-tuples, not %.200s",
Py_TYPE(item)->tp_name);
goto error;
}
Py_CLEAR(module_name);
Py_CLEAR(global_name);
module_name = PyTuple_GET_ITEM(item, 0);
global_name = PyTuple_GET_ITEM(item, 1);
if (!PyUnicode_Check(module_name) ||
!PyUnicode_Check(global_name)) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.REVERSE_NAME_MAPPING values "
"should be pairs of str, not (%.200s, %.200s)",
Py_TYPE(module_name)->tp_name,
Py_TYPE(global_name)->tp_name);
goto error;
}
Py_INCREF(module_name);
Py_INCREF(global_name);
}
else if (PyErr_Occurred()) {
goto error;
}
item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
if (item) {
if (!PyUnicode_Check(item)) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.REVERSE_IMPORT_MAPPING values "
"should be strings, not %.200s",
Py_TYPE(item)->tp_name);
goto error;
}
Py_CLEAR(module_name);
module_name = item;
Py_INCREF(module_name);
}
else if (PyErr_Occurred()) {
goto error;
}
}
/* Save the name of the module. */
encoded = unicode_encoder(module_name);
if (encoded == NULL) {
@ -2608,18 +2678,23 @@ PyDoc_STRVAR(Pickler_doc,
"The file argument must have a write() method that accepts a single\n"
"bytes argument. It can thus be a file object opened for binary\n"
"writing, a io.BytesIO instance, or any other custom object that\n"
"meets this interface.\n");
"meets this interface.\n"
"\n"
"If fix_imports is True and protocol is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python\n"
"2.x, so that the pickle data stream is readable with Python 2.x.\n");
static int
Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {"file", "protocol", 0};
static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
PyObject *file;
PyObject *proto_obj = NULL;
long proto = 0;
int fix_imports = 1;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
kwlist, &file, &proto_obj))
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler",
kwlist, &file, &proto_obj, &fix_imports))
return -1;
/* In case of multiple __init__() calls, clear previous content. */
@ -2628,8 +2703,11 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
if (proto_obj == NULL || proto_obj == Py_None)
proto = DEFAULT_PROTOCOL;
else
else {
proto = PyLong_AsLong(proto_obj);
if (proto == -1 && PyErr_Occurred())
return -1;
}
if (proto < 0)
proto = HIGHEST_PROTOCOL;
@ -2639,12 +2717,13 @@ Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
return -1;
}
self->proto = proto;
self->bin = proto > 0;
self->arg = NULL;
self->fast = 0;
self->fast_nesting = 0;
self->fast_memo = NULL;
self->proto = proto;
self->bin = proto > 0;
self->arg = NULL;
self->fast = 0;
self->fast_nesting = 0;
self->fast_memo = NULL;
self->fix_imports = fix_imports && proto < 3;
if (!PyObject_HasAttrString(file, "write")) {
PyErr_SetString(PyExc_TypeError,
@ -4220,8 +4299,10 @@ load_proto(UnpicklerObject *self)
return -1;
i = (unsigned char)s[0];
if (i <= HIGHEST_PROTOCOL)
if (i <= HIGHEST_PROTOCOL) {
self->proto = i;
return 0;
}
PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
return -1;
@ -4383,12 +4464,67 @@ Unpickler_find_class(UnpicklerObject *self, PyObject *args)
&module_name, &global_name))
return NULL;
/* Try to map the old names used in Python 2.x to the new ones used in
Python 3.x. We do this only with old pickle protocols and when the
user has not disabled the feature. */
if (self->proto < 3 && self->fix_imports) {
PyObject *key;
PyObject *item;
/* Check if the global (i.e., a function or a class) was renamed
or moved to another module. */
key = PyTuple_Pack(2, module_name, global_name);
if (key == NULL)
return NULL;
item = PyDict_GetItemWithError(name_mapping_2to3, key);
Py_DECREF(key);
if (item) {
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.NAME_MAPPING values should be "
"2-tuples, not %.200s", Py_TYPE(item)->tp_name);
return NULL;
}
module_name = PyTuple_GET_ITEM(item, 0);
global_name = PyTuple_GET_ITEM(item, 1);
if (!PyUnicode_Check(module_name) ||
!PyUnicode_Check(global_name)) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.NAME_MAPPING values should be "
"pairs of str, not (%.200s, %.200s)",
Py_TYPE(module_name)->tp_name,
Py_TYPE(global_name)->tp_name);
return NULL;
}
}
else if (PyErr_Occurred()) {
return NULL;
}
/* Check if the module was renamed. */
item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
if (item) {
if (!PyUnicode_Check(item)) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.IMPORT_MAPPING values should be "
"strings, not %.200s", Py_TYPE(item)->tp_name);
return NULL;
}
module_name = item;
}
else if (PyErr_Occurred()) {
return NULL;
}
}
modules_dict = PySys_GetObject("modules");
if (modules_dict == NULL)
return NULL;
module = PyDict_GetItem(modules_dict, module_name);
module = PyDict_GetItemWithError(modules_dict, module_name);
if (module == NULL) {
if (PyErr_Occurred())
return NULL;
module = PyImport_Import(module_name);
if (module == NULL)
return NULL;
@ -4477,15 +4613,20 @@ PyDoc_STRVAR(Unpickler_doc,
"reading, a BytesIO object, or any other custom object that\n"
"meets this interface.\n"
"\n"
"Optional keyword arguments are encoding and errors, which are\n"
"used to decode 8-bit string instances pickled by Python 2.x.\n"
"These default to 'ASCII' and 'strict', respectively.\n");
"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
"which are used to control compatiblity support for pickle stream\n"
"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
"map the old Python 2.x names to the new names used in Python 3.x. The\n"
"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
"instances pickled by Python 2.x; these default to 'ASCII' and\n"
"'strict', respectively.\n");
static int
Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
{
static char *kwlist[] = {"file", "encoding", "errors", 0};
static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
PyObject *file;
int fix_imports = 1;
char *encoding = NULL;
char *errors = NULL;
@ -4504,8 +4645,8 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
extra careful in the other Unpickler methods, since a subclass could
forget to call Unpickler.__init__() thus breaking our internal
invariants. */
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
&file, &encoding, &errors))
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist,
&file, &fix_imports, &encoding, &errors))
return -1;
/* In case of multiple __init__() calls, clear previous content. */
@ -4549,6 +4690,8 @@ Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
self->last_string = NULL;
self->arg = NULL;
self->proto = 0;
self->fix_imports = fix_imports;
return 0;
}
@ -4672,40 +4815,85 @@ static PyTypeObject Unpickler_Type = {
};
static int
init_stuff(void)
initmodule(void)
{
PyObject *copyreg;
PyObject *copyreg = NULL;
PyObject *compat_pickle = NULL;
/* XXX: We should ensure that the types of the dictionaries imported are
exactly PyDict objects. Otherwise, it is possible to crash the pickle
since we use the PyDict API directly to access these dictionaries. */
copyreg = PyImport_ImportModule("copyreg");
if (!copyreg)
return -1;
goto error;
dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
if (!dispatch_table)
goto error;
extension_registry = \
PyObject_GetAttrString(copyreg, "_extension_registry");
if (!extension_registry)
goto error;
inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
if (!inverted_registry)
goto error;
extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
if (!extension_cache)
goto error;
Py_CLEAR(copyreg);
Py_DECREF(copyreg);
/* Load the 2.x -> 3.x stdlib module mapping tables */
compat_pickle = PyImport_ImportModule("_compat_pickle");
if (!compat_pickle)
goto error;
name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
if (!name_mapping_2to3)
goto error;
if (!PyDict_CheckExact(name_mapping_2to3)) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
Py_TYPE(name_mapping_2to3)->tp_name);
goto error;
}
import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
"IMPORT_MAPPING");
if (!import_mapping_2to3)
goto error;
if (!PyDict_CheckExact(import_mapping_2to3)) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.IMPORT_MAPPING should be a dict, "
"not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
goto error;
}
/* ... and the 3.x -> 2.x mapping tables */
name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
"REVERSE_NAME_MAPPING");
if (!name_mapping_3to2)
goto error;
if (!PyDict_CheckExact(name_mapping_3to2)) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.REVERSE_NAME_MAPPING shouldbe a dict, "
"not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
goto error;
}
import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
"REVERSE_IMPORT_MAPPING");
if (!import_mapping_3to2)
goto error;
if (!PyDict_CheckExact(import_mapping_3to2)) {
PyErr_Format(PyExc_RuntimeError,
"_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
"not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
goto error;
}
Py_CLEAR(compat_pickle);
empty_tuple = PyTuple_New(0);
if (empty_tuple == NULL)
return -1;
goto error;
two_tuple = PyTuple_New(2);
if (two_tuple == NULL)
return -1;
goto error;
/* We use this temp container with no regard to refcounts, or to
* keeping containees alive. Exempt from GC, because we don't
* want anything looking at two_tuple() by magic.
@ -4715,7 +4903,18 @@ init_stuff(void)
return 0;
error:
Py_DECREF(copyreg);
Py_CLEAR(copyreg);
Py_CLEAR(dispatch_table);
Py_CLEAR(extension_registry);
Py_CLEAR(inverted_registry);
Py_CLEAR(extension_cache);
Py_CLEAR(compat_pickle);
Py_CLEAR(name_mapping_2to3);
Py_CLEAR(import_mapping_2to3);
Py_CLEAR(name_mapping_3to2);
Py_CLEAR(import_mapping_3to2);
Py_CLEAR(empty_tuple);
Py_CLEAR(two_tuple);
return -1;
}
@ -4773,7 +4972,7 @@ PyInit__pickle(void)
if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
return NULL;
if (init_stuff() < 0)
if (initmodule() < 0)
return NULL;
return m;