bpo-41930: Add support for SQLite serialise/deserialise API (GH-26728)

Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com>
This commit is contained in:
Erlend Egeberg Aasland 2022-04-05 16:15:25 +02:00 committed by GitHub
parent aa0f056a00
commit a7551247e7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 435 additions and 1 deletions

View file

@ -693,6 +693,156 @@ exit:
return return_value;
}
#if defined(PY_SQLITE_HAVE_SERIALIZE)

/* Docstring exposed as Connection.serialize.__doc__. */
PyDoc_STRVAR(serialize__doc__,
"serialize($self, /, *, name=\'main\')\n"
"--\n"
"\n"
"Serialize a database into a byte string.\n"
"\n"
" name\n"
" Which database to serialize.\n"
"\n"
"For an ordinary on-disk database file, the serialization is just a copy of the\n"
"disk file. For an in-memory database or a \"temp\" database, the serialization is\n"
"the same sequence of bytes which would be written to disk if that database\n"
"were backed up to disk.");

/* Method-table entry for serialize(); consumed by connection_methods[]. */
#define SERIALIZE_METHODDEF \
{"serialize", (PyCFunction)(void(*)(void))serialize, METH_FASTCALL|METH_KEYWORDS, serialize__doc__},

static PyObject *
serialize_impl(pysqlite_Connection *self, const char *name);

/* Fastcall entry point for Connection.serialize().
 *
 * Unpacks the single optional keyword-only argument 'name' (default "main"),
 * checks that it is a str without embedded NUL characters, and forwards the
 * UTF-8 encoded name to serialize_impl().
 *
 * Returns whatever serialize_impl() returns, or NULL with an exception set
 * when argument parsing or validation fails.
 */
static PyObject *
serialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const _keywords[] = {"name", NULL};
    static _PyArg_Parser _parser = {NULL, _keywords, "serialize", 0};
    PyObject *argsbuf[1];
    const char *name = "main";

    /* Number of optional arguments actually supplied by the caller. */
    const Py_ssize_t kwcount = (kwnames != NULL) ? PyTuple_GET_SIZE(kwnames) : 0;
    const Py_ssize_t noptargs = nargs + kwcount - 0;

    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf);
    if (args == NULL) {
        return NULL;
    }

    if (noptargs != 0) {
        PyObject *name_obj = args[0];
        Py_ssize_t name_length;

        if (!PyUnicode_Check(name_obj)) {
            _PyArg_BadArgument("serialize", "argument 'name'", "str", name_obj);
            return NULL;
        }
        name = PyUnicode_AsUTF8AndSize(name_obj, &name_length);
        if (name == NULL) {
            return NULL;
        }
        /* A C string stops at the first NUL, so a length mismatch means the
         * str contains an embedded NUL. */
        if (strlen(name) != (size_t)name_length) {
            PyErr_SetString(PyExc_ValueError, "embedded null character");
            return NULL;
        }
    }

    return serialize_impl(self, name);
}

#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */
#if defined(PY_SQLITE_HAVE_SERIALIZE)

/* Docstring exposed as Connection.deserialize.__doc__. */
PyDoc_STRVAR(deserialize__doc__,
"deserialize($self, data, /, *, name=\'main\')\n"
"--\n"
"\n"
"Load a serialized database.\n"
"\n"
" data\n"
" The serialized database content.\n"
" name\n"
" Which database to reopen with the deserialization.\n"
"\n"
"The deserialize interface causes the database connection to disconnect from the\n"
"target database, and then reopen it as an in-memory database based on the given\n"
"serialized data.\n"
"\n"
"The deserialize interface will fail with SQLITE_BUSY if the database is\n"
"currently in a read transaction or is involved in a backup operation.");

/* Method-table entry for deserialize(); consumed by connection_methods[]. */
#define DESERIALIZE_METHODDEF \
{"deserialize", (PyCFunction)(void(*)(void))deserialize, METH_FASTCALL|METH_KEYWORDS, deserialize__doc__},

static PyObject *
deserialize_impl(pysqlite_Connection *self, Py_buffer *data,
                 const char *name);

/* Fastcall entry point for Connection.deserialize().
 *
 * Takes one required positional-only argument 'data' (a str or any object
 * supporting the buffer protocol) and one optional keyword-only argument
 * 'name' (a str without embedded NUL characters, default "main"), then
 * forwards both to deserialize_impl().
 *
 * Returns whatever deserialize_impl() returns, or NULL with an exception set
 * when argument parsing or validation fails.  The buffer view acquired for
 * 'data' is released on every exit path.
 */
static PyObject *
deserialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
    static const char * const _keywords[] = {"", "name", NULL};
    static _PyArg_Parser _parser = {NULL, _keywords, "deserialize", 0};
    PyObject *argsbuf[2];
    PyObject *result = NULL;
    Py_buffer data = {NULL, NULL};
    const char *name = "main";

    /* Number of optional arguments supplied (everything beyond 'data'). */
    const Py_ssize_t kwcount = (kwnames != NULL) ? PyTuple_GET_SIZE(kwnames) : 0;
    const Py_ssize_t noptargs = nargs + kwcount - 1;

    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
    if (args == NULL) {
        goto cleanup;
    }

    /* Argument 1: 'data'. */
    if (!PyUnicode_Check(args[0])) {
        /* Any bytes-like object. */
        if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
            goto cleanup;
        }
        if (!PyBuffer_IsContiguous(&data, 'C')) {
            _PyArg_BadArgument("deserialize", "argument 1", "contiguous buffer", args[0]);
            goto cleanup;
        }
    }
    else {
        /* A str: expose its UTF-8 representation as a read-only buffer. */
        Py_ssize_t len;
        const char *ptr = PyUnicode_AsUTF8AndSize(args[0], &len);
        if (ptr == NULL) {
            goto cleanup;
        }
        PyBuffer_FillInfo(&data, args[0], (void *)ptr, len, 1, 0);
    }

    /* Optional keyword-only argument: 'name'. */
    if (noptargs != 0) {
        PyObject *name_obj = args[1];
        Py_ssize_t name_length;

        if (!PyUnicode_Check(name_obj)) {
            _PyArg_BadArgument("deserialize", "argument 'name'", "str", name_obj);
            goto cleanup;
        }
        name = PyUnicode_AsUTF8AndSize(name_obj, &name_length);
        if (name == NULL) {
            goto cleanup;
        }
        /* A C string stops at the first NUL, so a length mismatch means the
         * str contains an embedded NUL. */
        if (strlen(name) != (size_t)name_length) {
            PyErr_SetString(PyExc_ValueError, "embedded null character");
            goto cleanup;
        }
    }

    result = deserialize_impl(self, &data, name);

cleanup:
    /* Cleanup for data */
    if (data.obj) {
        PyBuffer_Release(&data);
    }
    return result;
}

#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */
PyDoc_STRVAR(pysqlite_connection_enter__doc__,
"__enter__($self, /)\n"
"--\n"
@ -832,4 +982,12 @@ exit:
#ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
#define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
#endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */
/*[clinic end generated code: output=176c9095219b17c4 input=a9049054013a1b77]*/
/* Fallbacks for builds where PY_SQLITE_HAVE_SERIALIZE is not defined: the
 * real SERIALIZE_METHODDEF / DESERIALIZE_METHODDEF macros are only defined
 * inside that conditional, so define them as empty here.  This lets
 * connection_methods[] list both names unconditionally; without the feature
 * they simply expand to nothing.
 */
#ifndef SERIALIZE_METHODDEF
#define SERIALIZE_METHODDEF
#endif /* !defined(SERIALIZE_METHODDEF) */
#ifndef DESERIALIZE_METHODDEF
#define DESERIALIZE_METHODDEF
#endif /* !defined(DESERIALIZE_METHODDEF) */
/*[clinic end generated code: output=d965a68f9229a56c input=a9049054013a1b77]*/

View file

@ -1818,6 +1818,125 @@ pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
Py_RETURN_NONE;
}
#ifdef PY_SQLITE_HAVE_SERIALIZE
/*[clinic input]
_sqlite3.Connection.serialize as serialize
*
name: str = "main"
Which database to serialize.
Serialize a database into a byte string.
For an ordinary on-disk database file, the serialization is just a copy of the
disk file. For an in-memory database or a "temp" database, the serialization is
the same sequence of bytes which would be written to disk if that database
were backed up to disk.
[clinic start generated code]*/
static PyObject *
serialize_impl(pysqlite_Connection *self, const char *name)
/*[clinic end generated code: output=97342b0e55239dd3 input=d2eb5194a65abe2b]*/
{
    /* Serialize the database schema called 'name' into a new bytes object.
     *
     * Returns a new reference to a bytes object on success.  Returns NULL
     * with an exception set if the thread/connection checks fail, if SQLite
     * cannot serialize the database (self->OperationalError), if the result
     * does not fit in a Py_ssize_t (OverflowError), or if the bytes object
     * cannot be created.
     */
    if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
        return NULL;
    }

    /* If SQLite has a contiguous memory representation of the database, we can
     * avoid memory allocations, so we try with the no-copy flag first.
     */
    sqlite3_int64 size;
    unsigned int flags = SQLITE_SERIALIZE_NOCOPY;
    const char *data;

    Py_BEGIN_ALLOW_THREADS
    data = (const char *)sqlite3_serialize(self->db, name, &size, flags);
    if (data == NULL) {
        /* No-copy was not possible; retry and let SQLite allocate a copy. */
        flags &= ~SQLITE_SERIALIZE_NOCOPY;
        data = (const char *)sqlite3_serialize(self->db, name, &size, flags);
    }
    Py_END_ALLOW_THREADS

    if (data == NULL) {
        PyErr_Format(self->OperationalError, "unable to serialize '%s'",
                     name);
        return NULL;
    }

    /* sqlite3_int64 may be wider than Py_ssize_t; refuse to silently
     * truncate the size when building the bytes object.
     */
    PyObject *res = NULL;
    if (size > (sqlite3_int64)PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_OverflowError,
                        "serialized database is too large");
    }
    else {
        res = PyBytes_FromStringAndSize(data, (Py_ssize_t)size);
    }

    /* Only the copying variant hands us a buffer that we must free; this
     * has to happen on the error paths as well.
     */
    if (!(flags & SQLITE_SERIALIZE_NOCOPY)) {
        sqlite3_free((void *)data);
    }
    return res;
}
/*[clinic input]
_sqlite3.Connection.deserialize as deserialize
data: Py_buffer(accept={buffer, str})
The serialized database content.
/
*
name: str = "main"
Which database to reopen with the deserialization.
Load a serialized database.
The deserialize interface causes the database connection to disconnect from the
target database, and then reopen it as an in-memory database based on the given
serialized data.
The deserialize interface will fail with SQLITE_BUSY if the database is
currently in a read transaction or is involved in a backup operation.
[clinic start generated code]*/
static PyObject *
deserialize_impl(pysqlite_Connection *self, Py_buffer *data,
                 const char *name)
/*[clinic end generated code: output=e394c798b98bad89 input=1be4ca1faacf28f2]*/
{
    /* Replace the database schema called 'name' with the serialized content
     * held in 'data'.
     *
     * Returns None on success.  Returns NULL with an exception set if the
     * thread/connection checks fail, if 'data' is too large (OverflowError),
     * if the copy buffer cannot be allocated (MemoryError), or if
     * sqlite3_deserialize() reports an error.
     */
    if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
        return NULL;
    }

    /* Transfer ownership of the buffer to SQLite:
     * - Move buffer from Py to SQLite
     * - Tell SQLite to free buffer memory
     * - Tell SQLite that it is permitted to grow the resulting database
     *
     * Make sure we don't overflow sqlite3_deserialize(); it accepts a signed
     * 64-bit int as its data size argument.
     *
     * We can safely use sqlite3_malloc64 here, since it was introduced before
     * the serialize APIs.
     */
    if (data->len > 9223372036854775807) {  // (1 << 63) - 1
        PyErr_SetString(PyExc_OverflowError, "'data' is too large");
        return NULL;
    }

    sqlite3_int64 size = (sqlite3_int64)data->len;

    /* Per the SQLite documentation a zero-byte allocation request returns
     * NULL, which would be misreported as MemoryError for empty input.
     * Always allocate at least one byte so that NULL really means
     * out-of-memory.
     */
    sqlite3_int64 capacity = (size > 0) ? size : 1;
    unsigned char *buf = sqlite3_malloc64(capacity);
    if (buf == NULL) {
        return PyErr_NoMemory();
    }

    const unsigned int flags = SQLITE_DESERIALIZE_FREEONCLOSE |
                               SQLITE_DESERIALIZE_RESIZEABLE;
    int rc;
    Py_BEGIN_ALLOW_THREADS
    /* Skip the copy for empty input: data->buf is not guaranteed to be a
     * valid pointer in that case.
     */
    if (size > 0) {
        (void)memcpy(buf, data->buf, (size_t)size);
    }
    /* With SQLITE_DESERIALIZE_FREEONCLOSE set, buf now belongs to SQLite;
     * per the SQLite documentation it is freed by SQLite even when the call
     * fails, so it must not be freed here.
     */
    rc = sqlite3_deserialize(self->db, name, buf, size, capacity, flags);
    Py_END_ALLOW_THREADS

    if (rc != SQLITE_OK) {
        (void)_pysqlite_seterror(self->state, self->db);
        return NULL;
    }
    Py_RETURN_NONE;
}
#endif // PY_SQLITE_HAVE_SERIALIZE
/*[clinic input]
_sqlite3.Connection.__enter__ as pysqlite_connection_enter
@ -1971,6 +2090,8 @@ static PyMethodDef connection_methods[] = {
PYSQLITE_CONNECTION_SET_TRACE_CALLBACK_METHODDEF
SETLIMIT_METHODDEF
GETLIMIT_METHODDEF
SERIALIZE_METHODDEF
DESERIALIZE_METHODDEF
{NULL, NULL}
};