mirror of
https://github.com/python/cpython.git
synced 2025-10-10 00:43:41 +00:00
Issue #15381: Optimized io.BytesIO to make fewer allocations and copies.
This commit is contained in:
parent
83e802796c
commit
87d0b45485
4 changed files with 159 additions and 189 deletions
|
@ -362,6 +362,9 @@ The following performance enhancements have been added:
|
||||||
The speed up can range from 3x to 15x.
|
The speed up can range from 3x to 15x.
|
||||||
(:issue:`21486`, :issue:`21487`, :issue:`20826`)
|
(:issue:`21486`, :issue:`21487`, :issue:`20826`)
|
||||||
|
|
||||||
|
* Many operations on :class:`io.BytesIO` are now 50% to 100% faster.
|
||||||
|
(Contributed by Serhiy Storchaka in :issue:`15381`.)
|
||||||
|
|
||||||
|
|
||||||
Build and C API Changes
|
Build and C API Changes
|
||||||
=======================
|
=======================
|
||||||
|
|
|
@ -718,12 +718,11 @@ class CBytesIOTest(PyBytesIOTest):
|
||||||
|
|
||||||
@support.cpython_only
|
@support.cpython_only
|
||||||
def test_sizeof(self):
|
def test_sizeof(self):
|
||||||
basesize = support.calcobjsize('P2nN2PnP')
|
basesize = support.calcobjsize('P2n2Pn')
|
||||||
check = self.check_sizeof
|
check = self.check_sizeof
|
||||||
self.assertEqual(object.__sizeof__(io.BytesIO()), basesize)
|
self.assertEqual(object.__sizeof__(io.BytesIO()), basesize)
|
||||||
check(io.BytesIO(), basesize )
|
check(io.BytesIO(), basesize )
|
||||||
check(io.BytesIO(b'a'), basesize + 1 )
|
check(io.BytesIO(b'a' * 1000), basesize + sys.getsizeof(b'a' * 1000))
|
||||||
check(io.BytesIO(b'a' * 1000), basesize + 1000)
|
|
||||||
|
|
||||||
# Various tests of copy-on-write behaviour for BytesIO.
|
# Various tests of copy-on-write behaviour for BytesIO.
|
||||||
|
|
||||||
|
|
|
@ -232,6 +232,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #15381: Optimized io.BytesIO to make fewer allocations and copies.
|
||||||
|
|
||||||
- Issue #22818: Splitting on a pattern that could match an empty string now
|
- Issue #22818: Splitting on a pattern that could match an empty string now
|
||||||
raises a warning. Patterns that can only match empty strings are now
|
raises a warning. Patterns that can only match empty strings are now
|
||||||
rejected.
|
rejected.
|
||||||
|
|
|
@ -4,17 +4,12 @@
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
char *buf;
|
PyObject *buf;
|
||||||
Py_ssize_t pos;
|
Py_ssize_t pos;
|
||||||
Py_ssize_t string_size;
|
Py_ssize_t string_size;
|
||||||
size_t buf_size;
|
|
||||||
PyObject *dict;
|
PyObject *dict;
|
||||||
PyObject *weakreflist;
|
PyObject *weakreflist;
|
||||||
Py_ssize_t exports;
|
Py_ssize_t exports;
|
||||||
/** If `initvalue' != NULL, `buf' is a read-only pointer into the PyBytes
|
|
||||||
* referenced by `initvalue'. It must be copied prior to mutation, and
|
|
||||||
* released during finalization */
|
|
||||||
PyObject *initvalue;
|
|
||||||
} bytesio;
|
} bytesio;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -22,12 +17,18 @@ typedef struct {
|
||||||
bytesio *source;
|
bytesio *source;
|
||||||
} bytesiobuf;
|
} bytesiobuf;
|
||||||
|
|
||||||
|
/* The bytesio object can be in three states:
|
||||||
|
* Py_REFCNT(buf) == 1, exports == 0.
|
||||||
|
* Py_REFCNT(buf) > 1. exports == 0, string_size == PyBytes_GET_SIZE(buf),
|
||||||
|
first modification or export causes the internal buffer to be copied.
|
||||||
|
* exports > 0. Py_REFCNT(buf) == 1, any modifications are forbidden.
|
||||||
|
*/
|
||||||
|
|
||||||
#define CHECK_CLOSED(self, ret) \
|
#define CHECK_CLOSED(self) \
|
||||||
if ((self)->buf == NULL) { \
|
if ((self)->buf == NULL) { \
|
||||||
PyErr_SetString(PyExc_ValueError, \
|
PyErr_SetString(PyExc_ValueError, \
|
||||||
"I/O operation on closed file."); \
|
"I/O operation on closed file."); \
|
||||||
return ret; \
|
return NULL; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CHECK_EXPORTS(self) \
|
#define CHECK_EXPORTS(self) \
|
||||||
|
@ -37,47 +38,8 @@ typedef struct {
|
||||||
return NULL; \
|
return NULL; \
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ensure we have a buffer suitable for writing, in the case that an initvalue
|
#define SHARED_BUF(self) (Py_REFCNT((self)->buf) > 1)
|
||||||
* object was provided, and we're currently borrowing its buffer. `size'
|
|
||||||
* indicates the new buffer size allocated as part of unsharing, to avoid a
|
|
||||||
* redundant reallocation caused by any subsequent mutation. `truncate'
|
|
||||||
* indicates whether truncation should occur if `size` < self->string_size.
|
|
||||||
*
|
|
||||||
* Do nothing if the buffer wasn't shared. Returns 0 on success, or sets an
|
|
||||||
* exception and returns -1 on failure. Existing state is preserved on failure.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
unshare(bytesio *self, size_t preferred_size, int truncate)
|
|
||||||
{
|
|
||||||
if (self->initvalue) {
|
|
||||||
Py_ssize_t copy_size;
|
|
||||||
char *new_buf;
|
|
||||||
|
|
||||||
if((! truncate) && preferred_size < (size_t)self->string_size) {
|
|
||||||
preferred_size = self->string_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* PyMem_Malloc() returns NULL if preferred_size is bigger
|
|
||||||
than PY_SSIZE_T_MAX */
|
|
||||||
new_buf = (char *)PyMem_Malloc(preferred_size);
|
|
||||||
if (new_buf == NULL) {
|
|
||||||
PyErr_NoMemory();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
copy_size = self->string_size;
|
|
||||||
if ((size_t)copy_size > preferred_size) {
|
|
||||||
copy_size = preferred_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(new_buf, self->buf, copy_size);
|
|
||||||
Py_CLEAR(self->initvalue);
|
|
||||||
self->buf = new_buf;
|
|
||||||
self->buf_size = preferred_size;
|
|
||||||
self->string_size = (Py_ssize_t) copy_size;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Internal routine to get a line from the buffer of a BytesIO
|
/* Internal routine to get a line from the buffer of a BytesIO
|
||||||
object. Returns the length between the current position to the
|
object. Returns the length between the current position to the
|
||||||
|
@ -91,7 +53,7 @@ scan_eol(bytesio *self, Py_ssize_t len)
|
||||||
assert(self->buf != NULL);
|
assert(self->buf != NULL);
|
||||||
|
|
||||||
/* Move to the end of the line, up to the end of the string, s. */
|
/* Move to the end of the line, up to the end of the string, s. */
|
||||||
start = self->buf + self->pos;
|
start = PyBytes_AS_STRING(self->buf) + self->pos;
|
||||||
maxlen = self->string_size - self->pos;
|
maxlen = self->string_size - self->pos;
|
||||||
if (len < 0 || len > maxlen)
|
if (len < 0 || len > maxlen)
|
||||||
len = maxlen;
|
len = maxlen;
|
||||||
|
@ -109,6 +71,27 @@ scan_eol(bytesio *self, Py_ssize_t len)
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Internal routine for detaching the shared buffer of BytesIO objects.
|
||||||
|
The caller should ensure that the 'size' argument is non-negative and
|
||||||
|
not less than self->string_size. Returns 0 on success, -1 otherwise. */
|
||||||
|
static int
|
||||||
|
unshare_buffer(bytesio *self, size_t size)
|
||||||
|
{
|
||||||
|
PyObject *new_buf, *old_buf;
|
||||||
|
assert(SHARED_BUF(self));
|
||||||
|
assert(self->exports == 0);
|
||||||
|
assert(size >= (size_t)self->string_size);
|
||||||
|
new_buf = PyBytes_FromStringAndSize(NULL, size);
|
||||||
|
if (new_buf == NULL)
|
||||||
|
return -1;
|
||||||
|
memcpy(PyBytes_AS_STRING(new_buf), PyBytes_AS_STRING(self->buf),
|
||||||
|
self->string_size);
|
||||||
|
old_buf = self->buf;
|
||||||
|
self->buf = new_buf;
|
||||||
|
Py_DECREF(old_buf);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Internal routine for changing the size of the buffer of BytesIO objects.
|
/* Internal routine for changing the size of the buffer of BytesIO objects.
|
||||||
The caller should ensure that the 'size' argument is non-negative. Returns
|
The caller should ensure that the 'size' argument is non-negative. Returns
|
||||||
0 on success, -1 otherwise. */
|
0 on success, -1 otherwise. */
|
||||||
|
@ -117,8 +100,7 @@ resize_buffer(bytesio *self, size_t size)
|
||||||
{
|
{
|
||||||
/* Here, unsigned types are used to avoid dealing with signed integer
|
/* Here, unsigned types are used to avoid dealing with signed integer
|
||||||
overflow, which is undefined in C. */
|
overflow, which is undefined in C. */
|
||||||
size_t alloc = self->buf_size;
|
size_t alloc = PyBytes_GET_SIZE(self->buf);
|
||||||
char *new_buf = NULL;
|
|
||||||
|
|
||||||
assert(self->buf != NULL);
|
assert(self->buf != NULL);
|
||||||
|
|
||||||
|
@ -146,13 +128,15 @@ resize_buffer(bytesio *self, size_t size)
|
||||||
|
|
||||||
if (alloc > ((size_t)-1) / sizeof(char))
|
if (alloc > ((size_t)-1) / sizeof(char))
|
||||||
goto overflow;
|
goto overflow;
|
||||||
new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char));
|
|
||||||
if (new_buf == NULL) {
|
if (SHARED_BUF(self)) {
|
||||||
PyErr_NoMemory();
|
if (unshare_buffer(self, alloc) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (_PyBytes_Resize(&self->buf, alloc) < 0)
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
self->buf_size = alloc;
|
|
||||||
self->buf = new_buf;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@ -167,21 +151,18 @@ resize_buffer(bytesio *self, size_t size)
|
||||||
static Py_ssize_t
|
static Py_ssize_t
|
||||||
write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
|
write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
|
||||||
{
|
{
|
||||||
size_t desired;
|
|
||||||
|
|
||||||
assert(self->buf != NULL);
|
assert(self->buf != NULL);
|
||||||
assert(self->pos >= 0);
|
assert(self->pos >= 0);
|
||||||
assert(len >= 0);
|
assert(len >= 0);
|
||||||
|
|
||||||
desired = (size_t)self->pos + len;
|
if ((size_t)self->pos + len > (size_t)PyBytes_GET_SIZE(self->buf)) {
|
||||||
if (unshare(self, desired, 0) < 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (desired > self->buf_size) {
|
|
||||||
if (resize_buffer(self, (size_t)self->pos + len) < 0)
|
if (resize_buffer(self, (size_t)self->pos + len) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
else if (SHARED_BUF(self)) {
|
||||||
|
if (unshare_buffer(self, self->string_size) < 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
if (self->pos > self->string_size) {
|
if (self->pos > self->string_size) {
|
||||||
/* In case of overseek, pad with null bytes the buffer region between
|
/* In case of overseek, pad with null bytes the buffer region between
|
||||||
|
@ -192,13 +173,13 @@ write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
|
||||||
| | <--to pad-->|<---to write---> |
|
| | <--to pad-->|<---to write---> |
|
||||||
0 buf position
|
0 buf position
|
||||||
*/
|
*/
|
||||||
memset(self->buf + self->string_size, '\0',
|
memset(PyBytes_AS_STRING(self->buf) + self->string_size, '\0',
|
||||||
(self->pos - self->string_size) * sizeof(char));
|
(self->pos - self->string_size) * sizeof(char));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Copy the data to the internal buffer, overwriting some of the existing
|
/* Copy the data to the internal buffer, overwriting some of the existing
|
||||||
data if self->pos < self->string_size. */
|
data if self->pos < self->string_size. */
|
||||||
memcpy(self->buf + self->pos, bytes, len);
|
memcpy(PyBytes_AS_STRING(self->buf) + self->pos, bytes, len);
|
||||||
self->pos += len;
|
self->pos += len;
|
||||||
|
|
||||||
/* Set the new length of the internal string if it has changed. */
|
/* Set the new length of the internal string if it has changed. */
|
||||||
|
@ -209,74 +190,6 @@ write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Release or free any existing buffer, and place the BytesIO in the closed
|
|
||||||
* state. */
|
|
||||||
static void
|
|
||||||
reset(bytesio *self)
|
|
||||||
{
|
|
||||||
if (self->initvalue) {
|
|
||||||
Py_CLEAR(self->initvalue);
|
|
||||||
} else if (self->buf) {
|
|
||||||
PyMem_Free(self->buf);
|
|
||||||
}
|
|
||||||
self->buf = NULL;
|
|
||||||
self->string_size = 0;
|
|
||||||
self->pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Reinitialize with a new heap-allocated buffer of size `size`. Returns 0 on
|
|
||||||
* success, or sets an exception and returns -1 on failure. Existing state is
|
|
||||||
* preserved on failure. */
|
|
||||||
static int
|
|
||||||
reinit_private(bytesio *self, Py_ssize_t size)
|
|
||||||
{
|
|
||||||
char *tmp = (char *)PyMem_Malloc(size);
|
|
||||||
if (tmp == NULL) {
|
|
||||||
PyErr_NoMemory();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
reset(self);
|
|
||||||
self->buf = tmp;
|
|
||||||
self->buf_size = size;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Internal version of BytesIO.__init__; resets the object to its initial
|
|
||||||
* (closed) state before repopulating it, optionally by sharing a PyBytes
|
|
||||||
* buffer provided by `initvalue'. Returns 0 on success, or sets an exception
|
|
||||||
* and returns -1 on failure. */
|
|
||||||
static int
|
|
||||||
reinit(bytesio *self, PyObject *initvalue)
|
|
||||||
{
|
|
||||||
CHECK_CLOSED(self, -1);
|
|
||||||
|
|
||||||
if (initvalue == NULL || initvalue == Py_None) {
|
|
||||||
if (reinit_private(self, 0) < 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
} else if (PyBytes_CheckExact(initvalue)) {
|
|
||||||
reset(self);
|
|
||||||
Py_INCREF(initvalue);
|
|
||||||
self->initvalue = initvalue;
|
|
||||||
self->buf = PyBytes_AS_STRING(initvalue);
|
|
||||||
self->buf_size = PyBytes_GET_SIZE(initvalue);
|
|
||||||
self->string_size = PyBytes_GET_SIZE(initvalue);
|
|
||||||
} else {
|
|
||||||
Py_buffer buf;
|
|
||||||
if (PyObject_GetBuffer(initvalue, &buf, PyBUF_CONTIG_RO) < 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (reinit_private(self, buf.len) < 0) {
|
|
||||||
PyBuffer_Release(&buf);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
memcpy(self->buf, buf.buf, buf.len);
|
|
||||||
self->string_size = buf.len;
|
|
||||||
PyBuffer_Release(&buf);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytesio_get_closed(bytesio *self)
|
bytesio_get_closed(bytesio *self)
|
||||||
{
|
{
|
||||||
|
@ -301,7 +214,7 @@ PyDoc_STRVAR(seekable_doc,
|
||||||
static PyObject *
|
static PyObject *
|
||||||
return_not_closed(bytesio *self)
|
return_not_closed(bytesio *self)
|
||||||
{
|
{
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
Py_RETURN_TRUE;
|
Py_RETURN_TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -311,7 +224,7 @@ PyDoc_STRVAR(flush_doc,
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytesio_flush(bytesio *self)
|
bytesio_flush(bytesio *self)
|
||||||
{
|
{
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -327,7 +240,7 @@ bytesio_getbuffer(bytesio *self)
|
||||||
bytesiobuf *buf;
|
bytesiobuf *buf;
|
||||||
PyObject *view;
|
PyObject *view;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
|
|
||||||
buf = (bytesiobuf *) type->tp_alloc(type, 0);
|
buf = (bytesiobuf *) type->tp_alloc(type, 0);
|
||||||
if (buf == NULL)
|
if (buf == NULL)
|
||||||
|
@ -347,8 +260,23 @@ PyDoc_STRVAR(getval_doc,
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytesio_getvalue(bytesio *self)
|
bytesio_getvalue(bytesio *self)
|
||||||
{
|
{
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
return PyBytes_FromStringAndSize(self->buf, self->string_size);
|
if (self->string_size <= 1 || self->exports > 0)
|
||||||
|
return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self->buf),
|
||||||
|
self->string_size);
|
||||||
|
|
||||||
|
if (self->string_size != PyBytes_GET_SIZE(self->buf)) {
|
||||||
|
if (SHARED_BUF(self)) {
|
||||||
|
if (unshare_buffer(self, self->string_size) < 0)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (_PyBytes_Resize(&self->buf, self->string_size) < 0)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Py_INCREF(self->buf);
|
||||||
|
return self->buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(isatty_doc,
|
PyDoc_STRVAR(isatty_doc,
|
||||||
|
@ -360,7 +288,7 @@ PyDoc_STRVAR(isatty_doc,
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytesio_isatty(bytesio *self)
|
bytesio_isatty(bytesio *self)
|
||||||
{
|
{
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
Py_RETURN_FALSE;
|
Py_RETURN_FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -370,10 +298,29 @@ PyDoc_STRVAR(tell_doc,
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytesio_tell(bytesio *self)
|
bytesio_tell(bytesio *self)
|
||||||
{
|
{
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
return PyLong_FromSsize_t(self->pos);
|
return PyLong_FromSsize_t(self->pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
read_bytes(bytesio *self, Py_ssize_t size)
|
||||||
|
{
|
||||||
|
char *output;
|
||||||
|
|
||||||
|
assert(self->buf != NULL);
|
||||||
|
if (size > 1 &&
|
||||||
|
self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) &&
|
||||||
|
self->exports == 0) {
|
||||||
|
self->pos += size;
|
||||||
|
Py_INCREF(self->buf);
|
||||||
|
return self->buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
output = PyBytes_AS_STRING(self->buf) + self->pos;
|
||||||
|
self->pos += size;
|
||||||
|
return PyBytes_FromStringAndSize(output, size);
|
||||||
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(read_doc,
|
PyDoc_STRVAR(read_doc,
|
||||||
"read([size]) -> read at most size bytes, returned as a string.\n"
|
"read([size]) -> read at most size bytes, returned as a string.\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
@ -384,10 +331,9 @@ static PyObject *
|
||||||
bytesio_read(bytesio *self, PyObject *args)
|
bytesio_read(bytesio *self, PyObject *args)
|
||||||
{
|
{
|
||||||
Py_ssize_t size, n;
|
Py_ssize_t size, n;
|
||||||
char *output;
|
|
||||||
PyObject *arg = Py_None;
|
PyObject *arg = Py_None;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|O:read", &arg))
|
if (!PyArg_ParseTuple(args, "|O:read", &arg))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -415,11 +361,7 @@ bytesio_read(bytesio *self, PyObject *args)
|
||||||
size = 0;
|
size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(self->buf != NULL);
|
return read_bytes(self, size);
|
||||||
output = self->buf + self->pos;
|
|
||||||
self->pos += size;
|
|
||||||
|
|
||||||
return PyBytes_FromStringAndSize(output, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -453,10 +395,9 @@ static PyObject *
|
||||||
bytesio_readline(bytesio *self, PyObject *args)
|
bytesio_readline(bytesio *self, PyObject *args)
|
||||||
{
|
{
|
||||||
Py_ssize_t size, n;
|
Py_ssize_t size, n;
|
||||||
char *output;
|
|
||||||
PyObject *arg = Py_None;
|
PyObject *arg = Py_None;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|O:readline", &arg))
|
if (!PyArg_ParseTuple(args, "|O:readline", &arg))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -478,9 +419,7 @@ bytesio_readline(bytesio *self, PyObject *args)
|
||||||
|
|
||||||
n = scan_eol(self, size);
|
n = scan_eol(self, size);
|
||||||
|
|
||||||
output = self->buf + self->pos;
|
return read_bytes(self, n);
|
||||||
self->pos += n;
|
|
||||||
return PyBytes_FromStringAndSize(output, n);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(readlines_doc,
|
PyDoc_STRVAR(readlines_doc,
|
||||||
|
@ -498,7 +437,7 @@ bytesio_readlines(bytesio *self, PyObject *args)
|
||||||
char *output;
|
char *output;
|
||||||
PyObject *arg = Py_None;
|
PyObject *arg = Py_None;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|O:readlines", &arg))
|
if (!PyArg_ParseTuple(args, "|O:readlines", &arg))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -523,7 +462,7 @@ bytesio_readlines(bytesio *self, PyObject *args)
|
||||||
if (!result)
|
if (!result)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
output = self->buf + self->pos;
|
output = PyBytes_AS_STRING(self->buf) + self->pos;
|
||||||
while ((n = scan_eol(self, -1)) != 0) {
|
while ((n = scan_eol(self, -1)) != 0) {
|
||||||
self->pos += n;
|
self->pos += n;
|
||||||
line = PyBytes_FromStringAndSize(output, n);
|
line = PyBytes_FromStringAndSize(output, n);
|
||||||
|
@ -558,7 +497,7 @@ bytesio_readinto(bytesio *self, PyObject *arg)
|
||||||
Py_buffer buffer;
|
Py_buffer buffer;
|
||||||
Py_ssize_t len, n;
|
Py_ssize_t len, n;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
|
|
||||||
if (!PyArg_Parse(arg, "w*", &buffer))
|
if (!PyArg_Parse(arg, "w*", &buffer))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -572,7 +511,7 @@ bytesio_readinto(bytesio *self, PyObject *arg)
|
||||||
len = 0;
|
len = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(buffer.buf, self->buf + self->pos, len);
|
memcpy(buffer.buf, PyBytes_AS_STRING(self->buf) + self->pos, len);
|
||||||
assert(self->pos + len < PY_SSIZE_T_MAX);
|
assert(self->pos + len < PY_SSIZE_T_MAX);
|
||||||
assert(len >= 0);
|
assert(len >= 0);
|
||||||
self->pos += len;
|
self->pos += len;
|
||||||
|
@ -593,7 +532,7 @@ bytesio_truncate(bytesio *self, PyObject *args)
|
||||||
Py_ssize_t size;
|
Py_ssize_t size;
|
||||||
PyObject *arg = Py_None;
|
PyObject *arg = Py_None;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
CHECK_EXPORTS(self);
|
CHECK_EXPORTS(self);
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
|
if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
|
||||||
|
@ -620,10 +559,6 @@ bytesio_truncate(bytesio *self, PyObject *args)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unshare(self, size, 1) < 0) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (size < self->string_size) {
|
if (size < self->string_size) {
|
||||||
self->string_size = size;
|
self->string_size = size;
|
||||||
if (resize_buffer(self, size) < 0)
|
if (resize_buffer(self, size) < 0)
|
||||||
|
@ -636,19 +571,16 @@ bytesio_truncate(bytesio *self, PyObject *args)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytesio_iternext(bytesio *self)
|
bytesio_iternext(bytesio *self)
|
||||||
{
|
{
|
||||||
const char *next;
|
|
||||||
Py_ssize_t n;
|
Py_ssize_t n;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
|
|
||||||
n = scan_eol(self, -1);
|
n = scan_eol(self, -1);
|
||||||
|
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
next = self->buf + self->pos;
|
return read_bytes(self, n);
|
||||||
self->pos += n;
|
|
||||||
return PyBytes_FromStringAndSize(next, n);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(seek_doc,
|
PyDoc_STRVAR(seek_doc,
|
||||||
|
@ -666,7 +598,7 @@ bytesio_seek(bytesio *self, PyObject *args)
|
||||||
Py_ssize_t pos;
|
Py_ssize_t pos;
|
||||||
int mode = 0;
|
int mode = 0;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
|
if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -721,7 +653,7 @@ bytesio_write(bytesio *self, PyObject *obj)
|
||||||
Py_buffer buf;
|
Py_buffer buf;
|
||||||
PyObject *result = NULL;
|
PyObject *result = NULL;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
CHECK_EXPORTS(self);
|
CHECK_EXPORTS(self);
|
||||||
|
|
||||||
if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0)
|
if (PyObject_GetBuffer(obj, &buf, PyBUF_CONTIG_RO) < 0)
|
||||||
|
@ -749,7 +681,7 @@ bytesio_writelines(bytesio *self, PyObject *v)
|
||||||
PyObject *it, *item;
|
PyObject *it, *item;
|
||||||
PyObject *ret;
|
PyObject *ret;
|
||||||
|
|
||||||
CHECK_CLOSED(self, NULL);
|
CHECK_CLOSED(self);
|
||||||
|
|
||||||
it = PyObject_GetIter(v);
|
it = PyObject_GetIter(v);
|
||||||
if (it == NULL)
|
if (it == NULL)
|
||||||
|
@ -780,7 +712,7 @@ static PyObject *
|
||||||
bytesio_close(bytesio *self)
|
bytesio_close(bytesio *self)
|
||||||
{
|
{
|
||||||
CHECK_EXPORTS(self);
|
CHECK_EXPORTS(self);
|
||||||
reset(self);
|
Py_CLEAR(self->buf);
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -828,11 +760,11 @@ bytesio_getstate(bytesio *self)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
bytesio_setstate(bytesio *self, PyObject *state)
|
bytesio_setstate(bytesio *self, PyObject *state)
|
||||||
{
|
{
|
||||||
|
PyObject *result;
|
||||||
PyObject *position_obj;
|
PyObject *position_obj;
|
||||||
PyObject *dict;
|
PyObject *dict;
|
||||||
Py_ssize_t pos;
|
Py_ssize_t pos;
|
||||||
|
|
||||||
CHECK_EXPORTS(self);
|
|
||||||
assert(state != NULL);
|
assert(state != NULL);
|
||||||
|
|
||||||
/* We allow the state tuple to be longer than 3, because we may need
|
/* We allow the state tuple to be longer than 3, because we may need
|
||||||
|
@ -844,13 +776,18 @@ bytesio_setstate(bytesio *self, PyObject *state)
|
||||||
Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
|
Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
CHECK_EXPORTS(self);
|
||||||
|
/* Reset the object to its default state. This is only needed to handle
|
||||||
|
the case of repeated calls to __setstate__. */
|
||||||
|
self->string_size = 0;
|
||||||
|
self->pos = 0;
|
||||||
|
|
||||||
/* Reset the object to its default state and set the value of the internal
|
/* Set the value of the internal buffer. If state[0] does not support the
|
||||||
* buffer. If state[0] does not support the buffer protocol, reinit() will
|
buffer protocol, bytesio_write will raise the appropriate TypeError. */
|
||||||
* raise the appropriate TypeError. */
|
result = bytesio_write(self, PyTuple_GET_ITEM(state, 0));
|
||||||
if (reinit(self, PyTuple_GET_ITEM(state, 0)) < 0) {
|
if (result == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
Py_DECREF(result);
|
||||||
|
|
||||||
/* Set carefully the position value. Alternatively, we could use the seek
|
/* Set carefully the position value. Alternatively, we could use the seek
|
||||||
method instead of modifying self->pos directly to better protect the
|
method instead of modifying self->pos directly to better protect the
|
||||||
|
@ -905,9 +842,7 @@ bytesio_dealloc(bytesio *self)
|
||||||
"deallocated BytesIO object has exported buffers");
|
"deallocated BytesIO object has exported buffers");
|
||||||
PyErr_Print();
|
PyErr_Print();
|
||||||
}
|
}
|
||||||
|
Py_CLEAR(self->buf);
|
||||||
reset(self);
|
|
||||||
|
|
||||||
Py_CLEAR(self->dict);
|
Py_CLEAR(self->dict);
|
||||||
if (self->weakreflist != NULL)
|
if (self->weakreflist != NULL)
|
||||||
PyObject_ClearWeakRefs((PyObject *) self);
|
PyObject_ClearWeakRefs((PyObject *) self);
|
||||||
|
@ -927,7 +862,7 @@ bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
/* tp_alloc initializes all the fields to zero. So we don't have to
|
/* tp_alloc initializes all the fields to zero. So we don't have to
|
||||||
initialize them here. */
|
initialize them here. */
|
||||||
|
|
||||||
self->buf = (char *)PyMem_Malloc(0);
|
self->buf = PyBytes_FromStringAndSize(NULL, 0);
|
||||||
if (self->buf == NULL) {
|
if (self->buf == NULL) {
|
||||||
Py_DECREF(self);
|
Py_DECREF(self);
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
|
@ -946,7 +881,33 @@ bytesio_init(bytesio *self, PyObject *args, PyObject *kwds)
|
||||||
&initvalue))
|
&initvalue))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
return reinit(self, initvalue);
|
/* In case __init__ is called multiple times. */
|
||||||
|
self->string_size = 0;
|
||||||
|
self->pos = 0;
|
||||||
|
|
||||||
|
if (self->exports > 0) {
|
||||||
|
PyErr_SetString(PyExc_BufferError,
|
||||||
|
"Existing exports of data: object cannot be re-sized");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (initvalue && initvalue != Py_None) {
|
||||||
|
if (PyBytes_CheckExact(initvalue)) {
|
||||||
|
Py_INCREF(initvalue);
|
||||||
|
Py_XDECREF(self->buf);
|
||||||
|
self->buf = initvalue;
|
||||||
|
self->string_size = PyBytes_GET_SIZE(initvalue);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PyObject *res;
|
||||||
|
res = bytesio_write(self, initvalue);
|
||||||
|
if (res == NULL)
|
||||||
|
return -1;
|
||||||
|
Py_DECREF(res);
|
||||||
|
self->pos = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -955,8 +916,8 @@ bytesio_sizeof(bytesio *self, void *unused)
|
||||||
Py_ssize_t res;
|
Py_ssize_t res;
|
||||||
|
|
||||||
res = sizeof(bytesio);
|
res = sizeof(bytesio);
|
||||||
if (self->buf)
|
if (self->buf && !SHARED_BUF(self))
|
||||||
res += self->buf_size;
|
res += _PySys_GetSizeOf(self->buf);
|
||||||
return PyLong_FromSsize_t(res);
|
return PyLong_FromSsize_t(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1066,11 +1027,16 @@ bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
bytesio *b = (bytesio *) obj->source;
|
bytesio *b = (bytesio *) obj->source;
|
||||||
|
if (SHARED_BUF(b)) {
|
||||||
|
if (unshare_buffer(b, b->string_size) < 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
if (view == NULL) {
|
if (view == NULL) {
|
||||||
b->exports++;
|
b->exports++;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size,
|
ret = PyBuffer_FillInfo(view, (PyObject*)obj,
|
||||||
|
PyBytes_AS_STRING(b->buf), b->string_size,
|
||||||
0, flags);
|
0, flags);
|
||||||
if (ret >= 0) {
|
if (ret >= 0) {
|
||||||
b->exports++;
|
b->exports++;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue