mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
Rewrite readlines() to speed it up -- about a factor of 2 on my
Indigo2, reading a 9Meg file from the local disk.
This commit is contained in:
parent
5449b6e123
commit
6263d5451c
1 changed file with 90 additions and 26 deletions
|
@ -451,25 +451,29 @@ file_read(f, args)
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
bytesread = 0;
|
bytesread = 0;
|
||||||
Py_BEGIN_ALLOW_THREADS
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
Py_BEGIN_ALLOW_THREADS
|
||||||
|
errno = 0;
|
||||||
chunksize = fread(BUF(v) + bytesread, 1,
|
chunksize = fread(BUF(v) + bytesread, 1,
|
||||||
buffersize - bytesread, f->f_fp);
|
buffersize - bytesread, f->f_fp);
|
||||||
/* XXX Error check? */
|
Py_END_ALLOW_THREADS
|
||||||
if (chunksize == 0)
|
if (chunksize == 0) {
|
||||||
break;
|
if (!ferror(f->f_fp))
|
||||||
|
break;
|
||||||
|
PyErr_SetFromErrno(PyExc_IOError);
|
||||||
|
clearerr(f->f_fp);
|
||||||
|
Py_DECREF(v);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
bytesread += chunksize;
|
bytesread += chunksize;
|
||||||
if (bytesread < buffersize)
|
if (bytesread < buffersize)
|
||||||
break;
|
break;
|
||||||
if (bytesrequested < 0) {
|
if (bytesrequested < 0) {
|
||||||
buffersize = new_buffersize(f, buffersize);
|
buffersize = new_buffersize(f, buffersize);
|
||||||
Py_BLOCK_THREADS
|
|
||||||
if (_PyString_Resize(&v, buffersize) < 0)
|
if (_PyString_Resize(&v, buffersize) < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
Py_UNBLOCK_THREADS
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Py_END_ALLOW_THREADS
|
|
||||||
if (bytesread != buffersize)
|
if (bytesread != buffersize)
|
||||||
_PyString_Resize(&v, bytesread);
|
_PyString_Resize(&v, bytesread);
|
||||||
return v;
|
return v;
|
||||||
|
@ -488,24 +492,21 @@ file_readinto(f, args)
|
||||||
if (!PyArg_Parse(args, "w#", &ptr, &ntodo))
|
if (!PyArg_Parse(args, "w#", &ptr, &ntodo))
|
||||||
return NULL;
|
return NULL;
|
||||||
ndone = 0;
|
ndone = 0;
|
||||||
/*
|
while (ntodo > 0) {
|
||||||
** XXXX Is this correct? Other threads may see partially-completed
|
Py_BEGIN_ALLOW_THREADS
|
||||||
** reads if they look at the object we're reading into...
|
errno = 0;
|
||||||
*/
|
|
||||||
Py_BEGIN_ALLOW_THREADS
|
|
||||||
while(ntodo > 0) {
|
|
||||||
nnow = fread(ptr+ndone, 1, ntodo, f->f_fp);
|
nnow = fread(ptr+ndone, 1, ntodo, f->f_fp);
|
||||||
if (nnow < 0 ) {
|
Py_END_ALLOW_THREADS
|
||||||
|
if (nnow == 0) {
|
||||||
|
if (!ferror(f->f_fp))
|
||||||
|
break;
|
||||||
PyErr_SetFromErrno(PyExc_IOError);
|
PyErr_SetFromErrno(PyExc_IOError);
|
||||||
clearerr(f->f_fp);
|
clearerr(f->f_fp);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (nnow == 0)
|
|
||||||
break;
|
|
||||||
ndone += nnow;
|
ndone += nnow;
|
||||||
ntodo -= nnow;
|
ntodo -= nnow;
|
||||||
}
|
}
|
||||||
Py_END_ALLOW_THREADS
|
|
||||||
return PyInt_FromLong(ndone);
|
return PyInt_FromLong(ndone);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -675,6 +676,14 @@ file_readlines(f, args)
|
||||||
{
|
{
|
||||||
PyObject *list;
|
PyObject *list;
|
||||||
PyObject *line;
|
PyObject *line;
|
||||||
|
char small_buffer[SMALLCHUNK];
|
||||||
|
char *buffer = small_buffer;
|
||||||
|
size_t buffersize = SMALLCHUNK;
|
||||||
|
PyObject *big_buffer = NULL;
|
||||||
|
size_t nfilled = 0;
|
||||||
|
size_t nread;
|
||||||
|
char *p, *q, *end;
|
||||||
|
int err;
|
||||||
|
|
||||||
if (f->f_fp == NULL)
|
if (f->f_fp == NULL)
|
||||||
return err_closed();
|
return err_closed();
|
||||||
|
@ -683,18 +692,73 @@ file_readlines(f, args)
|
||||||
if ((list = PyList_New(0)) == NULL)
|
if ((list = PyList_New(0)) == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
line = getline(f, 0);
|
Py_BEGIN_ALLOW_THREADS
|
||||||
if (line != NULL && PyString_Size(line) == 0) {
|
errno = 0;
|
||||||
Py_DECREF(line);
|
nread = fread(buffer+nfilled, 1, buffersize-nfilled, f->f_fp);
|
||||||
break;
|
Py_END_ALLOW_THREADS
|
||||||
}
|
if (nread == 0) {
|
||||||
if (line == NULL || PyList_Append(list, line) != 0) {
|
if (nread == 0)
|
||||||
|
break;
|
||||||
|
PyErr_SetFromErrno(PyExc_IOError);
|
||||||
|
clearerr(f->f_fp);
|
||||||
|
error:
|
||||||
Py_DECREF(list);
|
Py_DECREF(list);
|
||||||
Py_XDECREF(line);
|
list = NULL;
|
||||||
return NULL;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
Py_DECREF(line);
|
p = memchr(buffer+nfilled, '\n', nread);
|
||||||
|
if (p == NULL) {
|
||||||
|
/* Need a larger buffer to fit this line */
|
||||||
|
nfilled += nread;
|
||||||
|
buffersize *= 2;
|
||||||
|
if (big_buffer == NULL) {
|
||||||
|
/* Create the big buffer */
|
||||||
|
big_buffer = PyString_FromStringAndSize(
|
||||||
|
NULL, buffersize);
|
||||||
|
if (big_buffer == NULL)
|
||||||
|
goto error;
|
||||||
|
buffer = PyString_AS_STRING(big_buffer);
|
||||||
|
memcpy(buffer, small_buffer, nfilled);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Grow the big buffer */
|
||||||
|
_PyString_Resize(&big_buffer, buffersize);
|
||||||
|
buffer = PyString_AS_STRING(big_buffer);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
end = buffer+nfilled+nread;
|
||||||
|
q = buffer;
|
||||||
|
do {
|
||||||
|
/* Process complete lines */
|
||||||
|
p++;
|
||||||
|
line = PyString_FromStringAndSize(q, p-q);
|
||||||
|
if (line == NULL)
|
||||||
|
goto error;
|
||||||
|
err = PyList_Append(list, line);
|
||||||
|
Py_DECREF(line);
|
||||||
|
if (err != 0)
|
||||||
|
goto error;
|
||||||
|
q = p;
|
||||||
|
p = memchr(q, '\n', end-q);
|
||||||
|
} while (p != NULL);
|
||||||
|
/* Move the remaining incomplete line to the start */
|
||||||
|
nfilled = end-q;
|
||||||
|
memmove(buffer, q, nfilled);
|
||||||
}
|
}
|
||||||
|
if (nfilled != 0) {
|
||||||
|
/* Partial last line */
|
||||||
|
line = PyString_FromStringAndSize(buffer, nfilled);
|
||||||
|
if (line == NULL)
|
||||||
|
goto error;
|
||||||
|
err = PyList_Append(list, line);
|
||||||
|
Py_DECREF(line);
|
||||||
|
if (err != 0)
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
cleanup:
|
||||||
|
if (big_buffer)
|
||||||
|
Py_DECREF(big_buffer);
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue