mirror of
https://github.com/python/cpython.git
synced 2025-09-14 12:46:49 +00:00
Commit strict str/bytes distinction.
From now on, trying to write str to a binary stream is an error (I'm still working on the reverse). There are still (at least) two failing tests, test_asynchat and test_urllib2_localnet, but I'm sure these will be fixed by someone.
This commit is contained in:
parent
245b42ec4b
commit
a74184eb1d
3 changed files with 59 additions and 119 deletions
13
Lib/io.py
13
Lib/io.py
|
@ -659,12 +659,14 @@ class BytesIO(BufferedIOBase):
|
||||||
def write(self, b):
|
def write(self, b):
|
||||||
if self.closed:
|
if self.closed:
|
||||||
raise ValueError("write to closed file")
|
raise ValueError("write to closed file")
|
||||||
|
if isinstance(b, str):
|
||||||
|
raise TypeError("can't write str to binary stream")
|
||||||
n = len(b)
|
n = len(b)
|
||||||
newpos = self._pos + n
|
newpos = self._pos + n
|
||||||
if newpos > len(self._buffer):
|
if newpos > len(self._buffer):
|
||||||
# Inserts null bytes between the current end of the file
|
# Inserts null bytes between the current end of the file
|
||||||
# and the new write position.
|
# and the new write position.
|
||||||
padding = '\x00' * (newpos - len(self._buffer) - n)
|
padding = b'\x00' * (newpos - len(self._buffer) - n)
|
||||||
self._buffer[self._pos:newpos - n] = padding
|
self._buffer[self._pos:newpos - n] = padding
|
||||||
self._buffer[self._pos:newpos] = b
|
self._buffer[self._pos:newpos] = b
|
||||||
self._pos = newpos
|
self._pos = newpos
|
||||||
|
@ -801,11 +803,8 @@ class BufferedWriter(_BufferedIOMixin):
|
||||||
def write(self, b):
|
def write(self, b):
|
||||||
if self.closed:
|
if self.closed:
|
||||||
raise ValueError("write to closed file")
|
raise ValueError("write to closed file")
|
||||||
if not isinstance(b, bytes):
|
if isinstance(b, str):
|
||||||
if hasattr(b, "__index__"):
|
raise TypeError("can't write str to binary stream")
|
||||||
raise TypeError("Can't write object of type %s" %
|
|
||||||
type(b).__name__)
|
|
||||||
b = bytes(b)
|
|
||||||
# XXX we can implement some more tricks to try and avoid partial writes
|
# XXX we can implement some more tricks to try and avoid partial writes
|
||||||
if len(self._write_buf) > self.buffer_size:
|
if len(self._write_buf) > self.buffer_size:
|
||||||
# We're full, so let's pre-flush the buffer
|
# We're full, so let's pre-flush the buffer
|
||||||
|
@ -1099,8 +1098,6 @@ class TextIOWrapper(TextIOBase):
|
||||||
s = s.replace("\n", self._writenl)
|
s = s.replace("\n", self._writenl)
|
||||||
# XXX What if we were just reading?
|
# XXX What if we were just reading?
|
||||||
b = s.encode(self._encoding)
|
b = s.encode(self._encoding)
|
||||||
if isinstance(b, str):
|
|
||||||
b = bytes(b)
|
|
||||||
self.buffer.write(b)
|
self.buffer.write(b)
|
||||||
if haslf and self.isatty():
|
if haslf and self.isatty():
|
||||||
self.flush()
|
self.flush()
|
||||||
|
|
|
@ -82,7 +82,13 @@ _getbuffer(PyObject *obj, PyBuffer *view)
|
||||||
|
|
||||||
if (buffer == NULL ||
|
if (buffer == NULL ||
|
||||||
PyUnicode_Check(obj) ||
|
PyUnicode_Check(obj) ||
|
||||||
buffer->bf_getbuffer == NULL) return -1;
|
buffer->bf_getbuffer == NULL)
|
||||||
|
{
|
||||||
|
PyErr_Format(PyExc_TypeError,
|
||||||
|
"Type %.100s doesn't support the buffer API",
|
||||||
|
Py_Type(obj)->tp_name);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
|
if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -167,7 +173,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
|
||||||
else if (size < alloc) {
|
else if (size < alloc) {
|
||||||
/* Within allocated size; quick exit */
|
/* Within allocated size; quick exit */
|
||||||
Py_Size(self) = size;
|
Py_Size(self) = size;
|
||||||
((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
|
((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else if (size <= alloc * 1.125) {
|
else if (size <= alloc * 1.125) {
|
||||||
|
@ -181,7 +187,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
|
||||||
|
|
||||||
if (((PyBytesObject *)self)->ob_exports > 0) {
|
if (((PyBytesObject *)self)->ob_exports > 0) {
|
||||||
/*
|
/*
|
||||||
fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports, ((PyBytesObject *)self)->ob_bytes);
|
fprintf(stderr, "%d: %s", ((PyBytesObject *)self)->ob_exports,
|
||||||
|
((PyBytesObject *)self)->ob_bytes);
|
||||||
*/
|
*/
|
||||||
PyErr_SetString(PyExc_BufferError,
|
PyErr_SetString(PyExc_BufferError,
|
||||||
"Existing exports of data: object cannot be re-sized");
|
"Existing exports of data: object cannot be re-sized");
|
||||||
|
@ -262,8 +269,8 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
|
||||||
PyBuffer vo;
|
PyBuffer vo;
|
||||||
|
|
||||||
if (_getbuffer(other, &vo) < 0) {
|
if (_getbuffer(other, &vo) < 0) {
|
||||||
PyErr_Format(PyExc_TypeError,
|
PyErr_Format(PyExc_TypeError, "can't concat bytes to %.100s",
|
||||||
"can't concat bytes to %.100s", Py_Type(self)->tp_name);
|
Py_Type(self)->tp_name);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -757,8 +764,11 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
|
||||||
if (PyUnicode_Check(arg)) {
|
if (PyUnicode_Check(arg)) {
|
||||||
/* Encode via the codec registry */
|
/* Encode via the codec registry */
|
||||||
PyObject *encoded, *new;
|
PyObject *encoded, *new;
|
||||||
if (encoding == NULL)
|
if (encoding == NULL) {
|
||||||
encoding = PyUnicode_GetDefaultEncoding();
|
PyErr_SetString(PyExc_TypeError,
|
||||||
|
"string argument without an encoding");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
encoded = PyCodec_Encode(arg, encoding, errors);
|
encoded = PyCodec_Encode(arg, encoding, errors);
|
||||||
if (encoded == NULL)
|
if (encoded == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -954,12 +964,14 @@ bytes_richcompare(PyObject *self, PyObject *other, int op)
|
||||||
|
|
||||||
self_size = _getbuffer(self, &self_bytes);
|
self_size = _getbuffer(self, &self_bytes);
|
||||||
if (self_size < 0) {
|
if (self_size < 0) {
|
||||||
|
PyErr_Clear();
|
||||||
Py_INCREF(Py_NotImplemented);
|
Py_INCREF(Py_NotImplemented);
|
||||||
return Py_NotImplemented;
|
return Py_NotImplemented;
|
||||||
}
|
}
|
||||||
|
|
||||||
other_size = _getbuffer(other, &other_bytes);
|
other_size = _getbuffer(other, &other_bytes);
|
||||||
if (other_size < 0) {
|
if (other_size < 0) {
|
||||||
|
PyErr_Clear();
|
||||||
PyObject_ReleaseBuffer(self, &self_bytes);
|
PyObject_ReleaseBuffer(self, &self_bytes);
|
||||||
Py_INCREF(Py_NotImplemented);
|
Py_INCREF(Py_NotImplemented);
|
||||||
return Py_NotImplemented;
|
return Py_NotImplemented;
|
||||||
|
@ -1061,10 +1073,11 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
|
||||||
sub_len = PyBytes_GET_SIZE(subobj);
|
sub_len = PyBytes_GET_SIZE(subobj);
|
||||||
}
|
}
|
||||||
/* XXX --> use the modern buffer interface */
|
/* XXX --> use the modern buffer interface */
|
||||||
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
|
else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) {
|
||||||
/* XXX - the "expected a character buffer object" is pretty
|
/* XXX - the "expected a character buffer object" is pretty
|
||||||
confusing for a non-expert. remap to something else ? */
|
confusing for a non-expert. remap to something else ? */
|
||||||
return -2;
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
if (dir > 0)
|
if (dir > 0)
|
||||||
return stringlib_find_slice(
|
return stringlib_find_slice(
|
||||||
|
@ -2021,48 +2034,23 @@ bytes_replace(PyBytesObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
Py_ssize_t count = -1;
|
Py_ssize_t count = -1;
|
||||||
PyObject *from, *to, *res;
|
PyObject *from, *to, *res;
|
||||||
const char *from_s, *to_s;
|
|
||||||
Py_ssize_t from_len, to_len;
|
|
||||||
int relfrom=0, relto=0;
|
|
||||||
PyBuffer vfrom, vto;
|
PyBuffer vfrom, vto;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
|
if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (PyBytes_Check(from)) {
|
if (_getbuffer(from, &vfrom) < 0)
|
||||||
from_s = PyBytes_AS_STRING(from);
|
|
||||||
from_len = PyBytes_GET_SIZE(from);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (PyObject_GetBuffer(from, &vfrom, PyBUF_CHARACTER) < 0)
|
|
||||||
return NULL;
|
return NULL;
|
||||||
from_s = vfrom.buf;
|
if (_getbuffer(to, &vto) < 0) {
|
||||||
from_len = vfrom.len;
|
|
||||||
relfrom = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (PyBytes_Check(to)) {
|
|
||||||
to_s = PyBytes_AS_STRING(to);
|
|
||||||
to_len = PyBytes_GET_SIZE(to);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (PyObject_GetBuffer(to, &vto, PyBUF_CHARACTER) < 0) {
|
|
||||||
if (relfrom)
|
|
||||||
PyObject_ReleaseBuffer(from, &vfrom);
|
PyObject_ReleaseBuffer(from, &vfrom);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
to_s = vto.buf;
|
|
||||||
to_len = vto.len;
|
|
||||||
relto = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
res = (PyObject *)replace((PyBytesObject *) self,
|
res = (PyObject *)replace((PyBytesObject *) self,
|
||||||
from_s, from_len,
|
vfrom.buf, vfrom.len,
|
||||||
to_s, to_len, count);
|
vto.buf, vto.len, count);
|
||||||
|
|
||||||
if (relfrom)
|
|
||||||
PyObject_ReleaseBuffer(from, &vfrom);
|
PyObject_ReleaseBuffer(from, &vfrom);
|
||||||
if (relto)
|
|
||||||
PyObject_ReleaseBuffer(to, &vto);
|
PyObject_ReleaseBuffer(to, &vto);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
|
@ -965,31 +965,11 @@ PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* For b/w compatibility we also accept Unicode objects provided
|
|
||||||
that no encodings is given and then redirect to
|
|
||||||
PyObject_Unicode() which then applies the additional logic for
|
|
||||||
Unicode subclasses.
|
|
||||||
|
|
||||||
NOTE: This API should really only be used for object which
|
|
||||||
represent *encoded* Unicode !
|
|
||||||
|
|
||||||
*/
|
|
||||||
if (PyUnicode_Check(obj)) {
|
|
||||||
if (encoding) {
|
|
||||||
PyErr_SetString(PyExc_TypeError,
|
|
||||||
"decoding Unicode is not supported");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
return PyObject_Unicode(obj);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if (PyUnicode_Check(obj)) {
|
if (PyUnicode_Check(obj)) {
|
||||||
PyErr_SetString(PyExc_TypeError,
|
PyErr_SetString(PyExc_TypeError,
|
||||||
"decoding Unicode is not supported");
|
"decoding Unicode is not supported");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Coerce object */
|
/* Coerce object */
|
||||||
if (PyString_Check(obj)) {
|
if (PyString_Check(obj)) {
|
||||||
|
@ -6440,26 +6420,7 @@ able to handle UnicodeDecodeErrors.");
|
||||||
static PyObject *
|
static PyObject *
|
||||||
unicode_decode(PyUnicodeObject *self, PyObject *args)
|
unicode_decode(PyUnicodeObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
char *encoding = NULL;
|
PyErr_Format(PyExc_TypeError, "decoding str is not supported");
|
||||||
char *errors = NULL;
|
|
||||||
PyObject *v;
|
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
|
|
||||||
return NULL;
|
|
||||||
v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors);
|
|
||||||
if (v == NULL)
|
|
||||||
goto onError;
|
|
||||||
if (!PyString_Check(v) && !PyUnicode_Check(v)) {
|
|
||||||
PyErr_Format(PyExc_TypeError,
|
|
||||||
"decoder did not return a string/unicode object "
|
|
||||||
"(type=%.400s)",
|
|
||||||
Py_Type(v)->tp_name);
|
|
||||||
Py_DECREF(v);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
return v;
|
|
||||||
|
|
||||||
onError:
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8136,17 +8097,11 @@ unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (flags & PyBUF_CHARACTER) {
|
if (flags & PyBUF_CHARACTER) {
|
||||||
PyObject *str;
|
PyErr_SetString(PyExc_SystemError, "can't use str as char buffer");
|
||||||
|
return -1;
|
||||||
str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
|
|
||||||
if (str == NULL) return -1;
|
|
||||||
return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str),
|
|
||||||
PyString_GET_SIZE(str), 1, flags);
|
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
return PyBuffer_FillInfo(view, (void *)self->str,
|
return PyBuffer_FillInfo(view, (void *)self->str,
|
||||||
PyUnicode_GET_DATA_SIZE(self), 1, flags);
|
PyUnicode_GET_DATA_SIZE(self), 1, flags);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue