Merging the py3k-pep3137 branch back into the py3k branch.

No detailed change log; just check out the change log for the py3k-pep3137
branch.  The most obvious changes:

  - str8 renamed to bytes (PyString at the C level);
  - bytes renamed to buffer (PyBytes at the C level);
  - PyString and PyUnicode are no longer compatible.

I.e. we now have an immutable bytes type (bytes) and a mutable bytes type (buffer).

The behavior of PyString was modified quite a bit, to make it more
bytes-like.  Some changes are still on the to-do list.
This commit is contained in:
Guido van Rossum 2007-11-06 21:34:58 +00:00
parent a19f80c6df
commit 98297ee781
148 changed files with 2533 additions and 3517 deletions

View file

@ -646,7 +646,7 @@ decode_str(const char *str, struct tok_state *tok)
"unknown encoding: %s", tok->enc);
return error_ret(tok);
}
str = PyBytes_AsString(utf8);
str = PyString_AS_STRING(utf8);
}
assert(tok->decoding_buffer == NULL);
tok->decoding_buffer = utf8; /* CAUTION */
@ -765,8 +765,8 @@ tok_nextc(register struct tok_state *tok)
tok->done = E_DECODE;
return EOF;
}
buflen = PyBytes_Size(u);
buf = PyBytes_AsString(u);
buflen = PyString_GET_SIZE(u);
buf = PyString_AS_STRING(u);
if (!buf) {
Py_DECREF(u);
tok->done = E_DECODE;
@ -1550,7 +1550,7 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
#else
static PyObject *
dec_utf8(const char *enc, const char *text, size_t len) {
PyObject *ret = NULL;
PyObject *ret = NULL;
PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
if (unicode_text) {
ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
@ -1560,7 +1560,7 @@ dec_utf8(const char *enc, const char *text, size_t len) {
PyErr_Clear();
}
else {
assert(PyBytes_Check(ret));
assert(PyString_Check(ret));
}
return ret;
}
@ -1573,8 +1573,8 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
/* convert source to original encoding */
PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
if (lineobj != NULL) {
int linelen = PyBytes_GET_SIZE(lineobj);
const char *line = PyBytes_AS_STRING(lineobj);
int linelen = PyString_GET_SIZE(lineobj);
const char *line = PyString_AS_STRING(lineobj);
text = PyObject_MALLOC(linelen + 1);
if (text != NULL && line != NULL) {
if (linelen)
@ -1582,19 +1582,18 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
text[linelen] = '\0';
}
Py_DECREF(lineobj);
/* adjust error offset */
if (*offset > 1) {
PyObject *offsetobj = dec_utf8(tok->encoding,
PyObject *offsetobj = dec_utf8(tok->encoding,
tok->buf,
*offset-1);
if (offsetobj) {
*offset = 1 +
PyBytes_GET_SIZE(offsetobj);
*offset = 1 + Py_Size(offsetobj);
Py_DECREF(offsetobj);
}
}
}
}
return text;