mirror of
https://github.com/python/cpython.git
synced 2025-11-01 18:51:43 +00:00
Merging the py3k-pep3137 branch back into the py3k branch.
No detailed change log; just check out the change log for the py3k-pep3137 branch. The most obvious changes: - str8 renamed to bytes (PyString at the C level); - bytes renamed to buffer (PyBytes at the C level); - PyString and PyUnicode are no longer compatible. I.e. we now have an immutable bytes type and a mutable bytes type. The behavior of PyString was modified quite a bit, to make it more bytes-like. Some changes are still on the to-do list.
This commit is contained in:
parent
a19f80c6df
commit
98297ee781
148 changed files with 2533 additions and 3517 deletions
|
|
@ -646,7 +646,7 @@ decode_str(const char *str, struct tok_state *tok)
|
|||
"unknown encoding: %s", tok->enc);
|
||||
return error_ret(tok);
|
||||
}
|
||||
str = PyBytes_AsString(utf8);
|
||||
str = PyString_AS_STRING(utf8);
|
||||
}
|
||||
assert(tok->decoding_buffer == NULL);
|
||||
tok->decoding_buffer = utf8; /* CAUTION */
|
||||
|
|
@ -765,8 +765,8 @@ tok_nextc(register struct tok_state *tok)
|
|||
tok->done = E_DECODE;
|
||||
return EOF;
|
||||
}
|
||||
buflen = PyBytes_Size(u);
|
||||
buf = PyBytes_AsString(u);
|
||||
buflen = PyString_GET_SIZE(u);
|
||||
buf = PyString_AS_STRING(u);
|
||||
if (!buf) {
|
||||
Py_DECREF(u);
|
||||
tok->done = E_DECODE;
|
||||
|
|
@ -1550,7 +1550,7 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
|
|||
#else
|
||||
static PyObject *
|
||||
dec_utf8(const char *enc, const char *text, size_t len) {
|
||||
PyObject *ret = NULL;
|
||||
PyObject *ret = NULL;
|
||||
PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
|
||||
if (unicode_text) {
|
||||
ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
|
||||
|
|
@ -1560,7 +1560,7 @@ dec_utf8(const char *enc, const char *text, size_t len) {
|
|||
PyErr_Clear();
|
||||
}
|
||||
else {
|
||||
assert(PyBytes_Check(ret));
|
||||
assert(PyString_Check(ret));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -1573,8 +1573,8 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
|
|||
/* convert source to original encoding */
|
||||
PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
|
||||
if (lineobj != NULL) {
|
||||
int linelen = PyBytes_GET_SIZE(lineobj);
|
||||
const char *line = PyBytes_AS_STRING(lineobj);
|
||||
int linelen = PyString_GET_SIZE(lineobj);
|
||||
const char *line = PyString_AS_STRING(lineobj);
|
||||
text = PyObject_MALLOC(linelen + 1);
|
||||
if (text != NULL && line != NULL) {
|
||||
if (linelen)
|
||||
|
|
@ -1582,19 +1582,18 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
|
|||
text[linelen] = '\0';
|
||||
}
|
||||
Py_DECREF(lineobj);
|
||||
|
||||
|
||||
/* adjust error offset */
|
||||
if (*offset > 1) {
|
||||
PyObject *offsetobj = dec_utf8(tok->encoding,
|
||||
PyObject *offsetobj = dec_utf8(tok->encoding,
|
||||
tok->buf,
|
||||
*offset-1);
|
||||
if (offsetobj) {
|
||||
*offset = 1 +
|
||||
PyBytes_GET_SIZE(offsetobj);
|
||||
*offset = 1 + Py_Size(offsetobj);
|
||||
Py_DECREF(offsetobj);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
return text;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue