Merging the py3k-pep3137 branch back into the py3k branch.

No detailed change log; see the change log of the py3k-pep3137 branch for details. The most obvious changes:

- str8 renamed to bytes (PyString at the C level);
- bytes renamed to buffer (PyBytes at the C level);
- PyString and PyUnicode are no longer compatible.

I.e. we now have an immutable bytes type (bytes) and a mutable bytes type (buffer). The behavior of PyString was modified quite a bit, to make it more bytes-like. Some changes are still on the to-do list.
2025-11-03 03:22:27 +00:00 · 2007-11-06 21:34:58 +00:00 · 2007-11-06 21:34:58 +00:00 · 98297ee781
commit 98297ee781
parent a19f80c6df
148 changed files with 2533 additions and 3517 deletions
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -646,7 +646,7 @@ decode_str(const char *str, struct tok_state *tok)
 				"unknown encoding: %s", tok->enc);
 			return error_ret(tok);
 		}
-		str = PyBytes_AsString(utf8);
+		str = PyString_AS_STRING(utf8);
 	}
 	assert(tok->decoding_buffer == NULL);
 	tok->decoding_buffer = utf8; /* CAUTION */
@@ -765,8 +765,8 @@ tok_nextc(register struct tok_state *tok)
 					tok->done = E_DECODE;
 					return EOF;
 				}
-				buflen = PyBytes_Size(u);
-				buf = PyBytes_AsString(u);
+				buflen = PyString_GET_SIZE(u);
+				buf = PyString_AS_STRING(u);
 				if (!buf) {
 					Py_DECREF(u);
 					tok->done = E_DECODE;
@@ -1550,7 +1550,7 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
 #else
 static PyObject *
 dec_utf8(const char *enc, const char *text, size_t len) {
-	PyObject *ret = NULL;	
+	PyObject *ret = NULL;
 	PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
 	if (unicode_text) {
 		ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
@@ -1560,7 +1560,7 @@ dec_utf8(const char *enc, const char *text, size_t len) {
 		PyErr_Clear();
 	}
        else {
-		assert(PyBytes_Check(ret));
+		assert(PyString_Check(ret));
 	}
 	return ret;
 }
@@ -1573,8 +1573,8 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
 		/* convert source to original encondig */
 		PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
 		if (lineobj != NULL) {
-			int linelen = PyBytes_GET_SIZE(lineobj);
-			const char *line = PyBytes_AS_STRING(lineobj);
+			int linelen = PyString_GET_SIZE(lineobj);
+			const char *line = PyString_AS_STRING(lineobj);
 			text = PyObject_MALLOC(linelen + 1);
 			if (text != NULL && line != NULL) {
 				if (linelen)
@@ -1582,19 +1582,18 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
 				text[linelen] = '\0';
 			}
 			Py_DECREF(lineobj);
-					
+
 			/* adjust error offset */
 			if (*offset > 1) {
-				PyObject *offsetobj = dec_utf8(tok->encoding, 
+				PyObject *offsetobj = dec_utf8(tok->encoding,
 							       tok->buf,
 							       *offset-1);
 				if (offsetobj) {
-					*offset = 1 +
-						PyBytes_GET_SIZE(offsetobj);
+					*offset = 1 + Py_Size(offsetobj);
 					Py_DECREF(offsetobj);
 				}
 			}
-			
+
 		}
 	}
 	return text;