Merging the py3k-pep3137 branch back into the py3k branch.

No detailed change log; just check out the change log for the py3k-pep3137
branch.  The most obvious changes:

  - str8 renamed to bytes (PyString at the C level);
  - bytes renamed to buffer (PyBytes at the C level);
  - PyString and PyUnicode are no longer compatible.

I.e. we now have an immutable bytes type and a mutable bytes type.

The behavior of PyString was modified quite a bit, to make it more
bytes-like.  Some changes are still on the to-do list.
This commit is contained in:
Guido van Rossum 2007-11-06 21:34:58 +00:00
parent a19f80c6df
commit 98297ee781
148 changed files with 2533 additions and 3517 deletions

View file

@ -25,26 +25,17 @@ functions should be applied to nil objects.
*/
/* Caching the hash (ob_shash) saves recalculation of a string's hash value.
Interning strings (ob_sstate) tries to ensure that only one string
object with a given value exists, so equality tests can be one pointer
comparison. This is generally restricted to strings that "look like"
Python identifiers, although the sys.intern() function can be used to force
interning of any string.
Together, these sped the interpreter by up to 20%. */
This significantly speeds up dict lookups. */
typedef struct {
PyObject_VAR_HEAD
long ob_shash;
int ob_sstate;
char ob_sval[1];
/* Invariants:
* ob_sval contains space for 'ob_size+1' elements.
* ob_sval[ob_size] == 0.
* ob_shash is the hash of the string or -1 if not computed yet.
* ob_sstate != 0 iff the string object is in stringobject.c's
* 'interned' dictionary; in this case the two references
* from 'interned' to this object are *not counted* in ob_refcnt.
*/
} PyStringObject;
@ -74,86 +65,20 @@ PyAPI_FUNC(PyObject *) PyString_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t,
const char *);
PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
PyAPI_FUNC(void) PyString_InternImmortal(PyObject **);
PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);
PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void);
/* Use only if you know it's a string */
#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate)
/* Macro, trading safety for speed */
#define PyString_AS_STRING(op) (assert(PyString_Check(op)),(((PyStringObject *)(op))->ob_sval))
#define PyString_AS_STRING(op) (assert(PyString_Check(op)), \
(((PyStringObject *)(op))->ob_sval))
#define PyString_GET_SIZE(op) (assert(PyString_Check(op)),Py_Size(op))
/* _PyString_Join(sep, x) is like sep.join(x). sep must be PyStringObject*,
x must be an iterable object. */
PyAPI_FUNC(PyObject *) _PyString_Join(PyObject *sep, PyObject *x);
/* --- Generic Codecs ----------------------------------------------------- */
/* Create an object by decoding the encoded string s of the
given size. */
PyAPI_FUNC(PyObject*) PyString_Decode(
const char *s, /* encoded string */
Py_ssize_t size, /* size of buffer */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Encodes a string object and returns the result as Python
object. */
PyAPI_FUNC(PyObject*) PyString_AsEncodedObject(
PyObject *str, /* string object */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Encodes a string object and returns the result as Python string
object.
If the codec returns an Unicode object, the object is converted
back to a string using the default encoding.
DEPRECATED - use PyString_AsEncodedObject() instead. */
PyAPI_FUNC(PyObject*) PyString_AsEncodedString(
PyObject *str, /* string object */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Decodes a string object and returns the result as Python
object. */
PyAPI_FUNC(PyObject*) PyString_AsDecodedObject(
PyObject *str, /* string object */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Decodes a string object and returns the result as Python string
object.
If the codec returns an Unicode object, the object is converted
back to a string using the default encoding.
DEPRECATED - use PyString_AsDecodedObject() instead. */
PyAPI_FUNC(PyObject*) PyString_AsDecodedString(
PyObject *str, /* string object */
const char *encoding, /* encoding */
const char *errors /* error handling */
);
/* Provides access to the internal data buffer and size of a string
object or the default encoded version of an Unicode object. Passing
NULL as *len parameter will force the string buffer to be
0-terminated (passing a string with embedded NULL characters will
cause an exception). */
PyAPI_FUNC(int) PyString_AsStringAndSize(
register PyObject *obj, /* string or Unicode object */
register char **s, /* pointer to buffer variable */
@ -162,6 +87,12 @@ PyAPI_FUNC(int) PyString_AsStringAndSize(
strings) */
);
/* Flags used by string formatting */
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
#ifdef __cplusplus
}