mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Improve string forms and PyUnicode_Resize() documentation
Also remove the FIXME for resize_copy(): as discussed with Martin, copying the string on resize when the string is not resizable is just fine.
This commit is contained in:
parent
77bb47b312
commit
8cfcbed4e3
2 changed files with 24 additions and 15 deletions
|
@ -206,7 +206,7 @@ extern "C" {
|
||||||
immediately follow the structure. utf8_length and wstr_length can be found
|
immediately follow the structure. utf8_length and wstr_length can be found
|
||||||
in the length field; the utf8 pointer is equal to the data pointer. */
|
in the length field; the utf8 pointer is equal to the data pointer. */
|
||||||
typedef struct {
|
typedef struct {
|
||||||
/* Unicode strings can be in 4 states:
|
/* There are 4 forms of Unicode strings:
|
||||||
|
|
||||||
- compact ascii:
|
- compact ascii:
|
||||||
|
|
||||||
|
@ -227,7 +227,7 @@ typedef struct {
|
||||||
* ascii = 0
|
* ascii = 0
|
||||||
* utf8 != data
|
* utf8 != data
|
||||||
|
|
||||||
- string created by the legacy API (not ready):
|
- legacy string, not ready:
|
||||||
|
|
||||||
* structure = PyUnicodeObject
|
* structure = PyUnicodeObject
|
||||||
* kind = PyUnicode_WCHAR_KIND
|
* kind = PyUnicode_WCHAR_KIND
|
||||||
|
@ -239,7 +239,7 @@ typedef struct {
|
||||||
* interned = SSTATE_NOT_INTERNED
|
* interned = SSTATE_NOT_INTERNED
|
||||||
* ascii = 0
|
* ascii = 0
|
||||||
|
|
||||||
- string created by the legacy API, ready:
|
- legacy string, ready:
|
||||||
|
|
||||||
* structure = PyUnicodeObject structure
|
* structure = PyUnicodeObject structure
|
||||||
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
|
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
|
||||||
|
@ -249,10 +249,16 @@ typedef struct {
|
||||||
* data.any is not NULL
|
* data.any is not NULL
|
||||||
* utf8 = data if ascii is 1
|
* utf8 = data if ascii is 1
|
||||||
|
|
||||||
String created by the legacy API becomes ready when calling
|
Compact strings use only one memory block (structure + characters),
|
||||||
PyUnicode_READY().
|
whereas legacy strings use one block for the structure and one block
|
||||||
|
for characters.
|
||||||
|
|
||||||
See also _PyUnicode_CheckConsistency(). */
|
Legacy strings are created by PyUnicode_FromUnicode() and
|
||||||
|
PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
|
||||||
|
when PyUnicode_READY() is called.
|
||||||
|
|
||||||
|
See also _PyUnicode_CheckConsistency().
|
||||||
|
*/
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
Py_ssize_t length; /* Number of code points in the string */
|
Py_ssize_t length; /* Number of code points in the string */
|
||||||
Py_hash_t hash; /* Hash value; -1 if not set */
|
Py_hash_t hash; /* Hash value; -1 if not set */
|
||||||
|
@ -721,19 +727,22 @@ PyAPI_FUNC(int) PyUnicode_WriteChar(
|
||||||
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
|
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Resize an already allocated Unicode object to the new size length.
|
/* Resize a Unicode object allocated by the legacy API (e.g.
|
||||||
|
PyUnicode_FromUnicode). Unicode objects allocated by the new API (e.g.
|
||||||
|
PyUnicode_New) cannot be resized by this function.
|
||||||
|
|
||||||
|
The length is a number of Py_UNICODE characters (and not the number of code
|
||||||
|
points).
|
||||||
|
|
||||||
*unicode is modified to point to the new (resized) object and 0
|
*unicode is modified to point to the new (resized) object and 0
|
||||||
returned on success.
|
returned on success.
|
||||||
|
|
||||||
This API may only be called by the function which also called the
|
If the refcount on the object is 1, the function resizes the string in
|
||||||
Unicode constructor. The refcount on the object must be 1. Otherwise,
|
place, which is usually faster than allocating a new string (and copying
|
||||||
an error is returned.
|
characters).
|
||||||
|
|
||||||
Error handling is implemented as follows: an exception is set, -1
|
Error handling is implemented as follows: an exception is set, -1
|
||||||
is returned and *unicode left untouched.
|
is returned and *unicode left untouched. */
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
PyAPI_FUNC(int) PyUnicode_Resize(
|
PyAPI_FUNC(int) PyUnicode_Resize(
|
||||||
PyObject **unicode, /* Pointer to the Unicode object */
|
PyObject **unicode, /* Pointer to the Unicode object */
|
||||||
|
|
|
@ -536,7 +536,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return copy;
|
return copy;
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
PyUnicodeObject *w;
|
PyUnicodeObject *w;
|
||||||
assert(_PyUnicode_WSTR(unicode) != NULL);
|
assert(_PyUnicode_WSTR(unicode) != NULL);
|
||||||
assert(_PyUnicode_DATA_ANY(unicode) == NULL);
|
assert(_PyUnicode_DATA_ANY(unicode) == NULL);
|
||||||
|
@ -1294,7 +1295,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
|
||||||
if (old_length == length)
|
if (old_length == length)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* FIXME: really create a new object? */
|
|
||||||
if (!unicode_resizable(unicode)) {
|
if (!unicode_resizable(unicode)) {
|
||||||
PyObject *copy = resize_copy(unicode, length);
|
PyObject *copy = resize_copy(unicode, length);
|
||||||
if (copy == NULL)
|
if (copy == NULL)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue