mirror of
https://github.com/python/cpython.git
synced 2025-10-22 06:32:43 +00:00
Add a note to the PyUnicode_CopyCharacters() doc: it doesn't write a terminating null character
Also clean up the code (avoid the goto).
This commit is contained in:
parent
ff1ef074ed
commit
a0702ab1fe
2 changed files with 47 additions and 44 deletions
|
@ -521,9 +521,9 @@ PyAPI_FUNC(int) _PyUnicode_Ready(
|
||||||
/* Copy characters from one Unicode object into another, this function performs
|
/* Copy characters from one Unicode object into another, this function performs
|
||||||
character conversion when necessary and falls back to memcpy if possible.
|
character conversion when necessary and falls back to memcpy if possible.
|
||||||
|
|
||||||
Fail if 'to' is smaller than how_many or smaller than len(from)-from_start,
|
Fail if to is too small (smaller than how_many or smaller than
|
||||||
or if kind(from[from_start:from_start+how_many]) > kind(to), or if to has
|
len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
|
||||||
more than 1 reference.
|
kind(to), or if to has more than 1 reference.
|
||||||
|
|
||||||
Return the number of characters written, or return -1 and raise an exception
|
Return the number of characters written, or return -1 and raise an exception
|
||||||
on error.
|
on error.
|
||||||
|
@ -533,6 +533,8 @@ PyAPI_FUNC(int) _PyUnicode_Ready(
|
||||||
how_many = min(how_many, len(from) - from_start)
|
how_many = min(how_many, len(from) - from_start)
|
||||||
to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
|
to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
|
||||||
return how_many
|
return how_many
|
||||||
|
|
||||||
|
Note: The function doesn't write a terminating null character.
|
||||||
*/
|
*/
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
|
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
|
||||||
|
|
|
@ -615,8 +615,8 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
PyObject *from, Py_ssize_t from_start,
|
PyObject *from, Py_ssize_t from_start,
|
||||||
Py_ssize_t how_many)
|
Py_ssize_t how_many)
|
||||||
{
|
{
|
||||||
unsigned int from_kind;
|
unsigned int from_kind, to_kind;
|
||||||
unsigned int to_kind;
|
void *from_data, *to_data;
|
||||||
|
|
||||||
assert(PyUnicode_Check(from));
|
assert(PyUnicode_Check(from));
|
||||||
assert(PyUnicode_Check(to));
|
assert(PyUnicode_Check(to));
|
||||||
|
@ -645,44 +645,20 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
_PyUnicode_DIRTY(to);
|
_PyUnicode_DIRTY(to);
|
||||||
|
|
||||||
from_kind = PyUnicode_KIND(from);
|
from_kind = PyUnicode_KIND(from);
|
||||||
|
from_data = PyUnicode_DATA(from);
|
||||||
to_kind = PyUnicode_KIND(to);
|
to_kind = PyUnicode_KIND(to);
|
||||||
|
to_data = PyUnicode_DATA(to);
|
||||||
|
|
||||||
if (from_kind == to_kind) {
|
if (from_kind == to_kind) {
|
||||||
/* fast path */
|
/* fast path */
|
||||||
Py_MEMCPY((char*)PyUnicode_DATA(to)
|
Py_MEMCPY((char*)to_data
|
||||||
+ PyUnicode_KIND_SIZE(to_kind, to_start),
|
+ PyUnicode_KIND_SIZE(to_kind, to_start),
|
||||||
(char*)PyUnicode_DATA(from)
|
(char*)from_data
|
||||||
+ PyUnicode_KIND_SIZE(from_kind, from_start),
|
+ PyUnicode_KIND_SIZE(from_kind, from_start),
|
||||||
PyUnicode_KIND_SIZE(to_kind, how_many));
|
PyUnicode_KIND_SIZE(to_kind, how_many));
|
||||||
return how_many;
|
|
||||||
}
|
}
|
||||||
|
else if (from_kind == PyUnicode_1BYTE_KIND
|
||||||
if (from_kind > to_kind) {
|
&& to_kind == PyUnicode_2BYTE_KIND)
|
||||||
/* slow path to check for character overflow */
|
|
||||||
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
|
|
||||||
void *from_data = PyUnicode_DATA(from);
|
|
||||||
void *to_data = PyUnicode_DATA(to);
|
|
||||||
Py_UCS4 ch, maxchar;
|
|
||||||
Py_ssize_t i;
|
|
||||||
int overflow;
|
|
||||||
|
|
||||||
maxchar = 0;
|
|
||||||
overflow = 0;
|
|
||||||
for (i=0; i < how_many; i++) {
|
|
||||||
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
|
|
||||||
if (ch > maxchar) {
|
|
||||||
maxchar = ch;
|
|
||||||
if (maxchar > to_maxchar) {
|
|
||||||
overflow = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
|
|
||||||
}
|
|
||||||
if (!overflow)
|
|
||||||
return how_many;
|
|
||||||
}
|
|
||||||
else if (from_kind == PyUnicode_1BYTE_KIND && to_kind == PyUnicode_2BYTE_KIND)
|
|
||||||
{
|
{
|
||||||
_PyUnicode_CONVERT_BYTES(
|
_PyUnicode_CONVERT_BYTES(
|
||||||
Py_UCS1, Py_UCS2,
|
Py_UCS1, Py_UCS2,
|
||||||
|
@ -690,7 +666,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
||||||
PyUnicode_2BYTE_DATA(to) + to_start
|
PyUnicode_2BYTE_DATA(to) + to_start
|
||||||
);
|
);
|
||||||
return how_many;
|
|
||||||
}
|
}
|
||||||
else if (from_kind == PyUnicode_1BYTE_KIND
|
else if (from_kind == PyUnicode_1BYTE_KIND
|
||||||
&& to_kind == PyUnicode_4BYTE_KIND)
|
&& to_kind == PyUnicode_4BYTE_KIND)
|
||||||
|
@ -701,7 +676,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
||||||
PyUnicode_4BYTE_DATA(to) + to_start
|
PyUnicode_4BYTE_DATA(to) + to_start
|
||||||
);
|
);
|
||||||
return how_many;
|
|
||||||
}
|
}
|
||||||
else if (from_kind == PyUnicode_2BYTE_KIND
|
else if (from_kind == PyUnicode_2BYTE_KIND
|
||||||
&& to_kind == PyUnicode_4BYTE_KIND)
|
&& to_kind == PyUnicode_4BYTE_KIND)
|
||||||
|
@ -712,14 +686,41 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
|
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
|
||||||
PyUnicode_4BYTE_DATA(to) + to_start
|
PyUnicode_4BYTE_DATA(to) + to_start
|
||||||
);
|
);
|
||||||
return how_many;
|
|
||||||
}
|
}
|
||||||
PyErr_Format(PyExc_ValueError,
|
else {
|
||||||
"Cannot copy UCS%u characters "
|
int invalid_kinds;
|
||||||
"into a string of UCS%u characters",
|
if (from_kind > to_kind) {
|
||||||
1 << (from_kind - 1),
|
/* slow path to check for character overflow */
|
||||||
1 << (to_kind -1));
|
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
|
||||||
return -1;
|
Py_UCS4 ch, maxchar;
|
||||||
|
Py_ssize_t i;
|
||||||
|
|
||||||
|
maxchar = 0;
|
||||||
|
invalid_kinds = 0;
|
||||||
|
for (i=0; i < how_many; i++) {
|
||||||
|
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
|
||||||
|
if (ch > maxchar) {
|
||||||
|
maxchar = ch;
|
||||||
|
if (maxchar > to_maxchar) {
|
||||||
|
invalid_kinds = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
invalid_kinds = 1;
|
||||||
|
if (invalid_kinds) {
|
||||||
|
PyErr_Format(PyExc_ValueError,
|
||||||
|
"Cannot copy UCS%u characters "
|
||||||
|
"into a string of UCS%u characters",
|
||||||
|
1 << (from_kind - 1),
|
||||||
|
1 << (to_kind -1));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return how_many;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find the maximum code point and count the number of surrogate pairs so a
|
/* Find the maximum code point and count the number of surrogate pairs so a
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue