mirror of
https://github.com/python/cpython.git
synced 2025-08-31 05:58:33 +00:00
PyUnicode_CopyCharacters() checks for buffer and character overflow
It now returns the number of written characters on success.
This commit is contained in:
parent
fb5f5f2420
commit
be78eaf2de
2 changed files with 91 additions and 84 deletions
|
@ -519,10 +519,22 @@ PyAPI_FUNC(int) _PyUnicode_Ready(
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Copy character from one unicode object into another, this function performs
|
/* Copy character from one unicode object into another, this function performs
|
||||||
character conversion when nessesary and falls back to memcpy if possible.
|
character conversion when necessary and falls back to memcpy if possible.
|
||||||
Return -1 and raise an exception on error, return 0 on success. */
|
|
||||||
|
Fail if 'to' is smaller than how_many or smaller than len(from)-from_start,
|
||||||
|
or if kind(from[from_start:from_start+how_many]) > kind(to).
|
||||||
|
|
||||||
|
Return the number of written characters, or return -1 and raise an exception
|
||||||
|
on error.
|
||||||
|
|
||||||
|
Pseudo-code:
|
||||||
|
|
||||||
|
how_many = min(how_many, len(from) - from_start)
|
||||||
|
to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
|
||||||
|
return how_many
|
||||||
|
*/
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
PyAPI_FUNC(int) PyUnicode_CopyCharacters(
|
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
|
||||||
PyObject *to,
|
PyObject *to,
|
||||||
Py_ssize_t to_start,
|
Py_ssize_t to_start,
|
||||||
PyObject *from,
|
PyObject *from,
|
||||||
|
|
|
@ -606,13 +606,13 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int
|
Py_ssize_t
|
||||||
PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
PyObject *from, Py_ssize_t from_start,
|
PyObject *from, Py_ssize_t from_start,
|
||||||
Py_ssize_t how_many)
|
Py_ssize_t how_many)
|
||||||
{
|
{
|
||||||
int from_kind;
|
unsigned int from_kind;
|
||||||
int to_kind;
|
unsigned int to_kind;
|
||||||
|
|
||||||
assert(PyUnicode_Check(from));
|
assert(PyUnicode_Check(from));
|
||||||
assert(PyUnicode_Check(to));
|
assert(PyUnicode_Check(to));
|
||||||
|
@ -622,94 +622,89 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
if (PyUnicode_READY(to))
|
if (PyUnicode_READY(to))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
how_many = PY_MIN(PyUnicode_GET_LENGTH(from), how_many);
|
||||||
|
if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
|
||||||
|
PyErr_Format(PyExc_ValueError,
|
||||||
|
"Cannot write %zi characters at %zi "
|
||||||
|
"in a string of %zi characters",
|
||||||
|
how_many, to_start, PyUnicode_GET_LENGTH(to));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
from_kind = PyUnicode_KIND(from);
|
from_kind = PyUnicode_KIND(from);
|
||||||
to_kind = PyUnicode_KIND(to);
|
to_kind = PyUnicode_KIND(to);
|
||||||
|
|
||||||
if (from_kind == to_kind) {
|
if (from_kind == to_kind) {
|
||||||
const Py_ssize_t char_size = PyUnicode_CHARACTER_SIZE(to);
|
/* fast path */
|
||||||
Py_MEMCPY(PyUnicode_1BYTE_DATA(to) + (to_start * char_size),
|
Py_MEMCPY((char*)PyUnicode_DATA(to)
|
||||||
PyUnicode_1BYTE_DATA(from) + (from_start * char_size),
|
+ PyUnicode_KIND_SIZE(to_kind, to_start),
|
||||||
how_many * char_size);
|
(char*)PyUnicode_DATA(from)
|
||||||
return 0;
|
+ PyUnicode_KIND_SIZE(from_kind, from_start),
|
||||||
|
PyUnicode_KIND_SIZE(to_kind, how_many));
|
||||||
|
return how_many;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (from_kind > to_kind) {
|
||||||
|
/* slow path to check for character overflow */
|
||||||
|
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
|
||||||
|
void *from_data = PyUnicode_DATA(from);
|
||||||
|
void *to_data = PyUnicode_DATA(to);
|
||||||
|
Py_UCS4 ch, maxchar;
|
||||||
|
Py_ssize_t i;
|
||||||
|
int overflow;
|
||||||
|
|
||||||
switch (from_kind) {
|
maxchar = 0;
|
||||||
case PyUnicode_1BYTE_KIND:
|
for (i=0; i < how_many; i++) {
|
||||||
switch (to_kind) {
|
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
|
||||||
case PyUnicode_2BYTE_KIND:
|
if (ch > maxchar) {
|
||||||
_PyUnicode_CONVERT_BYTES(
|
maxchar = ch;
|
||||||
unsigned char, Py_UCS2,
|
if (maxchar > to_maxchar) {
|
||||||
PyUnicode_1BYTE_DATA(from) + from_start,
|
overflow = 1;
|
||||||
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
|
||||||
PyUnicode_2BYTE_DATA(to) + to_start
|
|
||||||
);
|
|
||||||
break;
|
break;
|
||||||
case PyUnicode_4BYTE_KIND:
|
}
|
||||||
_PyUnicode_CONVERT_BYTES(
|
|
||||||
unsigned char, Py_UCS4,
|
|
||||||
PyUnicode_1BYTE_DATA(from) + from_start,
|
|
||||||
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
|
||||||
PyUnicode_4BYTE_DATA(to) + to_start
|
|
||||||
);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
goto invalid_state;
|
|
||||||
}
|
}
|
||||||
break;
|
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
|
||||||
case PyUnicode_2BYTE_KIND:
|
}
|
||||||
switch (to_kind) {
|
if (!overflow)
|
||||||
case PyUnicode_1BYTE_KIND:
|
return how_many;
|
||||||
_PyUnicode_CONVERT_BYTES(
|
}
|
||||||
Py_UCS2, unsigned char,
|
else if (from_kind == PyUnicode_1BYTE_KIND && to_kind == PyUnicode_2BYTE_KIND)
|
||||||
PyUnicode_2BYTE_DATA(from) + from_start,
|
{
|
||||||
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
|
_PyUnicode_CONVERT_BYTES(
|
||||||
PyUnicode_1BYTE_DATA(to) + to_start
|
Py_UCS1, Py_UCS2,
|
||||||
);
|
PyUnicode_1BYTE_DATA(from) + from_start,
|
||||||
break;
|
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
||||||
case PyUnicode_4BYTE_KIND:
|
PyUnicode_2BYTE_DATA(to) + to_start
|
||||||
_PyUnicode_CONVERT_BYTES(
|
);
|
||||||
Py_UCS2, Py_UCS4,
|
return how_many;
|
||||||
PyUnicode_2BYTE_DATA(from) + from_start,
|
}
|
||||||
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
|
else if (from_kind == PyUnicode_1BYTE_KIND
|
||||||
PyUnicode_4BYTE_DATA(to) + to_start
|
&& to_kind == PyUnicode_4BYTE_KIND)
|
||||||
);
|
{
|
||||||
break;
|
_PyUnicode_CONVERT_BYTES(
|
||||||
default:
|
Py_UCS1, Py_UCS4,
|
||||||
goto invalid_state;
|
PyUnicode_1BYTE_DATA(from) + from_start,
|
||||||
}
|
PyUnicode_1BYTE_DATA(from) + from_start + how_many,
|
||||||
break;
|
PyUnicode_4BYTE_DATA(to) + to_start
|
||||||
case PyUnicode_4BYTE_KIND:
|
);
|
||||||
switch (to_kind) {
|
return how_many;
|
||||||
case PyUnicode_1BYTE_KIND:
|
}
|
||||||
_PyUnicode_CONVERT_BYTES(
|
else if (from_kind == PyUnicode_2BYTE_KIND
|
||||||
Py_UCS4, unsigned char,
|
&& to_kind == PyUnicode_4BYTE_KIND)
|
||||||
PyUnicode_4BYTE_DATA(from) + from_start,
|
{
|
||||||
PyUnicode_4BYTE_DATA(from) + from_start + how_many,
|
_PyUnicode_CONVERT_BYTES(
|
||||||
PyUnicode_1BYTE_DATA(to) + to_start
|
Py_UCS2, Py_UCS4,
|
||||||
);
|
PyUnicode_2BYTE_DATA(from) + from_start,
|
||||||
break;
|
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
|
||||||
case PyUnicode_2BYTE_KIND:
|
PyUnicode_4BYTE_DATA(to) + to_start
|
||||||
_PyUnicode_CONVERT_BYTES(
|
);
|
||||||
Py_UCS4, Py_UCS2,
|
return how_many;
|
||||||
PyUnicode_4BYTE_DATA(from) + from_start,
|
|
||||||
PyUnicode_4BYTE_DATA(from) + from_start + how_many,
|
|
||||||
PyUnicode_2BYTE_DATA(to) + to_start
|
|
||||||
);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
goto invalid_state;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
goto invalid_state;
|
|
||||||
}
|
}
|
||||||
return 0;
|
|
||||||
|
|
||||||
invalid_state:
|
|
||||||
PyErr_Format(PyExc_ValueError,
|
PyErr_Format(PyExc_ValueError,
|
||||||
"Impossible kind state (from=%i, to=%i) "
|
"Cannot copy UCS%u characters "
|
||||||
"in PyUnicode_CopyCharacters",
|
"into a string of UCS%u characters",
|
||||||
from_kind, to_kind);
|
1 << (from_kind - 1),
|
||||||
|
1 << (to_kind -1));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue