Optimize _PyUnicode_FastCopyCharacters() when maxchar(from) > maxchar(to)

This commit is contained in:
Victor Stinner 2012-06-16 02:22:37 +02:00
parent f05e17ece9
commit c9d369f1bf
2 changed files with 79 additions and 57 deletions

View file

@ -1128,7 +1128,6 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
{ {
unsigned int from_kind, to_kind; unsigned int from_kind, to_kind;
void *from_data, *to_data; void *from_data, *to_data;
int fast;
assert(0 <= how_many); assert(0 <= how_many);
assert(0 <= from_start); assert(0 <= from_start);
@ -1137,41 +1136,40 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
assert(PyUnicode_IS_READY(from)); assert(PyUnicode_IS_READY(from));
assert(from_start + how_many <= PyUnicode_GET_LENGTH(from)); assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
if (how_many == 0)
return 0;
assert(PyUnicode_Check(to)); assert(PyUnicode_Check(to));
assert(PyUnicode_IS_READY(to)); assert(PyUnicode_IS_READY(to));
assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
if (how_many == 0)
return 0;
from_kind = PyUnicode_KIND(from); from_kind = PyUnicode_KIND(from);
from_data = PyUnicode_DATA(from); from_data = PyUnicode_DATA(from);
to_kind = PyUnicode_KIND(to); to_kind = PyUnicode_KIND(to);
to_data = PyUnicode_DATA(to); to_data = PyUnicode_DATA(to);
#ifdef Py_DEBUG if (from_kind == to_kind) {
if (!check_maxchar if (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)) {
&& (from_kind > to_kind /* Writing Latin-1 characters into an ASCII string requires to
|| (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))) check that all written characters are pure ASCII */
{ #ifndef Py_DEBUG
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); if (check_maxchar) {
Py_UCS4 ch; Py_UCS4 max_char;
Py_ssize_t i; max_char = ucs1lib_find_max_char(from_data,
for (i=0; i < how_many; i++) { (char*)from_data + how_many);
ch = PyUnicode_READ(from_kind, from_data, from_start + i); if (max_char >= 128)
assert(ch <= to_maxchar); return -1;
} }
} #else
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
Py_UCS4 ch;
Py_ssize_t i;
for (i=0; i < how_many; i++) {
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
assert(ch <= to_maxchar);
}
#endif #endif
fast = (from_kind == to_kind); }
if (check_maxchar
&& (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))
{
/* deny latin1 => ascii */
fast = 0;
}
if (fast) {
Py_MEMCPY((char*)to_data + to_kind * to_start, Py_MEMCPY((char*)to_data + to_kind * to_start,
(char*)from_data + from_kind * from_start, (char*)from_data + from_kind * from_start,
to_kind * how_many); to_kind * how_many);
@ -1207,42 +1205,62 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
); );
} }
else { else {
/* check if max_char(from substring) <= max_char(to) */ assert (PyUnicode_MAX_CHAR_VALUE(from) > PyUnicode_MAX_CHAR_VALUE(to));
if (from_kind > to_kind
/* latin1 => ascii */ #ifndef Py_DEBUG
|| (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))) if (!check_maxchar) {
if (from_kind == PyUnicode_2BYTE_KIND
&& to_kind == PyUnicode_1BYTE_KIND)
{
_PyUnicode_CONVERT_BYTES(
Py_UCS2, Py_UCS1,
PyUnicode_2BYTE_DATA(from) + from_start,
PyUnicode_2BYTE_DATA(from) + from_start + how_many,
PyUnicode_1BYTE_DATA(to) + to_start
);
}
else if (from_kind == PyUnicode_4BYTE_KIND
&& to_kind == PyUnicode_1BYTE_KIND)
{
_PyUnicode_CONVERT_BYTES(
Py_UCS4, Py_UCS1,
PyUnicode_4BYTE_DATA(from) + from_start,
PyUnicode_4BYTE_DATA(from) + from_start + how_many,
PyUnicode_1BYTE_DATA(to) + to_start
);
}
else if (from_kind == PyUnicode_4BYTE_KIND
&& to_kind == PyUnicode_2BYTE_KIND)
{
_PyUnicode_CONVERT_BYTES(
Py_UCS4, Py_UCS2,
PyUnicode_4BYTE_DATA(from) + from_start,
PyUnicode_4BYTE_DATA(from) + from_start + how_many,
PyUnicode_2BYTE_DATA(to) + to_start
);
}
else {
assert(0);
return -1;
}
}
else
#endif
{ {
/* slow path to check for character overflow */
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
Py_UCS4 ch; Py_UCS4 ch;
Py_ssize_t i; Py_ssize_t i;
#ifdef Py_DEBUG
for (i=0; i < how_many; i++) { for (i=0; i < how_many; i++) {
ch = PyUnicode_READ(from_kind, from_data, from_start + i); ch = PyUnicode_READ(from_kind, from_data, from_start + i);
#ifndef Py_DEBUG
assert(ch <= to_maxchar); assert(ch <= to_maxchar);
#else
if (ch > to_maxchar)
return -1;
#endif
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
} }
#else
if (!check_maxchar) {
for (i=0; i < how_many; i++) {
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
}
}
else {
for (i=0; i < how_many; i++) {
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
if (ch > to_maxchar)
return 1;
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
}
}
#endif
}
else {
assert(0 && "inconsistent state");
return 1;
} }
} }
return 0; return 0;
@ -13876,9 +13894,11 @@ PyUnicode_Format(PyObject *format, PyObject *args)
} }
} }
_PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, if (len) {
temp, pindex, len); _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
writer.pos += len; temp, pindex, len);
writer.pos += len;
}
if (width > len) { if (width > len) {
sublen = width - len; sublen = width - len;
FILL(writer.kind, writer.data, ' ', writer.pos, sublen); FILL(writer.kind, writer.data, ' ', writer.pos, sublen);

View file

@ -786,8 +786,10 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format,
goto done; goto done;
/* Then the source string. */ /* Then the source string. */
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, if (len) {
value, 0, len); _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
value, 0, len);
}
writer->pos += (len + rpad); writer->pos += (len + rpad);
result = 0; result = 0;