mirror of
https://github.com/python/cpython.git
synced 2025-09-27 18:59:43 +00:00
Don't check for the maximum character when copying from unicodeobject.c
* Create copy_characters() function which doesn't check for the maximum character in release mode
* _PyUnicode_CheckConsistency() is no longer static, so that it can be used in _PyUnicode_FormatAdvanced() (in formatter_unicode.c)
* _PyUnicode_CheckConsistency() checks the string hash
This commit is contained in:
parent
05d1189566
commit
fb9ea8c57e
3 changed files with 198 additions and 193 deletions
|
@ -2030,6 +2030,13 @@ PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
|
||||||
);
|
);
|
||||||
#endif /* Py_LIMITED_API */
|
#endif /* Py_LIMITED_API */
|
||||||
|
|
||||||
|
#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
|
||||||
|
/* FIXME: use PyObject* type for op */
|
||||||
|
PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
|
||||||
|
void *op,
|
||||||
|
int check_content);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -239,6 +239,11 @@ const unsigned char _Py_ascii_whitespace[] = {
|
||||||
/* forward */
|
/* forward */
|
||||||
static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
|
static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
|
||||||
static PyObject* get_latin1_char(unsigned char ch);
|
static PyObject* get_latin1_char(unsigned char ch);
|
||||||
|
static void copy_characters(
|
||||||
|
PyObject *to, Py_ssize_t to_start,
|
||||||
|
PyObject *from, Py_ssize_t from_start,
|
||||||
|
Py_ssize_t how_many);
|
||||||
|
static int unicode_is_singleton(PyObject *unicode);
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
unicode_encode_call_errorhandler(const char *errors,
|
unicode_encode_call_errorhandler(const char *errors,
|
||||||
|
@ -296,7 +301,7 @@ PyUnicode_GetMax(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
static int
|
int
|
||||||
/* FIXME: use PyObject* type for op */
|
/* FIXME: use PyObject* type for op */
|
||||||
_PyUnicode_CheckConsistency(void *op, int check_content)
|
_PyUnicode_CheckConsistency(void *op, int check_content)
|
||||||
{
|
{
|
||||||
|
@ -395,6 +400,8 @@ _PyUnicode_CheckConsistency(void *op, int check_content)
|
||||||
else
|
else
|
||||||
assert(maxchar >= 0x10000);
|
assert(maxchar >= 0x10000);
|
||||||
}
|
}
|
||||||
|
if (check_content && !unicode_is_singleton((PyObject*)ascii))
|
||||||
|
assert(ascii->hash == -1);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -601,13 +608,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
|
copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
|
||||||
if (PyUnicode_CopyCharacters(copy, 0,
|
copy_characters(copy, 0, unicode, 0, copy_length);
|
||||||
unicode, 0,
|
|
||||||
copy_length) < 0)
|
|
||||||
{
|
|
||||||
Py_DECREF(copy);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -953,47 +954,55 @@ _PyUnicode_Dirty(PyObject *unicode)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_ssize_t
|
static int
|
||||||
PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
_copy_characters(PyObject *to, Py_ssize_t to_start,
|
||||||
PyObject *from, Py_ssize_t from_start,
|
PyObject *from, Py_ssize_t from_start,
|
||||||
Py_ssize_t how_many)
|
Py_ssize_t how_many, int check_maxchar)
|
||||||
{
|
{
|
||||||
unsigned int from_kind, to_kind;
|
unsigned int from_kind, to_kind;
|
||||||
void *from_data, *to_data;
|
void *from_data, *to_data;
|
||||||
|
int fast;
|
||||||
|
|
||||||
if (!PyUnicode_Check(from) || !PyUnicode_Check(to)) {
|
assert(PyUnicode_Check(from));
|
||||||
PyErr_BadInternalCall();
|
assert(PyUnicode_Check(to));
|
||||||
return -1;
|
assert(PyUnicode_IS_READY(from));
|
||||||
}
|
assert(PyUnicode_IS_READY(to));
|
||||||
|
|
||||||
if (PyUnicode_READY(from))
|
assert(PyUnicode_GET_LENGTH(from) >= how_many);
|
||||||
return -1;
|
assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
|
||||||
if (PyUnicode_READY(to))
|
assert(0 <= how_many);
|
||||||
return -1;
|
|
||||||
|
|
||||||
how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many);
|
|
||||||
if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
|
|
||||||
PyErr_Format(PyExc_SystemError,
|
|
||||||
"Cannot write %zi characters at %zi "
|
|
||||||
"in a string of %zi characters",
|
|
||||||
how_many, to_start, PyUnicode_GET_LENGTH(to));
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (how_many == 0)
|
if (how_many == 0)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (_PyUnicode_Dirty(to))
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
from_kind = PyUnicode_KIND(from);
|
from_kind = PyUnicode_KIND(from);
|
||||||
from_data = PyUnicode_DATA(from);
|
from_data = PyUnicode_DATA(from);
|
||||||
to_kind = PyUnicode_KIND(to);
|
to_kind = PyUnicode_KIND(to);
|
||||||
to_data = PyUnicode_DATA(to);
|
to_data = PyUnicode_DATA(to);
|
||||||
|
|
||||||
if (from_kind == to_kind
|
#ifdef Py_DEBUG
|
||||||
/* deny latin1 => ascii */
|
if (!check_maxchar
|
||||||
&& !(!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))
|
&& (from_kind > to_kind
|
||||||
|
|| (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))))
|
||||||
{
|
{
|
||||||
|
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
|
||||||
|
Py_UCS4 ch;
|
||||||
|
Py_ssize_t i;
|
||||||
|
for (i=0; i < how_many; i++) {
|
||||||
|
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
|
||||||
|
assert(ch <= to_maxchar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
fast = (from_kind == to_kind);
|
||||||
|
if (check_maxchar
|
||||||
|
&& (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))
|
||||||
|
{
|
||||||
|
/* deny latin1 => ascii */
|
||||||
|
fast = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fast) {
|
||||||
Py_MEMCPY((char*)to_data
|
Py_MEMCPY((char*)to_data
|
||||||
+ PyUnicode_KIND_SIZE(to_kind, to_start),
|
+ PyUnicode_KIND_SIZE(to_kind, to_start),
|
||||||
(char*)from_data
|
(char*)from_data
|
||||||
|
@ -1031,8 +1040,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int invalid_kinds;
|
|
||||||
|
|
||||||
/* check if max_char(from substring) <= max_char(to) */
|
/* check if max_char(from substring) <= max_char(to) */
|
||||||
if (from_kind > to_kind
|
if (from_kind > to_kind
|
||||||
/* latin1 => ascii */
|
/* latin1 => ascii */
|
||||||
|
@ -1040,34 +1047,77 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
{
|
{
|
||||||
/* slow path to check for character overflow */
|
/* slow path to check for character overflow */
|
||||||
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
|
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
|
||||||
Py_UCS4 ch, maxchar;
|
Py_UCS4 ch;
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
|
|
||||||
maxchar = 0;
|
|
||||||
invalid_kinds = 0;
|
|
||||||
for (i=0; i < how_many; i++) {
|
for (i=0; i < how_many; i++) {
|
||||||
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
|
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
|
||||||
if (ch > maxchar) {
|
if (check_maxchar) {
|
||||||
maxchar = ch;
|
if (ch > to_maxchar)
|
||||||
if (maxchar > to_maxchar) {
|
return 1;
|
||||||
invalid_kinds = 1;
|
}
|
||||||
break;
|
else {
|
||||||
}
|
assert(ch <= to_maxchar);
|
||||||
}
|
}
|
||||||
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
|
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
invalid_kinds = 1;
|
|
||||||
if (invalid_kinds) {
|
|
||||||
PyErr_Format(PyExc_SystemError,
|
|
||||||
"Cannot copy %s characters "
|
|
||||||
"into a string of %s characters",
|
|
||||||
unicode_kind_name(from),
|
|
||||||
unicode_kind_name(to));
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
copy_characters(PyObject *to, Py_ssize_t to_start,
|
||||||
|
PyObject *from, Py_ssize_t from_start,
|
||||||
|
Py_ssize_t how_many)
|
||||||
|
{
|
||||||
|
(void)_copy_characters(to, to_start, from, from_start, how_many, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_ssize_t
|
||||||
|
PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
|
||||||
|
PyObject *from, Py_ssize_t from_start,
|
||||||
|
Py_ssize_t how_many)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
|
||||||
|
if (!PyUnicode_Check(from) || !PyUnicode_Check(to)) {
|
||||||
|
PyErr_BadInternalCall();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PyUnicode_READY(from))
|
||||||
|
return -1;
|
||||||
|
if (PyUnicode_READY(to))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many);
|
||||||
|
if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
|
||||||
|
PyErr_Format(PyExc_SystemError,
|
||||||
|
"Cannot write %zi characters at %zi "
|
||||||
|
"in a string of %zi characters",
|
||||||
|
how_many, to_start, PyUnicode_GET_LENGTH(to));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (how_many == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (_PyUnicode_Dirty(to))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
err = _copy_characters(to, to_start, from, from_start, how_many, 1);
|
||||||
|
if (err) {
|
||||||
|
PyErr_Format(PyExc_SystemError,
|
||||||
|
"Cannot copy %s characters "
|
||||||
|
"into a string of %s characters",
|
||||||
|
unicode_kind_name(from),
|
||||||
|
unicode_kind_name(to));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
return how_many;
|
return how_many;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1327,6 +1377,23 @@ unicode_dealloc(register PyUnicodeObject *unicode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
static int
|
||||||
|
unicode_is_singleton(PyObject *unicode)
|
||||||
|
{
|
||||||
|
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
|
||||||
|
if (unicode == unicode_empty)
|
||||||
|
return 1;
|
||||||
|
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
|
||||||
|
{
|
||||||
|
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
|
||||||
|
if (ch < 256 && unicode_latin1[ch] == unicode)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
unicode_resizable(PyObject *unicode)
|
unicode_resizable(PyObject *unicode)
|
||||||
{
|
{
|
||||||
|
@ -1334,15 +1401,9 @@ unicode_resizable(PyObject *unicode)
|
||||||
return 0;
|
return 0;
|
||||||
if (PyUnicode_CHECK_INTERNED(unicode))
|
if (PyUnicode_CHECK_INTERNED(unicode))
|
||||||
return 0;
|
return 0;
|
||||||
assert(unicode != unicode_empty);
|
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND
|
/* singleton refcount is greater than 1 */
|
||||||
&& PyUnicode_GET_LENGTH(unicode) == 1)
|
assert(!unicode_is_singleton(unicode));
|
||||||
{
|
|
||||||
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
|
|
||||||
if (ch < 256 && unicode_latin1[ch] == unicode)
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -1971,7 +2032,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
int precision = 0;
|
int precision = 0;
|
||||||
int zeropad;
|
int zeropad;
|
||||||
const char* f;
|
const char* f;
|
||||||
PyUnicodeObject *string;
|
PyObject *string;
|
||||||
/* used by sprintf */
|
/* used by sprintf */
|
||||||
char fmt[61]; /* should be enough for %0width.precisionlld */
|
char fmt[61]; /* should be enough for %0width.precisionlld */
|
||||||
Py_UCS4 maxchar = 127; /* result is ASCII by default */
|
Py_UCS4 maxchar = 127; /* result is ASCII by default */
|
||||||
|
@ -2270,7 +2331,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
/* Since we've analyzed how much space we need,
|
/* Since we've analyzed how much space we need,
|
||||||
we don't have to resize the string.
|
we don't have to resize the string.
|
||||||
There can be no errors beyond this point. */
|
There can be no errors beyond this point. */
|
||||||
string = (PyUnicodeObject *)PyUnicode_New(n, maxchar);
|
string = PyUnicode_New(n, maxchar);
|
||||||
if (!string)
|
if (!string)
|
||||||
goto fail;
|
goto fail;
|
||||||
kind = PyUnicode_KIND(string);
|
kind = PyUnicode_KIND(string);
|
||||||
|
@ -2321,10 +2382,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
(void) va_arg(vargs, char *);
|
(void) va_arg(vargs, char *);
|
||||||
size = PyUnicode_GET_LENGTH(*callresult);
|
size = PyUnicode_GET_LENGTH(*callresult);
|
||||||
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
|
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
|
||||||
if (PyUnicode_CopyCharacters((PyObject*)string, i,
|
copy_characters(string, i, *callresult, 0, size);
|
||||||
*callresult, 0,
|
|
||||||
size) < 0)
|
|
||||||
goto fail;
|
|
||||||
i += size;
|
i += size;
|
||||||
/* We're done with the unicode()/repr() => forget it */
|
/* We're done with the unicode()/repr() => forget it */
|
||||||
Py_DECREF(*callresult);
|
Py_DECREF(*callresult);
|
||||||
|
@ -2338,10 +2396,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
Py_ssize_t size;
|
Py_ssize_t size;
|
||||||
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
|
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
|
||||||
size = PyUnicode_GET_LENGTH(obj);
|
size = PyUnicode_GET_LENGTH(obj);
|
||||||
if (PyUnicode_CopyCharacters((PyObject*)string, i,
|
copy_characters(string, i, obj, 0, size);
|
||||||
obj, 0,
|
|
||||||
size) < 0)
|
|
||||||
goto fail;
|
|
||||||
i += size;
|
i += size;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -2353,19 +2408,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
if (obj) {
|
if (obj) {
|
||||||
size = PyUnicode_GET_LENGTH(obj);
|
size = PyUnicode_GET_LENGTH(obj);
|
||||||
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
|
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
|
||||||
if (PyUnicode_CopyCharacters((PyObject*)string, i,
|
copy_characters(string, i, obj, 0, size);
|
||||||
obj, 0,
|
|
||||||
size) < 0)
|
|
||||||
goto fail;
|
|
||||||
i += size;
|
i += size;
|
||||||
} else {
|
} else {
|
||||||
size = PyUnicode_GET_LENGTH(*callresult);
|
size = PyUnicode_GET_LENGTH(*callresult);
|
||||||
assert(PyUnicode_KIND(*callresult) <=
|
assert(PyUnicode_KIND(*callresult) <=
|
||||||
PyUnicode_KIND(string));
|
PyUnicode_KIND(string));
|
||||||
if (PyUnicode_CopyCharacters((PyObject*)string, i,
|
copy_characters(string, i, *callresult, 0, size);
|
||||||
*callresult,
|
|
||||||
0, size) < 0)
|
|
||||||
goto fail;
|
|
||||||
i += size;
|
i += size;
|
||||||
Py_DECREF(*callresult);
|
Py_DECREF(*callresult);
|
||||||
}
|
}
|
||||||
|
@ -2376,14 +2425,12 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
case 'R':
|
case 'R':
|
||||||
case 'A':
|
case 'A':
|
||||||
{
|
{
|
||||||
|
Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult);
|
||||||
/* unused, since we already have the result */
|
/* unused, since we already have the result */
|
||||||
(void) va_arg(vargs, PyObject *);
|
(void) va_arg(vargs, PyObject *);
|
||||||
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
|
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
|
||||||
if (PyUnicode_CopyCharacters((PyObject*)string, i,
|
copy_characters(string, i, *callresult, 0, size);
|
||||||
*callresult, 0,
|
i += size;
|
||||||
PyUnicode_GET_LENGTH(*callresult)) < 0)
|
|
||||||
goto fail;
|
|
||||||
i += PyUnicode_GET_LENGTH(*callresult);
|
|
||||||
/* We're done with the unicode()/repr() => forget it */
|
/* We're done with the unicode()/repr() => forget it */
|
||||||
Py_DECREF(*callresult);
|
Py_DECREF(*callresult);
|
||||||
/* switch to next unicode()/repr() result */
|
/* switch to next unicode()/repr() result */
|
||||||
|
@ -8795,24 +8842,12 @@ fixup(PyObject *self,
|
||||||
/* If the maxchar increased so that the kind changed, not all
|
/* If the maxchar increased so that the kind changed, not all
|
||||||
characters are representable anymore and we need to fix the
|
characters are representable anymore and we need to fix the
|
||||||
string again. This only happens in very few cases. */
|
string again. This only happens in very few cases. */
|
||||||
if (PyUnicode_CopyCharacters(v, 0,
|
copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self));
|
||||||
(PyObject*)self, 0,
|
|
||||||
PyUnicode_GET_LENGTH(self)) < 0)
|
|
||||||
{
|
|
||||||
Py_DECREF(u);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
maxchar_old = fixfct(v);
|
maxchar_old = fixfct(v);
|
||||||
assert(maxchar_old > 0 && maxchar_old <= maxchar_new);
|
assert(maxchar_old > 0 && maxchar_old <= maxchar_new);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (PyUnicode_CopyCharacters(v, 0,
|
copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self));
|
||||||
u, 0,
|
|
||||||
PyUnicode_GET_LENGTH(self)) < 0)
|
|
||||||
{
|
|
||||||
Py_DECREF(u);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_DECREF(u);
|
Py_DECREF(u);
|
||||||
|
@ -9016,7 +9051,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
PyObject **items;
|
PyObject **items;
|
||||||
PyObject *item;
|
PyObject *item;
|
||||||
Py_ssize_t sz, i, res_offset;
|
Py_ssize_t sz, i, res_offset;
|
||||||
Py_UCS4 maxchar = 0;
|
Py_UCS4 maxchar;
|
||||||
Py_UCS4 item_maxchar;
|
Py_UCS4 item_maxchar;
|
||||||
|
|
||||||
fseq = PySequence_Fast(seq, "");
|
fseq = PySequence_Fast(seq, "");
|
||||||
|
@ -9031,44 +9066,45 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
seqlen = PySequence_Fast_GET_SIZE(fseq);
|
seqlen = PySequence_Fast_GET_SIZE(fseq);
|
||||||
/* If empty sequence, return u"". */
|
/* If empty sequence, return u"". */
|
||||||
if (seqlen == 0) {
|
if (seqlen == 0) {
|
||||||
res = PyUnicode_New(0, 0);
|
Py_DECREF(fseq);
|
||||||
goto Done;
|
Py_INCREF(unicode_empty);
|
||||||
|
res = unicode_empty;
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
items = PySequence_Fast_ITEMS(fseq);
|
|
||||||
/* If singleton sequence with an exact Unicode, return that. */
|
/* If singleton sequence with an exact Unicode, return that. */
|
||||||
if (seqlen == 1) {
|
items = PySequence_Fast_ITEMS(fseq);
|
||||||
item = items[0];
|
if (seqlen == 1 && PyUnicode_CheckExact(items[0])) {
|
||||||
if (PyUnicode_CheckExact(item)) {
|
res = items[0];
|
||||||
Py_INCREF(item);
|
Py_INCREF(res);
|
||||||
res = item;
|
Py_DECREF(fseq);
|
||||||
goto Done;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Set up sep and seplen */
|
||||||
|
if (separator == NULL) {
|
||||||
|
/* fall back to a blank space separator */
|
||||||
|
sep = PyUnicode_FromOrdinal(' ');
|
||||||
|
if (!sep)
|
||||||
|
goto onError;
|
||||||
|
maxchar = 32;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Set up sep and seplen */
|
if (!PyUnicode_Check(separator)) {
|
||||||
if (separator == NULL) {
|
PyErr_Format(PyExc_TypeError,
|
||||||
/* fall back to a blank space separator */
|
"separator: expected str instance,"
|
||||||
sep = PyUnicode_FromOrdinal(' ');
|
" %.80s found",
|
||||||
if (!sep)
|
Py_TYPE(separator)->tp_name);
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
|
||||||
else {
|
|
||||||
if (!PyUnicode_Check(separator)) {
|
|
||||||
PyErr_Format(PyExc_TypeError,
|
|
||||||
"separator: expected str instance,"
|
|
||||||
" %.80s found",
|
|
||||||
Py_TYPE(separator)->tp_name);
|
|
||||||
goto onError;
|
|
||||||
}
|
|
||||||
if (PyUnicode_READY(separator))
|
|
||||||
goto onError;
|
|
||||||
sep = separator;
|
|
||||||
seplen = PyUnicode_GET_LENGTH(separator);
|
|
||||||
maxchar = PyUnicode_MAX_CHAR_VALUE(separator);
|
|
||||||
/* inc refcount to keep this code path symmetric with the
|
|
||||||
above case of a blank separator */
|
|
||||||
Py_INCREF(sep);
|
|
||||||
}
|
}
|
||||||
|
if (PyUnicode_READY(separator))
|
||||||
|
goto onError;
|
||||||
|
sep = separator;
|
||||||
|
seplen = PyUnicode_GET_LENGTH(separator);
|
||||||
|
maxchar = PyUnicode_MAX_CHAR_VALUE(separator);
|
||||||
|
/* inc refcount to keep this code path symmetric with the
|
||||||
|
above case of a blank separator */
|
||||||
|
Py_INCREF(sep);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* There are at least two things to join, or else we have a subclass
|
/* There are at least two things to join, or else we have a subclass
|
||||||
|
@ -9108,36 +9144,21 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
|
|
||||||
/* Catenate everything. */
|
/* Catenate everything. */
|
||||||
for (i = 0, res_offset = 0; i < seqlen; ++i) {
|
for (i = 0, res_offset = 0; i < seqlen; ++i) {
|
||||||
Py_ssize_t itemlen, copied;
|
Py_ssize_t itemlen;
|
||||||
item = items[i];
|
item = items[i];
|
||||||
/* Copy item, and maybe the separator. */
|
/* Copy item, and maybe the separator. */
|
||||||
if (i && seplen != 0) {
|
if (i && seplen != 0) {
|
||||||
copied = PyUnicode_CopyCharacters(res, res_offset,
|
copy_characters(res, res_offset, sep, 0, seplen);
|
||||||
sep, 0, seplen);
|
|
||||||
if (copied < 0)
|
|
||||||
goto onError;
|
|
||||||
#ifdef Py_DEBUG
|
|
||||||
res_offset += copied;
|
|
||||||
#else
|
|
||||||
res_offset += seplen;
|
res_offset += seplen;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
itemlen = PyUnicode_GET_LENGTH(item);
|
itemlen = PyUnicode_GET_LENGTH(item);
|
||||||
if (itemlen != 0) {
|
if (itemlen != 0) {
|
||||||
copied = PyUnicode_CopyCharacters(res, res_offset,
|
copy_characters(res, res_offset, item, 0, itemlen);
|
||||||
item, 0, itemlen);
|
|
||||||
if (copied < 0)
|
|
||||||
goto onError;
|
|
||||||
#ifdef Py_DEBUG
|
|
||||||
res_offset += copied;
|
|
||||||
#else
|
|
||||||
res_offset += itemlen;
|
res_offset += itemlen;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(res_offset == PyUnicode_GET_LENGTH(res));
|
assert(res_offset == PyUnicode_GET_LENGTH(res));
|
||||||
|
|
||||||
Done:
|
|
||||||
Py_DECREF(fseq);
|
Py_DECREF(fseq);
|
||||||
Py_XDECREF(sep);
|
Py_XDECREF(sep);
|
||||||
assert(_PyUnicode_CheckConsistency(res, 1));
|
assert(_PyUnicode_CheckConsistency(res, 1));
|
||||||
|
@ -9212,14 +9233,7 @@ pad(PyObject *self,
|
||||||
FILL(kind, data, fill, 0, left);
|
FILL(kind, data, fill, 0, left);
|
||||||
if (right)
|
if (right)
|
||||||
FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
|
FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
|
||||||
if (PyUnicode_CopyCharacters(u, left,
|
copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self));
|
||||||
(PyObject*)self, 0,
|
|
||||||
_PyUnicode_LENGTH(self)) < 0)
|
|
||||||
{
|
|
||||||
Py_DECREF(u);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(_PyUnicode_CheckConsistency(u, 1));
|
assert(_PyUnicode_CheckConsistency(u, 1));
|
||||||
return u;
|
return u;
|
||||||
}
|
}
|
||||||
|
@ -9536,12 +9550,7 @@ replace(PyObject *self, PyObject *str1,
|
||||||
u = PyUnicode_New(slen, maxchar);
|
u = PyUnicode_New(slen, maxchar);
|
||||||
if (!u)
|
if (!u)
|
||||||
goto error;
|
goto error;
|
||||||
if (PyUnicode_CopyCharacters(u, 0,
|
copy_characters(u, 0, self, 0, slen);
|
||||||
(PyObject*)self, 0, slen) < 0)
|
|
||||||
{
|
|
||||||
Py_DECREF(u);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
rkind = PyUnicode_KIND(u);
|
rkind = PyUnicode_KIND(u);
|
||||||
for (i = 0; i < PyUnicode_GET_LENGTH(u); i++)
|
for (i = 0; i < PyUnicode_GET_LENGTH(u); i++)
|
||||||
if (PyUnicode_READ(rkind, PyUnicode_DATA(u), i) == u1) {
|
if (PyUnicode_READ(rkind, PyUnicode_DATA(u), i) == u1) {
|
||||||
|
@ -10160,12 +10169,8 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
|
||||||
maxchar);
|
maxchar);
|
||||||
if (w == NULL)
|
if (w == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
if (PyUnicode_CopyCharacters(w, 0, u, 0, PyUnicode_GET_LENGTH(u)) < 0)
|
copy_characters(w, 0, u, 0, PyUnicode_GET_LENGTH(u));
|
||||||
goto onError;
|
copy_characters(w, PyUnicode_GET_LENGTH(u), v, 0, PyUnicode_GET_LENGTH(v));
|
||||||
if (PyUnicode_CopyCharacters(w, PyUnicode_GET_LENGTH(u),
|
|
||||||
v, 0,
|
|
||||||
PyUnicode_GET_LENGTH(v)) < 0)
|
|
||||||
goto onError;
|
|
||||||
Py_DECREF(u);
|
Py_DECREF(u);
|
||||||
Py_DECREF(v);
|
Py_DECREF(v);
|
||||||
assert(_PyUnicode_CheckConsistency(w, 1));
|
assert(_PyUnicode_CheckConsistency(w, 1));
|
||||||
|
@ -10181,9 +10186,6 @@ static void
|
||||||
unicode_append_inplace(PyObject **p_left, PyObject *right)
|
unicode_append_inplace(PyObject **p_left, PyObject *right)
|
||||||
{
|
{
|
||||||
Py_ssize_t left_len, right_len, new_len;
|
Py_ssize_t left_len, right_len, new_len;
|
||||||
#ifdef Py_DEBUG
|
|
||||||
Py_ssize_t copied;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
assert(PyUnicode_IS_READY(*p_left));
|
assert(PyUnicode_IS_READY(*p_left));
|
||||||
assert(PyUnicode_IS_READY(right));
|
assert(PyUnicode_IS_READY(right));
|
||||||
|
@ -10210,14 +10212,8 @@ unicode_append_inplace(PyObject **p_left, PyObject *right)
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
/* copy 'right' into the newly allocated area of 'left' */
|
/* copy 'right' into the newly allocated area of 'left' */
|
||||||
#ifdef Py_DEBUG
|
copy_characters(*p_left, left_len, right, 0, right_len);
|
||||||
copied = PyUnicode_CopyCharacters(*p_left, left_len,
|
_PyUnicode_DIRTY(*p_left);
|
||||||
right, 0,
|
|
||||||
right_len);
|
|
||||||
assert(0 <= copied);
|
|
||||||
#else
|
|
||||||
PyUnicode_CopyCharacters(*p_left, left_len, right, 0, right_len);
|
|
||||||
#endif
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
|
@ -10270,7 +10266,6 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
|
||||||
if (res == NULL)
|
if (res == NULL)
|
||||||
goto error;
|
goto error;
|
||||||
Py_DECREF(left);
|
Py_DECREF(left);
|
||||||
assert(_PyUnicode_CheckConsistency(res, 1));
|
|
||||||
*p_left = res;
|
*p_left = res;
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -12332,8 +12327,6 @@ unicode__format__(PyObject* self, PyObject* args)
|
||||||
|
|
||||||
out = _PyUnicode_FormatAdvanced(self, format_spec, 0,
|
out = _PyUnicode_FormatAdvanced(self, format_spec, 0,
|
||||||
PyUnicode_GET_LENGTH(format_spec));
|
PyUnicode_GET_LENGTH(format_spec));
|
||||||
if (out != NULL)
|
|
||||||
assert(_PyUnicode_CheckConsistency(out, 1));
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13174,7 +13167,11 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
length = PyUnicode_GET_LENGTH(unicode);
|
length = PyUnicode_GET_LENGTH(unicode);
|
||||||
|
|
||||||
_PyUnicode_LENGTH(self) = length;
|
_PyUnicode_LENGTH(self) = length;
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
_PyUnicode_HASH(self) = -1;
|
||||||
|
#else
|
||||||
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
|
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
|
||||||
|
#endif
|
||||||
_PyUnicode_STATE(self).interned = 0;
|
_PyUnicode_STATE(self).interned = 0;
|
||||||
_PyUnicode_STATE(self).kind = kind;
|
_PyUnicode_STATE(self).kind = kind;
|
||||||
_PyUnicode_STATE(self).compact = 0;
|
_PyUnicode_STATE(self).compact = 0;
|
||||||
|
@ -13230,6 +13227,9 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
PyUnicode_KIND_SIZE(kind, length + 1));
|
PyUnicode_KIND_SIZE(kind, length + 1));
|
||||||
Py_DECREF(unicode);
|
Py_DECREF(unicode);
|
||||||
assert(_PyUnicode_CheckConsistency(self, 1));
|
assert(_PyUnicode_CheckConsistency(self, 1));
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
|
||||||
|
#endif
|
||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
|
|
|
@ -1284,33 +1284,31 @@ _PyUnicode_FormatAdvanced(PyObject *obj,
|
||||||
Py_ssize_t start, Py_ssize_t end)
|
Py_ssize_t start, Py_ssize_t end)
|
||||||
{
|
{
|
||||||
InternalFormatSpec format;
|
InternalFormatSpec format;
|
||||||
PyObject *result = NULL;
|
PyObject *result;
|
||||||
|
|
||||||
/* check for the special case of zero length format spec, make
|
/* check for the special case of zero length format spec, make
|
||||||
it equivalent to str(obj) */
|
it equivalent to str(obj) */
|
||||||
if (start == end) {
|
if (start == end)
|
||||||
result = PyObject_Str(obj);
|
return PyObject_Str(obj);
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* parse the format_spec */
|
/* parse the format_spec */
|
||||||
if (!parse_internal_render_format_spec(format_spec, start, end,
|
if (!parse_internal_render_format_spec(format_spec, start, end,
|
||||||
&format, 's', '<'))
|
&format, 's', '<'))
|
||||||
goto done;
|
return NULL;
|
||||||
|
|
||||||
/* type conversion? */
|
/* type conversion? */
|
||||||
switch (format.type) {
|
switch (format.type) {
|
||||||
case 's':
|
case 's':
|
||||||
/* no type conversion needed, already a string. do the formatting */
|
/* no type conversion needed, already a string. do the formatting */
|
||||||
result = format_string_internal(obj, &format);
|
result = format_string_internal(obj, &format);
|
||||||
|
if (result != NULL)
|
||||||
|
assert(_PyUnicode_CheckConsistency(result, 1));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* unknown */
|
/* unknown */
|
||||||
unknown_presentation_type(format.type, obj->ob_type->tp_name);
|
unknown_presentation_type(format.type, obj->ob_type->tp_name);
|
||||||
goto done;
|
result = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue