Don't check for the maximum character when copying from unicodeobject.c

* Create copy_characters() function which doesn't check for the maximum
   character in release mode
 * _PyUnicode_CheckConsistency() is no more static to be able to use it
   in _PyUnicode_FormatAdvanced() (in formatter_unicode.c)
 * _PyUnicode_CheckConsistency() checks the string hash
This commit is contained in:
Victor Stinner 2011-10-06 01:45:57 +02:00
parent 05d1189566
commit fb9ea8c57e
3 changed files with 198 additions and 193 deletions

View file

@ -2030,6 +2030,13 @@ PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
); );
#endif /* Py_LIMITED_API */ #endif /* Py_LIMITED_API */
#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
/* FIXME: use PyObject* type for op */
PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
void *op,
int check_content);
#endif
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -239,6 +239,11 @@ const unsigned char _Py_ascii_whitespace[] = {
/* forward */ /* forward */
static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
static PyObject* get_latin1_char(unsigned char ch); static PyObject* get_latin1_char(unsigned char ch);
static void copy_characters(
PyObject *to, Py_ssize_t to_start,
PyObject *from, Py_ssize_t from_start,
Py_ssize_t how_many);
static int unicode_is_singleton(PyObject *unicode);
static PyObject * static PyObject *
unicode_encode_call_errorhandler(const char *errors, unicode_encode_call_errorhandler(const char *errors,
@ -296,7 +301,7 @@ PyUnicode_GetMax(void)
} }
#ifdef Py_DEBUG #ifdef Py_DEBUG
static int int
/* FIXME: use PyObject* type for op */ /* FIXME: use PyObject* type for op */
_PyUnicode_CheckConsistency(void *op, int check_content) _PyUnicode_CheckConsistency(void *op, int check_content)
{ {
@ -395,6 +400,8 @@ _PyUnicode_CheckConsistency(void *op, int check_content)
else else
assert(maxchar >= 0x10000); assert(maxchar >= 0x10000);
} }
if (check_content && !unicode_is_singleton((PyObject*)ascii))
assert(ascii->hash == -1);
return 1; return 1;
} }
#endif #endif
@ -601,13 +608,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
return NULL; return NULL;
copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
if (PyUnicode_CopyCharacters(copy, 0, copy_characters(copy, 0, unicode, 0, copy_length);
unicode, 0,
copy_length) < 0)
{
Py_DECREF(copy);
return NULL;
}
return copy; return copy;
} }
else { else {
@ -953,47 +954,55 @@ _PyUnicode_Dirty(PyObject *unicode)
return 0; return 0;
} }
Py_ssize_t static int
PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, _copy_characters(PyObject *to, Py_ssize_t to_start,
PyObject *from, Py_ssize_t from_start, PyObject *from, Py_ssize_t from_start,
Py_ssize_t how_many) Py_ssize_t how_many, int check_maxchar)
{ {
unsigned int from_kind, to_kind; unsigned int from_kind, to_kind;
void *from_data, *to_data; void *from_data, *to_data;
int fast;
if (!PyUnicode_Check(from) || !PyUnicode_Check(to)) { assert(PyUnicode_Check(from));
PyErr_BadInternalCall(); assert(PyUnicode_Check(to));
return -1; assert(PyUnicode_IS_READY(from));
} assert(PyUnicode_IS_READY(to));
if (PyUnicode_READY(from)) assert(PyUnicode_GET_LENGTH(from) >= how_many);
return -1; assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
if (PyUnicode_READY(to)) assert(0 <= how_many);
return -1;
how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many);
if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
PyErr_Format(PyExc_SystemError,
"Cannot write %zi characters at %zi "
"in a string of %zi characters",
how_many, to_start, PyUnicode_GET_LENGTH(to));
return -1;
}
if (how_many == 0) if (how_many == 0)
return 0; return 0;
if (_PyUnicode_Dirty(to))
return -1;
from_kind = PyUnicode_KIND(from); from_kind = PyUnicode_KIND(from);
from_data = PyUnicode_DATA(from); from_data = PyUnicode_DATA(from);
to_kind = PyUnicode_KIND(to); to_kind = PyUnicode_KIND(to);
to_data = PyUnicode_DATA(to); to_data = PyUnicode_DATA(to);
if (from_kind == to_kind #ifdef Py_DEBUG
/* deny latin1 => ascii */ if (!check_maxchar
&& !(!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))) && (from_kind > to_kind
|| (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to))))
{ {
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
Py_UCS4 ch;
Py_ssize_t i;
for (i=0; i < how_many; i++) {
ch = PyUnicode_READ(from_kind, from_data, from_start + i);
assert(ch <= to_maxchar);
}
}
#endif
fast = (from_kind == to_kind);
if (check_maxchar
&& (!PyUnicode_IS_ASCII(from) && PyUnicode_IS_ASCII(to)))
{
/* deny latin1 => ascii */
fast = 0;
}
if (fast) {
Py_MEMCPY((char*)to_data Py_MEMCPY((char*)to_data
+ PyUnicode_KIND_SIZE(to_kind, to_start), + PyUnicode_KIND_SIZE(to_kind, to_start),
(char*)from_data (char*)from_data
@ -1031,8 +1040,6 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
); );
} }
else { else {
int invalid_kinds;
/* check if max_char(from substring) <= max_char(to) */ /* check if max_char(from substring) <= max_char(to) */
if (from_kind > to_kind if (from_kind > to_kind
/* latin1 => ascii */ /* latin1 => ascii */
@ -1040,34 +1047,77 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
{ {
/* slow path to check for character overflow */ /* slow path to check for character overflow */
const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); const Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
Py_UCS4 ch, maxchar; Py_UCS4 ch;
Py_ssize_t i; Py_ssize_t i;
maxchar = 0;
invalid_kinds = 0;
for (i=0; i < how_many; i++) { for (i=0; i < how_many; i++) {
ch = PyUnicode_READ(from_kind, from_data, from_start + i); ch = PyUnicode_READ(from_kind, from_data, from_start + i);
if (ch > maxchar) { if (check_maxchar) {
maxchar = ch; if (ch > to_maxchar)
if (maxchar > to_maxchar) { return 1;
invalid_kinds = 1; }
break; else {
} assert(ch <= to_maxchar);
} }
PyUnicode_WRITE(to_kind, to_data, to_start + i, ch); PyUnicode_WRITE(to_kind, to_data, to_start + i, ch);
} }
} }
else else {
invalid_kinds = 1;
if (invalid_kinds) {
PyErr_Format(PyExc_SystemError,
"Cannot copy %s characters "
"into a string of %s characters",
unicode_kind_name(from),
unicode_kind_name(to));
return -1; return -1;
} }
} }
return 0;
}
static void
copy_characters(PyObject *to, Py_ssize_t to_start,
PyObject *from, Py_ssize_t from_start,
Py_ssize_t how_many)
{
(void)_copy_characters(to, to_start, from, from_start, how_many, 0);
}
Py_ssize_t
PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
PyObject *from, Py_ssize_t from_start,
Py_ssize_t how_many)
{
int err;
if (!PyUnicode_Check(from) || !PyUnicode_Check(to)) {
PyErr_BadInternalCall();
return -1;
}
if (PyUnicode_READY(from))
return -1;
if (PyUnicode_READY(to))
return -1;
how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many);
if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
PyErr_Format(PyExc_SystemError,
"Cannot write %zi characters at %zi "
"in a string of %zi characters",
how_many, to_start, PyUnicode_GET_LENGTH(to));
return -1;
}
if (how_many == 0)
return 0;
if (_PyUnicode_Dirty(to))
return -1;
err = _copy_characters(to, to_start, from, from_start, how_many, 1);
if (err) {
PyErr_Format(PyExc_SystemError,
"Cannot copy %s characters "
"into a string of %s characters",
unicode_kind_name(from),
unicode_kind_name(to));
return -1;
}
return how_many; return how_many;
} }
@ -1327,6 +1377,23 @@ unicode_dealloc(register PyUnicodeObject *unicode)
} }
} }
#ifdef Py_DEBUG
static int
unicode_is_singleton(PyObject *unicode)
{
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
if (unicode == unicode_empty)
return 1;
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
{
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
if (ch < 256 && unicode_latin1[ch] == unicode)
return 1;
}
return 0;
}
#endif
static int static int
unicode_resizable(PyObject *unicode) unicode_resizable(PyObject *unicode)
{ {
@ -1334,15 +1401,9 @@ unicode_resizable(PyObject *unicode)
return 0; return 0;
if (PyUnicode_CHECK_INTERNED(unicode)) if (PyUnicode_CHECK_INTERNED(unicode))
return 0; return 0;
assert(unicode != unicode_empty);
#ifdef Py_DEBUG #ifdef Py_DEBUG
if (_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND /* singleton refcount is greater than 1 */
&& PyUnicode_GET_LENGTH(unicode) == 1) assert(!unicode_is_singleton(unicode));
{
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
if (ch < 256 && unicode_latin1[ch] == unicode)
return 0;
}
#endif #endif
return 1; return 1;
} }
@ -1971,7 +2032,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
int precision = 0; int precision = 0;
int zeropad; int zeropad;
const char* f; const char* f;
PyUnicodeObject *string; PyObject *string;
/* used by sprintf */ /* used by sprintf */
char fmt[61]; /* should be enough for %0width.precisionlld */ char fmt[61]; /* should be enough for %0width.precisionlld */
Py_UCS4 maxchar = 127; /* result is ASCII by default */ Py_UCS4 maxchar = 127; /* result is ASCII by default */
@ -2270,7 +2331,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
/* Since we've analyzed how much space we need, /* Since we've analyzed how much space we need,
we don't have to resize the string. we don't have to resize the string.
There can be no errors beyond this point. */ There can be no errors beyond this point. */
string = (PyUnicodeObject *)PyUnicode_New(n, maxchar); string = PyUnicode_New(n, maxchar);
if (!string) if (!string)
goto fail; goto fail;
kind = PyUnicode_KIND(string); kind = PyUnicode_KIND(string);
@ -2321,10 +2382,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
(void) va_arg(vargs, char *); (void) va_arg(vargs, char *);
size = PyUnicode_GET_LENGTH(*callresult); size = PyUnicode_GET_LENGTH(*callresult);
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
if (PyUnicode_CopyCharacters((PyObject*)string, i, copy_characters(string, i, *callresult, 0, size);
*callresult, 0,
size) < 0)
goto fail;
i += size; i += size;
/* We're done with the unicode()/repr() => forget it */ /* We're done with the unicode()/repr() => forget it */
Py_DECREF(*callresult); Py_DECREF(*callresult);
@ -2338,10 +2396,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
Py_ssize_t size; Py_ssize_t size;
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
size = PyUnicode_GET_LENGTH(obj); size = PyUnicode_GET_LENGTH(obj);
if (PyUnicode_CopyCharacters((PyObject*)string, i, copy_characters(string, i, obj, 0, size);
obj, 0,
size) < 0)
goto fail;
i += size; i += size;
break; break;
} }
@ -2353,19 +2408,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
if (obj) { if (obj) {
size = PyUnicode_GET_LENGTH(obj); size = PyUnicode_GET_LENGTH(obj);
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
if (PyUnicode_CopyCharacters((PyObject*)string, i, copy_characters(string, i, obj, 0, size);
obj, 0,
size) < 0)
goto fail;
i += size; i += size;
} else { } else {
size = PyUnicode_GET_LENGTH(*callresult); size = PyUnicode_GET_LENGTH(*callresult);
assert(PyUnicode_KIND(*callresult) <= assert(PyUnicode_KIND(*callresult) <=
PyUnicode_KIND(string)); PyUnicode_KIND(string));
if (PyUnicode_CopyCharacters((PyObject*)string, i, copy_characters(string, i, *callresult, 0, size);
*callresult,
0, size) < 0)
goto fail;
i += size; i += size;
Py_DECREF(*callresult); Py_DECREF(*callresult);
} }
@ -2376,14 +2425,12 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
case 'R': case 'R':
case 'A': case 'A':
{ {
Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult);
/* unused, since we already have the result */ /* unused, since we already have the result */
(void) va_arg(vargs, PyObject *); (void) va_arg(vargs, PyObject *);
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
if (PyUnicode_CopyCharacters((PyObject*)string, i, copy_characters(string, i, *callresult, 0, size);
*callresult, 0, i += size;
PyUnicode_GET_LENGTH(*callresult)) < 0)
goto fail;
i += PyUnicode_GET_LENGTH(*callresult);
/* We're done with the unicode()/repr() => forget it */ /* We're done with the unicode()/repr() => forget it */
Py_DECREF(*callresult); Py_DECREF(*callresult);
/* switch to next unicode()/repr() result */ /* switch to next unicode()/repr() result */
@ -8795,24 +8842,12 @@ fixup(PyObject *self,
/* If the maxchar increased so that the kind changed, not all /* If the maxchar increased so that the kind changed, not all
characters are representable anymore and we need to fix the characters are representable anymore and we need to fix the
string again. This only happens in very few cases. */ string again. This only happens in very few cases. */
if (PyUnicode_CopyCharacters(v, 0, copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self));
(PyObject*)self, 0,
PyUnicode_GET_LENGTH(self)) < 0)
{
Py_DECREF(u);
return NULL;
}
maxchar_old = fixfct(v); maxchar_old = fixfct(v);
assert(maxchar_old > 0 && maxchar_old <= maxchar_new); assert(maxchar_old > 0 && maxchar_old <= maxchar_new);
} }
else { else {
if (PyUnicode_CopyCharacters(v, 0, copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self));
u, 0,
PyUnicode_GET_LENGTH(self)) < 0)
{
Py_DECREF(u);
return NULL;
}
} }
Py_DECREF(u); Py_DECREF(u);
@ -9016,7 +9051,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
PyObject **items; PyObject **items;
PyObject *item; PyObject *item;
Py_ssize_t sz, i, res_offset; Py_ssize_t sz, i, res_offset;
Py_UCS4 maxchar = 0; Py_UCS4 maxchar;
Py_UCS4 item_maxchar; Py_UCS4 item_maxchar;
fseq = PySequence_Fast(seq, ""); fseq = PySequence_Fast(seq, "");
@ -9031,44 +9066,45 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
seqlen = PySequence_Fast_GET_SIZE(fseq); seqlen = PySequence_Fast_GET_SIZE(fseq);
/* If empty sequence, return u"". */ /* If empty sequence, return u"". */
if (seqlen == 0) { if (seqlen == 0) {
res = PyUnicode_New(0, 0); Py_DECREF(fseq);
goto Done; Py_INCREF(unicode_empty);
res = unicode_empty;
return res;
} }
items = PySequence_Fast_ITEMS(fseq);
/* If singleton sequence with an exact Unicode, return that. */ /* If singleton sequence with an exact Unicode, return that. */
if (seqlen == 1) { items = PySequence_Fast_ITEMS(fseq);
item = items[0]; if (seqlen == 1 && PyUnicode_CheckExact(items[0])) {
if (PyUnicode_CheckExact(item)) { res = items[0];
Py_INCREF(item); Py_INCREF(res);
res = item; Py_DECREF(fseq);
goto Done; return res;
} }
/* Set up sep and seplen */
if (separator == NULL) {
/* fall back to a blank space separator */
sep = PyUnicode_FromOrdinal(' ');
if (!sep)
goto onError;
maxchar = 32;
} }
else { else {
/* Set up sep and seplen */ if (!PyUnicode_Check(separator)) {
if (separator == NULL) { PyErr_Format(PyExc_TypeError,
/* fall back to a blank space separator */ "separator: expected str instance,"
sep = PyUnicode_FromOrdinal(' '); " %.80s found",
if (!sep) Py_TYPE(separator)->tp_name);
goto onError; goto onError;
}
else {
if (!PyUnicode_Check(separator)) {
PyErr_Format(PyExc_TypeError,
"separator: expected str instance,"
" %.80s found",
Py_TYPE(separator)->tp_name);
goto onError;
}
if (PyUnicode_READY(separator))
goto onError;
sep = separator;
seplen = PyUnicode_GET_LENGTH(separator);
maxchar = PyUnicode_MAX_CHAR_VALUE(separator);
/* inc refcount to keep this code path symmetric with the
above case of a blank separator */
Py_INCREF(sep);
} }
if (PyUnicode_READY(separator))
goto onError;
sep = separator;
seplen = PyUnicode_GET_LENGTH(separator);
maxchar = PyUnicode_MAX_CHAR_VALUE(separator);
/* inc refcount to keep this code path symmetric with the
above case of a blank separator */
Py_INCREF(sep);
} }
/* There are at least two things to join, or else we have a subclass /* There are at least two things to join, or else we have a subclass
@ -9108,36 +9144,21 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
/* Catenate everything. */ /* Catenate everything. */
for (i = 0, res_offset = 0; i < seqlen; ++i) { for (i = 0, res_offset = 0; i < seqlen; ++i) {
Py_ssize_t itemlen, copied; Py_ssize_t itemlen;
item = items[i]; item = items[i];
/* Copy item, and maybe the separator. */ /* Copy item, and maybe the separator. */
if (i && seplen != 0) { if (i && seplen != 0) {
copied = PyUnicode_CopyCharacters(res, res_offset, copy_characters(res, res_offset, sep, 0, seplen);
sep, 0, seplen);
if (copied < 0)
goto onError;
#ifdef Py_DEBUG
res_offset += copied;
#else
res_offset += seplen; res_offset += seplen;
#endif
} }
itemlen = PyUnicode_GET_LENGTH(item); itemlen = PyUnicode_GET_LENGTH(item);
if (itemlen != 0) { if (itemlen != 0) {
copied = PyUnicode_CopyCharacters(res, res_offset, copy_characters(res, res_offset, item, 0, itemlen);
item, 0, itemlen);
if (copied < 0)
goto onError;
#ifdef Py_DEBUG
res_offset += copied;
#else
res_offset += itemlen; res_offset += itemlen;
#endif
} }
} }
assert(res_offset == PyUnicode_GET_LENGTH(res)); assert(res_offset == PyUnicode_GET_LENGTH(res));
Done:
Py_DECREF(fseq); Py_DECREF(fseq);
Py_XDECREF(sep); Py_XDECREF(sep);
assert(_PyUnicode_CheckConsistency(res, 1)); assert(_PyUnicode_CheckConsistency(res, 1));
@ -9212,14 +9233,7 @@ pad(PyObject *self,
FILL(kind, data, fill, 0, left); FILL(kind, data, fill, 0, left);
if (right) if (right)
FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right); FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
if (PyUnicode_CopyCharacters(u, left, copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self));
(PyObject*)self, 0,
_PyUnicode_LENGTH(self)) < 0)
{
Py_DECREF(u);
return NULL;
}
assert(_PyUnicode_CheckConsistency(u, 1)); assert(_PyUnicode_CheckConsistency(u, 1));
return u; return u;
} }
@ -9536,12 +9550,7 @@ replace(PyObject *self, PyObject *str1,
u = PyUnicode_New(slen, maxchar); u = PyUnicode_New(slen, maxchar);
if (!u) if (!u)
goto error; goto error;
if (PyUnicode_CopyCharacters(u, 0, copy_characters(u, 0, self, 0, slen);
(PyObject*)self, 0, slen) < 0)
{
Py_DECREF(u);
return NULL;
}
rkind = PyUnicode_KIND(u); rkind = PyUnicode_KIND(u);
for (i = 0; i < PyUnicode_GET_LENGTH(u); i++) for (i = 0; i < PyUnicode_GET_LENGTH(u); i++)
if (PyUnicode_READ(rkind, PyUnicode_DATA(u), i) == u1) { if (PyUnicode_READ(rkind, PyUnicode_DATA(u), i) == u1) {
@ -10160,12 +10169,8 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
maxchar); maxchar);
if (w == NULL) if (w == NULL)
goto onError; goto onError;
if (PyUnicode_CopyCharacters(w, 0, u, 0, PyUnicode_GET_LENGTH(u)) < 0) copy_characters(w, 0, u, 0, PyUnicode_GET_LENGTH(u));
goto onError; copy_characters(w, PyUnicode_GET_LENGTH(u), v, 0, PyUnicode_GET_LENGTH(v));
if (PyUnicode_CopyCharacters(w, PyUnicode_GET_LENGTH(u),
v, 0,
PyUnicode_GET_LENGTH(v)) < 0)
goto onError;
Py_DECREF(u); Py_DECREF(u);
Py_DECREF(v); Py_DECREF(v);
assert(_PyUnicode_CheckConsistency(w, 1)); assert(_PyUnicode_CheckConsistency(w, 1));
@ -10181,9 +10186,6 @@ static void
unicode_append_inplace(PyObject **p_left, PyObject *right) unicode_append_inplace(PyObject **p_left, PyObject *right)
{ {
Py_ssize_t left_len, right_len, new_len; Py_ssize_t left_len, right_len, new_len;
#ifdef Py_DEBUG
Py_ssize_t copied;
#endif
assert(PyUnicode_IS_READY(*p_left)); assert(PyUnicode_IS_READY(*p_left));
assert(PyUnicode_IS_READY(right)); assert(PyUnicode_IS_READY(right));
@ -10210,14 +10212,8 @@ unicode_append_inplace(PyObject **p_left, PyObject *right)
goto error; goto error;
} }
/* copy 'right' into the newly allocated area of 'left' */ /* copy 'right' into the newly allocated area of 'left' */
#ifdef Py_DEBUG copy_characters(*p_left, left_len, right, 0, right_len);
copied = PyUnicode_CopyCharacters(*p_left, left_len, _PyUnicode_DIRTY(*p_left);
right, 0,
right_len);
assert(0 <= copied);
#else
PyUnicode_CopyCharacters(*p_left, left_len, right, 0, right_len);
#endif
return; return;
error: error:
@ -10270,7 +10266,6 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
if (res == NULL) if (res == NULL)
goto error; goto error;
Py_DECREF(left); Py_DECREF(left);
assert(_PyUnicode_CheckConsistency(res, 1));
*p_left = res; *p_left = res;
return; return;
@ -12332,8 +12327,6 @@ unicode__format__(PyObject* self, PyObject* args)
out = _PyUnicode_FormatAdvanced(self, format_spec, 0, out = _PyUnicode_FormatAdvanced(self, format_spec, 0,
PyUnicode_GET_LENGTH(format_spec)); PyUnicode_GET_LENGTH(format_spec));
if (out != NULL)
assert(_PyUnicode_CheckConsistency(out, 1));
return out; return out;
} }
@ -13174,7 +13167,11 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
length = PyUnicode_GET_LENGTH(unicode); length = PyUnicode_GET_LENGTH(unicode);
_PyUnicode_LENGTH(self) = length; _PyUnicode_LENGTH(self) = length;
#ifdef Py_DEBUG
_PyUnicode_HASH(self) = -1;
#else
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode); _PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
#endif
_PyUnicode_STATE(self).interned = 0; _PyUnicode_STATE(self).interned = 0;
_PyUnicode_STATE(self).kind = kind; _PyUnicode_STATE(self).kind = kind;
_PyUnicode_STATE(self).compact = 0; _PyUnicode_STATE(self).compact = 0;
@ -13230,6 +13227,9 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
PyUnicode_KIND_SIZE(kind, length + 1)); PyUnicode_KIND_SIZE(kind, length + 1));
Py_DECREF(unicode); Py_DECREF(unicode);
assert(_PyUnicode_CheckConsistency(self, 1)); assert(_PyUnicode_CheckConsistency(self, 1));
#ifdef Py_DEBUG
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
#endif
return (PyObject *)self; return (PyObject *)self;
onError: onError:

View file

@ -1284,33 +1284,31 @@ _PyUnicode_FormatAdvanced(PyObject *obj,
Py_ssize_t start, Py_ssize_t end) Py_ssize_t start, Py_ssize_t end)
{ {
InternalFormatSpec format; InternalFormatSpec format;
PyObject *result = NULL; PyObject *result;
/* check for the special case of zero length format spec, make /* check for the special case of zero length format spec, make
it equivalent to str(obj) */ it equivalent to str(obj) */
if (start == end) { if (start == end)
result = PyObject_Str(obj); return PyObject_Str(obj);
goto done;
}
/* parse the format_spec */ /* parse the format_spec */
if (!parse_internal_render_format_spec(format_spec, start, end, if (!parse_internal_render_format_spec(format_spec, start, end,
&format, 's', '<')) &format, 's', '<'))
goto done; return NULL;
/* type conversion? */ /* type conversion? */
switch (format.type) { switch (format.type) {
case 's': case 's':
/* no type conversion needed, already a string. do the formatting */ /* no type conversion needed, already a string. do the formatting */
result = format_string_internal(obj, &format); result = format_string_internal(obj, &format);
if (result != NULL)
assert(_PyUnicode_CheckConsistency(result, 1));
break; break;
default: default:
/* unknown */ /* unknown */
unknown_presentation_type(format.type, obj->ob_type->tp_name); unknown_presentation_type(format.type, obj->ob_type->tp_name);
goto done; result = NULL;
} }
done:
return result; return result;
} }