mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
remove MAX_MAXCHAR because it's unsafe for computing maximum codepoint value (see #18183)
This commit is contained in:
parent
0e547b66dc
commit
7e30373126
3 changed files with 32 additions and 31 deletions
|
@ -566,6 +566,9 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertEqual('\U0008fffe'.lower(), '\U0008fffe')
|
self.assertEqual('\U0008fffe'.lower(), '\U0008fffe')
|
||||||
self.assertEqual('\u2177'.lower(), '\u2177')
|
self.assertEqual('\u2177'.lower(), '\u2177')
|
||||||
|
|
||||||
|
# See issue #18183 for this one.
|
||||||
|
'\U00010000\U00100000'.lower()
|
||||||
|
|
||||||
def test_casefold(self):
|
def test_casefold(self):
|
||||||
self.assertEqual('hello'.casefold(), 'hello')
|
self.assertEqual('hello'.casefold(), 'hello')
|
||||||
self.assertEqual('hELlo'.casefold(), 'hello')
|
self.assertEqual('hELlo'.casefold(), 'hello')
|
||||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 3.3.3 release candidate 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #18183: Fix various unicode operations on strings with large unicode
|
||||||
|
codepoints.
|
||||||
|
|
||||||
- Issue #18180: Fix ref leak in _PyImport_GetDynLoadWindows().
|
- Issue #18180: Fix ref leak in _PyImport_GetDynLoadWindows().
|
||||||
|
|
||||||
- Issue #18038: SyntaxError raised during compilation sources with illegal
|
- Issue #18038: SyntaxError raised during compilation sources with illegal
|
||||||
|
|
|
@ -112,11 +112,6 @@ extern "C" {
|
||||||
#define _PyUnicode_DATA_ANY(op) \
|
#define _PyUnicode_DATA_ANY(op) \
|
||||||
(((PyUnicodeObject*)(op))->data.any)
|
(((PyUnicodeObject*)(op))->data.any)
|
||||||
|
|
||||||
/* Optimized version of Py_MAX() to compute the maximum character:
|
|
||||||
use it when your are computing the second argument of PyUnicode_New() */
|
|
||||||
#define MAX_MAXCHAR(maxchar1, maxchar2) \
|
|
||||||
((maxchar1) | (maxchar2))
|
|
||||||
|
|
||||||
#undef PyUnicode_READY
|
#undef PyUnicode_READY
|
||||||
#define PyUnicode_READY(op) \
|
#define PyUnicode_READY(op) \
|
||||||
(assert(_PyUnicode_CHECK(op)), \
|
(assert(_PyUnicode_CHECK(op)), \
|
||||||
|
@ -2495,7 +2490,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
case 'c':
|
case 'c':
|
||||||
{
|
{
|
||||||
Py_UCS4 ordinal = va_arg(count, int);
|
Py_UCS4 ordinal = va_arg(count, int);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, ordinal);
|
maxchar = Py_MAX(maxchar, ordinal);
|
||||||
n++;
|
n++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -2591,7 +2586,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
/* since PyUnicode_DecodeUTF8 returns already flexible
|
/* since PyUnicode_DecodeUTF8 returns already flexible
|
||||||
unicode objects, there is no need to call ready on them */
|
unicode objects, there is no need to call ready on them */
|
||||||
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
|
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
|
maxchar = Py_MAX(maxchar, argmaxchar);
|
||||||
n += PyUnicode_GET_LENGTH(str);
|
n += PyUnicode_GET_LENGTH(str);
|
||||||
/* Remember the str and switch to the next slot */
|
/* Remember the str and switch to the next slot */
|
||||||
*callresult++ = str;
|
*callresult++ = str;
|
||||||
|
@ -2604,7 +2599,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
if (PyUnicode_READY(obj) == -1)
|
if (PyUnicode_READY(obj) == -1)
|
||||||
goto fail;
|
goto fail;
|
||||||
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
|
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
|
maxchar = Py_MAX(maxchar, argmaxchar);
|
||||||
n += PyUnicode_GET_LENGTH(obj);
|
n += PyUnicode_GET_LENGTH(obj);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -2619,7 +2614,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
if (PyUnicode_READY(obj) == -1)
|
if (PyUnicode_READY(obj) == -1)
|
||||||
goto fail;
|
goto fail;
|
||||||
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
|
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
|
maxchar = Py_MAX(maxchar, argmaxchar);
|
||||||
n += PyUnicode_GET_LENGTH(obj);
|
n += PyUnicode_GET_LENGTH(obj);
|
||||||
*callresult++ = NULL;
|
*callresult++ = NULL;
|
||||||
}
|
}
|
||||||
|
@ -2632,7 +2627,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
|
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
|
maxchar = Py_MAX(maxchar, argmaxchar);
|
||||||
n += PyUnicode_GET_LENGTH(str_obj);
|
n += PyUnicode_GET_LENGTH(str_obj);
|
||||||
*callresult++ = str_obj;
|
*callresult++ = str_obj;
|
||||||
}
|
}
|
||||||
|
@ -2651,7 +2646,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
|
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
|
maxchar = Py_MAX(maxchar, argmaxchar);
|
||||||
n += PyUnicode_GET_LENGTH(str);
|
n += PyUnicode_GET_LENGTH(str);
|
||||||
/* Remember the str and switch to the next slot */
|
/* Remember the str and switch to the next slot */
|
||||||
*callresult++ = str;
|
*callresult++ = str;
|
||||||
|
@ -2670,7 +2665,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
|
argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
|
maxchar = Py_MAX(maxchar, argmaxchar);
|
||||||
n += PyUnicode_GET_LENGTH(repr);
|
n += PyUnicode_GET_LENGTH(repr);
|
||||||
/* Remember the repr and switch to the next slot */
|
/* Remember the repr and switch to the next slot */
|
||||||
*callresult++ = repr;
|
*callresult++ = repr;
|
||||||
|
@ -2689,7 +2684,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
|
argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
|
maxchar = Py_MAX(maxchar, argmaxchar);
|
||||||
n += PyUnicode_GET_LENGTH(ascii);
|
n += PyUnicode_GET_LENGTH(ascii);
|
||||||
/* Remember the repr and switch to the next slot */
|
/* Remember the repr and switch to the next slot */
|
||||||
*callresult++ = ascii;
|
*callresult++ = ascii;
|
||||||
|
@ -8628,11 +8623,11 @@ fix_decimal_and_space_to_ascii(PyObject *self)
|
||||||
}
|
}
|
||||||
if (fixed != 0) {
|
if (fixed != 0) {
|
||||||
modified = 1;
|
modified = 1;
|
||||||
maxchar = MAX_MAXCHAR(maxchar, fixed);
|
maxchar = Py_MAX(maxchar, fixed);
|
||||||
PyUnicode_WRITE(kind, data, i, fixed);
|
PyUnicode_WRITE(kind, data, i, fixed);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
maxchar = MAX_MAXCHAR(maxchar, ch);
|
maxchar = Py_MAX(maxchar, ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8673,7 +8668,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
|
||||||
int decimal = Py_UNICODE_TODECIMAL(ch);
|
int decimal = Py_UNICODE_TODECIMAL(ch);
|
||||||
if (decimal >= 0)
|
if (decimal >= 0)
|
||||||
ch = '0' + decimal;
|
ch = '0' + decimal;
|
||||||
maxchar = MAX_MAXCHAR(maxchar, ch);
|
maxchar = Py_MAX(maxchar, ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8914,7 +8909,7 @@ _PyUnicode_InsertThousandsGrouping(
|
||||||
if (unicode == NULL) {
|
if (unicode == NULL) {
|
||||||
*maxchar = 127;
|
*maxchar = 127;
|
||||||
if (len != n_digits) {
|
if (len != n_digits) {
|
||||||
*maxchar = MAX_MAXCHAR(*maxchar,
|
*maxchar = Py_MAX(*maxchar,
|
||||||
PyUnicode_MAX_CHAR_VALUE(thousands_sep));
|
PyUnicode_MAX_CHAR_VALUE(thousands_sep));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9309,14 +9304,14 @@ do_capitalize(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *ma
|
||||||
c = PyUnicode_READ(kind, data, 0);
|
c = PyUnicode_READ(kind, data, 0);
|
||||||
n_res = _PyUnicode_ToUpperFull(c, mapped);
|
n_res = _PyUnicode_ToUpperFull(c, mapped);
|
||||||
for (j = 0; j < n_res; j++) {
|
for (j = 0; j < n_res; j++) {
|
||||||
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
|
*maxchar = Py_MAX(*maxchar, mapped[j]);
|
||||||
res[k++] = mapped[j];
|
res[k++] = mapped[j];
|
||||||
}
|
}
|
||||||
for (i = 1; i < length; i++) {
|
for (i = 1; i < length; i++) {
|
||||||
c = PyUnicode_READ(kind, data, i);
|
c = PyUnicode_READ(kind, data, i);
|
||||||
n_res = lower_ucs4(kind, data, length, i, c, mapped);
|
n_res = lower_ucs4(kind, data, length, i, c, mapped);
|
||||||
for (j = 0; j < n_res; j++) {
|
for (j = 0; j < n_res; j++) {
|
||||||
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
|
*maxchar = Py_MAX(*maxchar, mapped[j]);
|
||||||
res[k++] = mapped[j];
|
res[k++] = mapped[j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9341,7 +9336,7 @@ do_swapcase(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc
|
||||||
mapped[0] = c;
|
mapped[0] = c;
|
||||||
}
|
}
|
||||||
for (j = 0; j < n_res; j++) {
|
for (j = 0; j < n_res; j++) {
|
||||||
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
|
*maxchar = Py_MAX(*maxchar, mapped[j]);
|
||||||
res[k++] = mapped[j];
|
res[k++] = mapped[j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9362,7 +9357,7 @@ do_upper_or_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res,
|
||||||
else
|
else
|
||||||
n_res = _PyUnicode_ToUpperFull(c, mapped);
|
n_res = _PyUnicode_ToUpperFull(c, mapped);
|
||||||
for (j = 0; j < n_res; j++) {
|
for (j = 0; j < n_res; j++) {
|
||||||
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
|
*maxchar = Py_MAX(*maxchar, mapped[j]);
|
||||||
res[k++] = mapped[j];
|
res[k++] = mapped[j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9391,7 +9386,7 @@ do_casefold(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc
|
||||||
Py_UCS4 mapped[3];
|
Py_UCS4 mapped[3];
|
||||||
int j, n_res = _PyUnicode_ToFoldedFull(c, mapped);
|
int j, n_res = _PyUnicode_ToFoldedFull(c, mapped);
|
||||||
for (j = 0; j < n_res; j++) {
|
for (j = 0; j < n_res; j++) {
|
||||||
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
|
*maxchar = Py_MAX(*maxchar, mapped[j]);
|
||||||
res[k++] = mapped[j];
|
res[k++] = mapped[j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -9416,7 +9411,7 @@ do_title(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar
|
||||||
n_res = _PyUnicode_ToTitleFull(c, mapped);
|
n_res = _PyUnicode_ToTitleFull(c, mapped);
|
||||||
|
|
||||||
for (j = 0; j < n_res; j++) {
|
for (j = 0; j < n_res; j++) {
|
||||||
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
|
*maxchar = Py_MAX(*maxchar, mapped[j]);
|
||||||
res[k++] = mapped[j];
|
res[k++] = mapped[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9571,7 +9566,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
|
||||||
goto onError;
|
goto onError;
|
||||||
sz += PyUnicode_GET_LENGTH(item);
|
sz += PyUnicode_GET_LENGTH(item);
|
||||||
item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
|
item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, item_maxchar);
|
maxchar = Py_MAX(maxchar, item_maxchar);
|
||||||
if (i != 0)
|
if (i != 0)
|
||||||
sz += seplen;
|
sz += seplen;
|
||||||
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
|
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
|
||||||
|
@ -9747,7 +9742,7 @@ pad(PyObject *self,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
maxchar = PyUnicode_MAX_CHAR_VALUE(self);
|
maxchar = PyUnicode_MAX_CHAR_VALUE(self);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, fill);
|
maxchar = Py_MAX(maxchar, fill);
|
||||||
u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar);
|
u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar);
|
||||||
if (!u)
|
if (!u)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -10061,7 +10056,7 @@ replace(PyObject *self, PyObject *str1,
|
||||||
/* Replacing str1 with str2 may cause a maxchar reduction in the
|
/* Replacing str1 with str2 may cause a maxchar reduction in the
|
||||||
result string. */
|
result string. */
|
||||||
mayshrink = (maxchar_str2 < maxchar);
|
mayshrink = (maxchar_str2 < maxchar);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, maxchar_str2);
|
maxchar = Py_MAX(maxchar, maxchar_str2);
|
||||||
|
|
||||||
if (len1 == len2) {
|
if (len1 == len2) {
|
||||||
/* same length */
|
/* same length */
|
||||||
|
@ -10647,7 +10642,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
|
||||||
|
|
||||||
maxchar = PyUnicode_MAX_CHAR_VALUE(u);
|
maxchar = PyUnicode_MAX_CHAR_VALUE(u);
|
||||||
maxchar2 = PyUnicode_MAX_CHAR_VALUE(v);
|
maxchar2 = PyUnicode_MAX_CHAR_VALUE(v);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, maxchar2);
|
maxchar = Py_MAX(maxchar, maxchar2);
|
||||||
|
|
||||||
/* Concat the two Unicode strings */
|
/* Concat the two Unicode strings */
|
||||||
w = PyUnicode_New(new_len, maxchar);
|
w = PyUnicode_New(new_len, maxchar);
|
||||||
|
@ -10734,7 +10729,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
|
||||||
else {
|
else {
|
||||||
maxchar = PyUnicode_MAX_CHAR_VALUE(left);
|
maxchar = PyUnicode_MAX_CHAR_VALUE(left);
|
||||||
maxchar2 = PyUnicode_MAX_CHAR_VALUE(right);
|
maxchar2 = PyUnicode_MAX_CHAR_VALUE(right);
|
||||||
maxchar = MAX_MAXCHAR(maxchar, maxchar2);
|
maxchar = Py_MAX(maxchar, maxchar2);
|
||||||
|
|
||||||
/* Concat the two Unicode strings */
|
/* Concat the two Unicode strings */
|
||||||
res = PyUnicode_New(new_len, maxchar);
|
res = PyUnicode_New(new_len, maxchar);
|
||||||
|
@ -13846,15 +13841,15 @@ PyUnicode_Format(PyObject *format, PyObject *args)
|
||||||
if (!(flags & F_LJUST)) {
|
if (!(flags & F_LJUST)) {
|
||||||
if (sign) {
|
if (sign) {
|
||||||
if ((width-1) > len)
|
if ((width-1) > len)
|
||||||
bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
|
bufmaxchar = Py_MAX(bufmaxchar, fill);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (width > len)
|
if (width > len)
|
||||||
bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
|
bufmaxchar = Py_MAX(bufmaxchar, fill);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
|
maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
|
||||||
bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar);
|
bufmaxchar = Py_MAX(bufmaxchar, maxchar);
|
||||||
|
|
||||||
buflen = width;
|
buflen = width;
|
||||||
if (sign && len == width)
|
if (sign && len == width)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue