mirror of
https://github.com/python/cpython.git
synced 2025-08-30 21:48:47 +00:00
Close #14648: Compute correctly maxchar in str.format() for substrings
This commit is contained in:
parent
0b7d7c9544
commit
ece58deb9f
4 changed files with 50 additions and 6 deletions
|
@ -710,6 +710,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_Substring(
|
||||||
Py_ssize_t start,
|
Py_ssize_t start,
|
||||||
Py_ssize_t end);
|
Py_ssize_t end);
|
||||||
|
|
||||||
|
#ifndef Py_LIMITED_API
|
||||||
|
/* Compute the maximum character of the substring unicode[start:end].
|
||||||
|
Return 127 for an empty string. */
|
||||||
|
PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
|
||||||
|
PyObject *unicode,
|
||||||
|
Py_ssize_t start,
|
||||||
|
Py_ssize_t end);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Copy the string into a UCS4 buffer including the null character if copy_null
|
/* Copy the string into a UCS4 buffer including the null character if copy_null
|
||||||
is set. Return NULL and raise an exception on error. Raise a ValueError if
|
is set. Return NULL and raise an exception on error. Raise a ValueError if
|
||||||
the buffer is smaller than the string. Return buffer on success.
|
the buffer is smaller than the string. Return buffer on success.
|
||||||
|
|
|
@ -924,6 +924,14 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertRaises(ValueError, format, '', '#')
|
self.assertRaises(ValueError, format, '', '#')
|
||||||
self.assertRaises(ValueError, format, '', '#20')
|
self.assertRaises(ValueError, format, '', '#20')
|
||||||
|
|
||||||
|
# Non-ASCII
|
||||||
|
self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"),
|
||||||
|
'ABC\u0410\u0411\u0412')
|
||||||
|
self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"),
|
||||||
|
'ABC')
|
||||||
|
self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"),
|
||||||
|
'')
|
||||||
|
|
||||||
def test_format_map(self):
|
def test_format_map(self):
|
||||||
self.assertEqual(''.format_map({}), '')
|
self.assertEqual(''.format_map({}), '')
|
||||||
self.assertEqual('a'.format_map({}), 'a')
|
self.assertEqual('a'.format_map({}), 'a')
|
||||||
|
@ -1056,8 +1064,6 @@ class UnicodeTest(string_tests.CommonTest,
|
||||||
self.assertEqual('%f' % INF, 'inf')
|
self.assertEqual('%f' % INF, 'inf')
|
||||||
self.assertEqual('%F' % INF, 'INF')
|
self.assertEqual('%F' % INF, 'INF')
|
||||||
|
|
||||||
self.assertEqual(format("\u0410\u0411\u0412", "s"), "АБВ")
|
|
||||||
|
|
||||||
def test_startswith_endswith_errors(self):
|
def test_startswith_endswith_errors(self):
|
||||||
for meth in ('foo'.startswith, 'foo'.endswith):
|
for meth in ('foo'.startswith, 'foo'.endswith):
|
||||||
with self.assertRaises(TypeError) as cm:
|
with self.assertRaises(TypeError) as cm:
|
||||||
|
|
|
@ -1957,6 +1957,37 @@ PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Py_UCS4
|
||||||
|
_PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
|
||||||
|
{
|
||||||
|
enum PyUnicode_Kind kind;
|
||||||
|
void *startptr, *endptr;
|
||||||
|
|
||||||
|
assert(PyUnicode_IS_READY(unicode));
|
||||||
|
assert(0 <= start);
|
||||||
|
assert(end <= PyUnicode_GET_LENGTH(unicode));
|
||||||
|
assert(start <= end);
|
||||||
|
|
||||||
|
if (start == 0 && end == PyUnicode_GET_LENGTH(unicode))
|
||||||
|
return PyUnicode_MAX_CHAR_VALUE(unicode);
|
||||||
|
|
||||||
|
if (start == end)
|
||||||
|
return 127;
|
||||||
|
|
||||||
|
kind = PyUnicode_KIND(unicode);
|
||||||
|
startptr = PyUnicode_DATA(unicode);
|
||||||
|
endptr = (char*)startptr + end * kind;
|
||||||
|
if (start)
|
||||||
|
startptr = (char*)startptr + start * kind;
|
||||||
|
switch(kind)
|
||||||
|
{
|
||||||
|
case PyUnicode_1BYTE_KIND: return ucs1lib_find_max_char(startptr, endptr);
|
||||||
|
case PyUnicode_2BYTE_KIND: return ucs2lib_find_max_char(startptr, endptr);
|
||||||
|
default:
|
||||||
|
case PyUnicode_4BYTE_KIND: return ucs4lib_find_max_char(startptr, endptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Ensure that a string uses the most efficient storage, if it is not the
|
/* Ensure that a string uses the most efficient storage, if it is not the
|
||||||
case: create a new string of the right kind. Write NULL into *p_unicode
|
case: create a new string of the right kind. Write NULL into *p_unicode
|
||||||
on error. */
|
on error. */
|
||||||
|
|
|
@ -716,7 +716,7 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
|
||||||
Py_ssize_t pos;
|
Py_ssize_t pos;
|
||||||
Py_ssize_t len = PyUnicode_GET_LENGTH(value);
|
Py_ssize_t len = PyUnicode_GET_LENGTH(value);
|
||||||
PyObject *result = NULL;
|
PyObject *result = NULL;
|
||||||
Py_UCS4 maxchar = 127;
|
Py_UCS4 maxchar;
|
||||||
|
|
||||||
/* sign is not allowed on strings */
|
/* sign is not allowed on strings */
|
||||||
if (format->sign != '\0') {
|
if (format->sign != '\0') {
|
||||||
|
@ -747,11 +747,9 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format)
|
||||||
len = format->precision;
|
len = format->precision;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (len)
|
|
||||||
maxchar = PyUnicode_MAX_CHAR_VALUE(value);
|
|
||||||
|
|
||||||
calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
|
calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
|
||||||
|
|
||||||
|
maxchar = _PyUnicode_FindMaxChar(value, 0, len);
|
||||||
if (lpad != 0 || rpad != 0)
|
if (lpad != 0 || rpad != 0)
|
||||||
maxchar = Py_MAX(maxchar, format->fill_char);
|
maxchar = Py_MAX(maxchar, format->fill_char);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue