mirror of
https://github.com/python/cpython.git
synced 2025-08-04 17:08:35 +00:00
gh-70278: Fix PyUnicode_FromFormat() with precision for %s and %V (GH-120365)
PyUnicode_FromFormat() no longer produces a trailing \ufffd character for a truncated C string when a precision is used with %s and %V. It now truncates the string before the start of a truncated multibyte sequence.
This commit is contained in:
parent
22b8a35d6e
commit
6eb23b1311
3 changed files with 59 additions and 4 deletions
|
@ -419,8 +419,29 @@ class CAPITest(unittest.TestCase):
|
|||
# truncated string
|
||||
check_format('abc',
|
||||
b'%.3s', b'abcdef')
|
||||
check_format('abc[',
|
||||
b'%.6s', 'abc[\u20ac]'.encode('utf8'))
|
||||
check_format('abc[\u20ac',
|
||||
b'%.7s', 'abc[\u20ac]'.encode('utf8'))
|
||||
check_format('abc[\ufffd',
|
||||
b'%.5s', 'abc[\u20ac]'.encode('utf8'))
|
||||
b'%.5s', b'abc[\xff]')
|
||||
check_format('abc[',
|
||||
b'%.6s', b'abc[\xe2\x82]')
|
||||
check_format('abc[\ufffd]',
|
||||
b'%.7s', b'abc[\xe2\x82]')
|
||||
check_format('abc[\ufffd',
|
||||
b'%.7s', b'abc[\xe2\x82\0')
|
||||
check_format(' abc[',
|
||||
b'%10.6s', 'abc[\u20ac]'.encode('utf8'))
|
||||
check_format(' abc[\u20ac',
|
||||
b'%10.7s', 'abc[\u20ac]'.encode('utf8'))
|
||||
check_format(' abc[\ufffd',
|
||||
b'%10.5s', b'abc[\xff]')
|
||||
check_format(' abc[',
|
||||
b'%10.6s', b'abc[\xe2\x82]')
|
||||
check_format(' abc[\ufffd]',
|
||||
b'%10.7s', b'abc[\xe2\x82]')
|
||||
|
||||
check_format("'\\u20acABC'",
|
||||
b'%A', '\u20acABC')
|
||||
check_format("'\\u20",
|
||||
|
@ -433,10 +454,31 @@ class CAPITest(unittest.TestCase):
|
|||
b'%.3S', '\u20acABCDEF')
|
||||
check_format('\u20acAB',
|
||||
b'%.3U', '\u20acABCDEF')
|
||||
|
||||
check_format('\u20acAB',
|
||||
b'%.3V', '\u20acABCDEF', None)
|
||||
check_format('abc[',
|
||||
b'%.6V', None, 'abc[\u20ac]'.encode('utf8'))
|
||||
check_format('abc[\u20ac',
|
||||
b'%.7V', None, 'abc[\u20ac]'.encode('utf8'))
|
||||
check_format('abc[\ufffd',
|
||||
b'%.5V', None, 'abc[\u20ac]'.encode('utf8'))
|
||||
b'%.5V', None, b'abc[\xff]')
|
||||
check_format('abc[',
|
||||
b'%.6V', None, b'abc[\xe2\x82]')
|
||||
check_format('abc[\ufffd]',
|
||||
b'%.7V', None, b'abc[\xe2\x82]')
|
||||
check_format(' abc[',
|
||||
b'%10.6V', None, 'abc[\u20ac]'.encode('utf8'))
|
||||
check_format(' abc[\u20ac',
|
||||
b'%10.7V', None, 'abc[\u20ac]'.encode('utf8'))
|
||||
check_format(' abc[\ufffd',
|
||||
b'%10.5V', None, b'abc[\xff]')
|
||||
check_format(' abc[',
|
||||
b'%10.6V', None, b'abc[\xe2\x82]')
|
||||
check_format(' abc[\ufffd]',
|
||||
b'%10.7V', None, b'abc[\xe2\x82]')
|
||||
check_format(' abc[\ufffd',
|
||||
b'%10.7V', None, b'abc[\xe2\x82\0')
|
||||
|
||||
# following tests comes from #7330
|
||||
# test width modifier and precision modifier with %S
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue