mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
[3.13] gh-52551: Fix encoding issues in strftime() (GH-125193) (GH-125657)
Fix time.strftime(), the strftime() method and formatting of the
datetime classes datetime, date and time.
* Characters not encodable in the current locale are now acceptable in
the format string.
* Surrogate pairs and sequences of surrogateescape-encoded bytes are no
longer recombined.
* Embedded null character no longer terminates the format string.
This also fixes gh-78662 and gh-124531.
(cherry picked from commit ad3eac1963
)
This commit is contained in:
parent
d894d467a6
commit
08ccbb9b3f
5 changed files with 296 additions and 214 deletions
|
@ -2855,11 +2855,32 @@ class TestDateTime(TestDate):
|
|||
self.assertEqual(t.strftime("%z"), "-0200" + z)
|
||||
self.assertEqual(t.strftime("%:z"), "-02:00:" + z)
|
||||
|
||||
# bpo-34482: Check that surrogates don't cause a crash.
|
||||
try:
|
||||
t.strftime('%y\ud800%m %H\ud800%M')
|
||||
except UnicodeEncodeError:
|
||||
pass
|
||||
def test_strftime_special(self):
    """Check that unusual literal text in a format survives strftime().

    Each case pairs a format string with the exact result expected from
    the instance's strftime(); the literal text around the %c and %B
    directives must come back unchanged.
    """
    moment = self.theclass(2004, 12, 31, 6, 22, 33, 47)
    stamp = moment.strftime('%c')
    month = moment.strftime('%B')
    cases = [
        # gh-52551, gh-78662: Unicode strings should pass through strftime,
        # independently from locale.
        ('\U0001f40d', '\U0001f40d'),
        ('\U0001f4bb%c\U0001f40d%B', f'\U0001f4bb{stamp}\U0001f40d{month}'),
        ('%c\U0001f4bb%B\U0001f40d', f'{stamp}\U0001f4bb{month}\U0001f40d'),
        # Lone surrogates should pass through.
        ('\ud83d', '\ud83d'),
        ('\udc0d', '\udc0d'),
        ('\ud83d%c\udc0d%B', f'\ud83d{stamp}\udc0d{month}'),
        ('%c\ud83d%B\udc0d', f'{stamp}\ud83d{month}\udc0d'),
        ('%c\udc0d%B\ud83d', f'{stamp}\udc0d{month}\ud83d'),
        # Surrogate pairs should not recombine.
        ('\ud83d\udc0d', '\ud83d\udc0d'),
        ('%c\ud83d\udc0d%B', f'{stamp}\ud83d\udc0d{month}'),
        # Surrogate-escaped bytes should not recombine.
        ('\udcf0\udc9f\udc90\udc8d', '\udcf0\udc9f\udc90\udc8d'),
        ('%c\udcf0\udc9f\udc90\udc8d%B',
         f'{stamp}\udcf0\udc9f\udc90\udc8d{month}'),
        # gh-124531: The null character should not terminate the format
        # string.
        ('\0', '\0'),
        ('\0'*1000, '\0'*1000),
        ('\0%c\0%B', f'\0{stamp}\0{month}'),
        ('%c\0%B\0', f'{stamp}\0{month}\0'),
    ]
    # Same order as the original one-assert-per-line form; the first
    # mismatch still aborts the test.
    for fmt, expected in cases:
        self.assertEqual(moment.strftime(fmt), expected)
|
||||
|
||||
def test_extract(self):
|
||||
dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
|
||||
|
@ -3633,6 +3654,33 @@ class TestTime(HarmlessMixedComparison, unittest.TestCase):
|
|||
# gh-85432: The parameter was named "fmt" in the pure-Python impl.
|
||||
t.strftime(format="%f")
|
||||
|
||||
def test_strftime_special(self):
    """Check that unusual literal text in a format survives strftime().

    Each case pairs a format string with the exact result expected from
    the instance's strftime(); the literal text around the %I%p%Z and %X
    directives must come back unchanged.
    """
    moment = self.theclass(1, 2, 3, 4)
    clock = moment.strftime('%I%p%Z')
    local = moment.strftime('%X')
    cases = [
        # gh-52551, gh-78662: Unicode strings should pass through strftime,
        # independently from locale.
        ('\U0001f40d', '\U0001f40d'),
        ('\U0001f4bb%I%p%Z\U0001f40d%X',
         f'\U0001f4bb{clock}\U0001f40d{local}'),
        ('%I%p%Z\U0001f4bb%X\U0001f40d',
         f'{clock}\U0001f4bb{local}\U0001f40d'),
        # Lone surrogates should pass through.
        ('\ud83d', '\ud83d'),
        ('\udc0d', '\udc0d'),
        ('\ud83d%I%p%Z\udc0d%X', f'\ud83d{clock}\udc0d{local}'),
        ('%I%p%Z\ud83d%X\udc0d', f'{clock}\ud83d{local}\udc0d'),
        ('%I%p%Z\udc0d%X\ud83d', f'{clock}\udc0d{local}\ud83d'),
        # Surrogate pairs should not recombine.
        ('\ud83d\udc0d', '\ud83d\udc0d'),
        ('%I%p%Z\ud83d\udc0d%X', f'{clock}\ud83d\udc0d{local}'),
        # Surrogate-escaped bytes should not recombine.
        ('\udcf0\udc9f\udc90\udc8d', '\udcf0\udc9f\udc90\udc8d'),
        ('%I%p%Z\udcf0\udc9f\udc90\udc8d%X',
         f'{clock}\udcf0\udc9f\udc90\udc8d{local}'),
        # gh-124531: The null character should not terminate the format
        # string.
        ('\0', '\0'),
        ('\0'*1000, '\0'*1000),
        ('\0%I%p%Z\0%X', f'\0{clock}\0{local}'),
        ('%I%p%Z\0%X\0', f'{clock}\0{local}\0'),
    ]
    # Same order as the original one-assert-per-line form; the first
    # mismatch still aborts the test.
    for fmt, expected in cases:
        self.assertEqual(moment.strftime(fmt), expected)
|
||||
|
||||
def test_format(self):
|
||||
t = self.theclass(1, 2, 3, 4)
|
||||
self.assertEqual(t.__format__(''), str(t))
|
||||
|
@ -4084,9 +4132,8 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
|
|||
self.assertRaises(TypeError, t.strftime, "%Z")
|
||||
|
||||
# Issue #6697:
|
||||
if '_Fast' in self.__class__.__name__:
|
||||
Badtzname.tz = '\ud800'
|
||||
self.assertRaises(ValueError, t.strftime, "%Z")
|
||||
Badtzname.tz = '\ud800'
|
||||
self.assertEqual(t.strftime("%Z"), '\ud800')
|
||||
|
||||
def test_hash_edge_cases(self):
|
||||
# Offsets that overflow a basic time.
|
||||
|
|
|
@ -181,8 +181,33 @@ class TimeTestCase(unittest.TestCase):
|
|||
self.fail('conversion specifier: %r failed.' % format)
|
||||
|
||||
self.assertRaises(TypeError, time.strftime, b'%S', tt)
|
||||
# embedded null character
|
||||
self.assertRaises(ValueError, time.strftime, '%S\0', tt)
|
||||
|
||||
def test_strftime_special(self):
    """Check that unusual literal text in a format survives time.strftime().

    Each case pairs a format string with the exact result expected from
    time.strftime() for the struct_time built from self.t; the literal
    text around the %c and %B directives must come back unchanged.
    """
    parts = time.gmtime(self.t)
    stamp = time.strftime('%c', parts)
    month = time.strftime('%B', parts)
    cases = [
        # gh-52551, gh-78662: Unicode strings should pass through strftime,
        # independently from locale.
        ('\U0001f40d', '\U0001f40d'),
        ('\U0001f4bb%c\U0001f40d%B', f'\U0001f4bb{stamp}\U0001f40d{month}'),
        ('%c\U0001f4bb%B\U0001f40d', f'{stamp}\U0001f4bb{month}\U0001f40d'),
        # Lone surrogates should pass through.
        ('\ud83d', '\ud83d'),
        ('\udc0d', '\udc0d'),
        ('\ud83d%c\udc0d%B', f'\ud83d{stamp}\udc0d{month}'),
        ('%c\ud83d%B\udc0d', f'{stamp}\ud83d{month}\udc0d'),
        ('%c\udc0d%B\ud83d', f'{stamp}\udc0d{month}\ud83d'),
        # Surrogate pairs should not recombine.
        ('\ud83d\udc0d', '\ud83d\udc0d'),
        ('%c\ud83d\udc0d%B', f'{stamp}\ud83d\udc0d{month}'),
        # Surrogate-escaped bytes should not recombine.
        ('\udcf0\udc9f\udc90\udc8d', '\udcf0\udc9f\udc90\udc8d'),
        ('%c\udcf0\udc9f\udc90\udc8d%B',
         f'{stamp}\udcf0\udc9f\udc90\udc8d{month}'),
        # gh-124531: The null character should not terminate the format
        # string.
        ('\0', '\0'),
        ('\0'*1000, '\0'*1000),
        ('\0%c\0%B', f'\0{stamp}\0{month}'),
        ('%c\0%B\0', f'{stamp}\0{month}\0'),
    ]
    # Same order as the original one-assert-per-line form; the first
    # mismatch still aborts the test.
    for fmt, expected in cases:
        self.assertEqual(time.strftime(fmt, parts), expected)
|
||||
|
||||
def _bounds_checking(self, func):
|
||||
# Make sure that strftime() checks the bounds of the various parts
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue