gh-53203: Fix strptime() for %c, %x and %X formats on some locales (#135971)

* Add detection of decimal non-ASCII alt digits.
* Add support of non-decimal alt digits on locale lzh_TW.
* Accept only numbers in correct range if alt digits are known.
* Fix bug in detecting the position of the week day name on locales byn_ER and wal_ET.
* Fix support of single-digit hour on locales ar_SA and bg_BG.
* Add support for %T, %R, %r, %C, %OC.
* Prepare code to use nl_langinfo().
This commit is contained in:
Serhiy Storchaka 2025-06-27 10:50:59 +03:00 committed by GitHub
parent 0c6c09b737
commit 07183ebce3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 164 additions and 56 deletions

View file

@ -221,14 +221,16 @@ class StrptimeTests(unittest.TestCase):
self.assertRaises(ValueError, _strptime._strptime_time, data_string="%d",
format="%A")
for bad_format in ("%", "% ", "%\n"):
with self.assertRaisesRegex(ValueError, "stray % in format "):
with (self.subTest(format=bad_format),
self.assertRaisesRegex(ValueError, "stray % in format ")):
_strptime._strptime_time("2005", bad_format)
for bad_format in ("%e", "%Oe", "%O", "%O ", "%Ee", "%E", "%E ",
"%.", "%+", "%_", "%~", "%\\",
for bad_format in ("%i", "%Oi", "%O", "%O ", "%Ee", "%E", "%E ",
"%.", "%+", "%~", "%\\",
"%O.", "%O+", "%O_", "%O~", "%O\\"):
directive = bad_format[1:].rstrip()
with self.assertRaisesRegex(ValueError,
f"'{re.escape(directive)}' is a bad directive in format "):
with (self.subTest(format=bad_format),
self.assertRaisesRegex(ValueError,
f"'{re.escape(directive)}' is a bad directive in format ")):
_strptime._strptime_time("2005", bad_format)
msg_week_no_year_or_weekday = r"ISO week directive '%V' must be used with " \
@ -480,13 +482,11 @@ class StrptimeTests(unittest.TestCase):
# * Year is not included: ha_NG.
# * Use non-Gregorian calendar: lo_LA, thai, th_TH.
# On Windows: ar_IN, ar_SA, fa_IR, ps_AF.
#
# BUG: Generates regexp that does not match the current date and time
# for lzh_TW.
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG',
'csb_PL', 'br_FR', 'gez_ET', 'brx_IN',
'my_MM', 'or_IN', 'shn_MM', 'az_IR')
'my_MM', 'or_IN', 'shn_MM', 'az_IR',
'byn_ER', 'wal_ET', 'lzh_TW')
def test_date_time_locale(self):
# Test %c directive
loc = locale.getlocale(locale.LC_TIME)[0]
@ -525,11 +525,9 @@ class StrptimeTests(unittest.TestCase):
# NB: Does not roundtrip because use non-Gregorian calendar:
# lo_LA, thai, th_TH. On Windows: ar_IN, ar_SA, fa_IR, ps_AF.
# BUG: Generates regexp that does not match the current date
# for lzh_TW.
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
'he_IL', 'eu_ES', 'ar_AE',
'az_IR', 'my_MM', 'or_IN', 'shn_MM')
'az_IR', 'my_MM', 'or_IN', 'shn_MM', 'lzh_TW')
def test_date_locale(self):
# Test %x directive
now = time.time()
@ -546,7 +544,7 @@ class StrptimeTests(unittest.TestCase):
# NB: Dates before 1969 do not roundtrip on many locales, including C.
@unittest.skipIf(support.linked_to_musl(), "musl libc issue, bpo-46390")
@run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
'eu_ES', 'ar_AE', 'my_MM', 'shn_MM')
'eu_ES', 'ar_AE', 'my_MM', 'shn_MM', 'lzh_TW')
def test_date_locale2(self):
# Test %x directive
loc = locale.getlocale(locale.LC_TIME)[0]
@ -562,11 +560,11 @@ class StrptimeTests(unittest.TestCase):
# norwegian, nynorsk.
# * Hours are in 12-hour notation without AM/PM indication: hy_AM,
# ms_MY, sm_WS.
# BUG: Generates regexp that does not match the current time for lzh_TW.
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP',
'aa_ET', 'am_ET', 'az_IR', 'byn_ER', 'fa_IR', 'gez_ET',
'my_MM', 'om_ET', 'or_IN', 'shn_MM', 'sid_ET', 'so_SO',
'ti_ET', 'tig_ER', 'wal_ET')
'ti_ET', 'tig_ER', 'wal_ET', 'lzh_TW',
'ar_SA', 'bg_BG')
def test_time_locale(self):
# Test %X directive
loc = locale.getlocale(locale.LC_TIME)[0]