mirror of
https://github.com/python/cpython.git
synced 2025-12-04 00:30:19 +00:00
Locale data that contains regex metacharacters is now properly escaped.
Closes bug #1039270.
This commit is contained in:
parent
579b3e2416
commit
4f35c71543
3 changed files with 22 additions and 2 deletions
|
|
@ -15,6 +15,7 @@ import locale
|
||||||
import calendar
|
import calendar
|
||||||
from re import compile as re_compile
|
from re import compile as re_compile
|
||||||
from re import IGNORECASE
|
from re import IGNORECASE
|
||||||
|
from re import escape as re_escape
|
||||||
from datetime import date as datetime_date
|
from datetime import date as datetime_date
|
||||||
try:
|
try:
|
||||||
from thread import allocate_lock as _thread_allocate_lock
|
from thread import allocate_lock as _thread_allocate_lock
|
||||||
|
|
@ -232,7 +233,7 @@ class TimeRE(dict):
|
||||||
return ''
|
return ''
|
||||||
to_convert = to_convert[:]
|
to_convert = to_convert[:]
|
||||||
to_convert.sort(key=len, reverse=True)
|
to_convert.sort(key=len, reverse=True)
|
||||||
regex = '|'.join(to_convert)
|
regex = '|'.join(re_escape(stuff) for stuff in to_convert)
|
||||||
regex = '(?P<%s>%s' % (directive, regex)
|
regex = '(?P<%s>%s' % (directive, regex)
|
||||||
return '%s)' % regex
|
return '%s)' % regex
|
||||||
|
|
||||||
|
|
@ -245,7 +246,8 @@ class TimeRE(dict):
|
||||||
"""
|
"""
|
||||||
processed_format = ''
|
processed_format = ''
|
||||||
# The sub() call escapes all characters that might be misconstrued
|
# The sub() call escapes all characters that might be misconstrued
|
||||||
# as regex syntax.
|
# as regex syntax. Cannot use re.escape since we have to deal with
|
||||||
|
# format directives (%m, etc.).
|
||||||
regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
|
regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
|
||||||
format = regex_chars.sub(r"\\\1", format)
|
format = regex_chars.sub(r"\\\1", format)
|
||||||
whitespace_replacement = re_compile('\s+')
|
whitespace_replacement = re_compile('\s+')
|
||||||
|
|
|
||||||
|
|
@ -176,6 +176,19 @@ class TimeRETests(unittest.TestCase):
|
||||||
found = compiled_re.match("\w+ 10")
|
found = compiled_re.match("\w+ 10")
|
||||||
self.failUnless(found, "Escaping failed of format '\w+ 10'")
|
self.failUnless(found, "Escaping failed of format '\w+ 10'")
|
||||||
|
|
||||||
|
def test_locale_data_w_regex_metacharacters(self):
|
||||||
|
# Check that if locale data contains regex metacharacters they are
|
||||||
|
# escaped properly.
|
||||||
|
# Discovered by bug #1039270.
|
||||||
|
locale_time = _strptime.LocaleTime()
|
||||||
|
locale_time.timezone = (frozenset(("utc", "gmt",
|
||||||
|
"Tokyo (standard time)")),
|
||||||
|
frozenset("Tokyo (daylight time)"))
|
||||||
|
time_re = _strptime.TimeRE(locale_time)
|
||||||
|
self.failUnless(time_re.compile("%Z").match("Tokyo (standard time)"),
|
||||||
|
"locale data that contains regex metacharacters is not"
|
||||||
|
" properly escaped")
|
||||||
|
|
||||||
class StrptimeTests(unittest.TestCase):
|
class StrptimeTests(unittest.TestCase):
|
||||||
"""Tests for _strptime.strptime."""
|
"""Tests for _strptime.strptime."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,11 @@ Extension modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- time.strptime() now properly escapes timezones and all other locale-specific
|
||||||
|
strings for regex-specific symbols. Was breaking under Japanese Windows when
|
||||||
|
the timezone was specified as "Tokyo (standard time)".
|
||||||
|
Closes bug #1039270.
|
||||||
|
|
||||||
- Updates for the email package:
|
- Updates for the email package:
|
||||||
+ All deprecated APIs that in email 2.x issued warnings have been removed:
|
+ All deprecated APIs that in email 2.x issued warnings have been removed:
|
||||||
_encoder argument to the MIMEText constructor, Message.add_payload(),
|
_encoder argument to the MIMEText constructor, Message.add_payload(),
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue