mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Patch by Mike Meyer:
Extended the rfc822 parsedate routines to handle the cases they failed on in an archive of ~37,000 messages. I believe the changes are compatible, in that all dates that were previously parsed correctly are still parsed correctly. [I still see problems with some messages, but no showstoppers.]
This commit is contained in:
parent
9a4d63730e
commit
db01ee0e22
1 changed file with 19 additions and 5 deletions
|
@ -748,9 +748,11 @@ def dump_address_pair(pair):
|
||||||
|
|
||||||
# Parse a date field
|
# Parse a date field
|
||||||
|
|
||||||
_monthnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
|
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
|
||||||
'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
|
'aug', 'sep', 'oct', 'nov', 'dec',
|
||||||
_daynames = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
|
'january', 'february', 'march', 'april', 'may', 'june', 'july',
|
||||||
|
'august', 'september', 'october', 'november', 'december']
|
||||||
|
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
|
||||||
|
|
||||||
# The timezone table does not include the military time zones defined
|
# The timezone table does not include the military time zones defined
|
||||||
# in RFC822, other than Z. According to RFC1123, the description in
|
# in RFC822, other than Z. According to RFC1123, the description in
|
||||||
|
@ -773,7 +775,7 @@ def parsedate_tz(data):
|
||||||
Accounts for military timezones.
|
Accounts for military timezones.
|
||||||
"""
|
"""
|
||||||
data = string.split(data)
|
data = string.split(data)
|
||||||
if data[0][-1] == ',' or data[0] in _daynames:
|
if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames:
|
||||||
# There's a dayname here. Skip it
|
# There's a dayname here. Skip it
|
||||||
del data[0]
|
del data[0]
|
||||||
if len(data) == 3: # RFC 850 date, deprecated
|
if len(data) == 3: # RFC 850 date, deprecated
|
||||||
|
@ -791,11 +793,23 @@ def parsedate_tz(data):
|
||||||
return None
|
return None
|
||||||
data = data[:5]
|
data = data[:5]
|
||||||
[dd, mm, yy, tm, tz] = data
|
[dd, mm, yy, tm, tz] = data
|
||||||
|
mm = string.lower(mm)
|
||||||
if not mm in _monthnames:
|
if not mm in _monthnames:
|
||||||
dd, mm, yy, tm, tz = mm, dd, tm, yy, tz
|
dd, mm = mm, string.lower(dd)
|
||||||
if not mm in _monthnames:
|
if not mm in _monthnames:
|
||||||
return None
|
return None
|
||||||
mm = _monthnames.index(mm)+1
|
mm = _monthnames.index(mm)+1
|
||||||
|
if dd[-1] == ',':
|
||||||
|
dd = dd[:-1]
|
||||||
|
i = string.find(yy, ':')
|
||||||
|
if i > 0:
|
||||||
|
yy, tm = tm, yy
|
||||||
|
if yy[-1] == ',':
|
||||||
|
yy = yy[:-1]
|
||||||
|
if yy[0] not in string.digits:
|
||||||
|
yy, tz = tz, yy
|
||||||
|
if tm[-1] == ',':
|
||||||
|
tm = tm[:-1]
|
||||||
tm = string.splitfields(tm, ':')
|
tm = string.splitfields(tm, ':')
|
||||||
if len(tm) == 2:
|
if len(tm) == 2:
|
||||||
[thh, tmm] = tm
|
[thh, tmm] = tm
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue