New version that is much more compact and easier to maintain. (Sjoerd)

This commit is contained in:
Guido van Rossum 1998-10-09 13:27:49 +00:00
parent 3886026bed
commit e33bba8d3d

View file

@ -3,7 +3,7 @@
import string import string
import rfc822 import rfc822
import calendar import calendar
import regex import re
import os import os
import sys import sys
@ -12,6 +12,7 @@ Unparseable = 'mailerdaemon.Unparseable'
class ErrorMessage(rfc822.Message): class ErrorMessage(rfc822.Message):
def __init__(self, fp): def __init__(self, fp):
rfc822.Message.__init__(self, fp) rfc822.Message.__init__(self, fp)
self.sub = ''
def is_warning(self): def is_warning(self):
sub = self.getheader('Subject') sub = self.getheader('Subject')
@ -32,245 +33,120 @@ class ErrorMessage(rfc822.Message):
pass pass
raise Unparseable raise Unparseable
sendmail_pattern = regex.compile('[0-9][0-9][0-9] ') # List of re's or tuples of re's.
def emparse_sendmail(fp, sub): # If a re, it should contain at least a group (?P<email>...) which
while 1: # should refer to the email address. The re can also contain a group
line = fp.readline() # (?P<reason>...) which should refer to the reason (error message).
if not line: # If no reason is present, the emparse_list_reason list is used to
raise Unparseable # find a reason.
line = line[:-1] # If a tuple, the tuple should contain 2 re's. The first re finds a
# location, the second re is repeated one or more times to find
# Check that we're not in the returned message yet # multiple email addresses. The second re is matched (not searched)
if string.lower(line)[:5] == 'from:': # where the previous match ended.
raise Unparseable # The re's are compiled using the re module.
line = string.split(line) emparse_list_list = [
if len(line) > 3 and \ 'error: (?P<reason>unresolvable): (?P<email>.+)',
((line[0] == '-----' and line[1] == 'Transcript') or ('----- The following addresses had permanent fatal errors -----\n',
(line[0] == '---' and line[1] == 'The' and '(?P<email>[^ \n].*)\n( .*\n)?'),
line[2] == 'transcript') or 'remote execution.*\n.*rmail (?P<email>.+)',
(line[0] == 'While' and ('The following recipients did not receive your message:\n\n',
line[1] == 'talking' and ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
line[2] == 'to')): '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
# Yes, found it! '^<(?P<email>.*)>:\n(?P<reason>.*)',
break '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
'^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
'^Original-Recipient: rfc822;(?P<email>.*)',
'^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
'^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
'^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
'^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
'^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
]
# compile the re's in the list and store them in-place.
for i in range(len(emparse_list_list)):
x = emparse_list_list[i]
if type(x) is type(''):
x = re.compile(x, re.MULTILINE)
else:
xl = []
for x in x:
xl.append(re.compile(x, re.MULTILINE))
x = tuple(xl)
del xl
emparse_list_list[i] = x
del x
del i
# list of re's used to find reasons (error messages).
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order. After the first match,
# no more expressions are searched for. So, order is important.
emparse_list_reason = [
r'^5\d{2} <>\.\.\. (?P<reason>.*)',
'<>\.\.\. (?P<reason>.*)',
re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
]
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
data = fp.read()
res = emparse_list_from.search(data)
if res is None:
from_index = len(data)
else:
from_index = res.start(0)
errors = [] errors = []
found_a_line = 0 emails = []
warnings = 0 reason = None
while 1: for regexp in emparse_list_list:
line = fp.readline() if type(regexp) is type(()):
if not line: res = regexp[0].search(data, 0, from_index)
break if res is not None:
line = line[:-1] try:
if not line: reason = res.group('reason')
continue except IndexError:
if sendmail_pattern.match(line) == 4: pass
# Yes, an error/warning line. Ignore 4, remember 5, stop on rest while 1:
if line[0] == '5': res = regexp[1].match(data, res.end(0), from_index)
errors.append(line) if res is None:
elif line[0] == '4': break
warnings = 1 emails.append(res.group('email'))
else:
raise Unparseable
line = string.split(line)
if line and line[0][:3] == '---':
break
found_a_line = 1
# special case for CWI sendmail
if len(line) > 1 and line[1] == 'Undelivered':
while 1:
line = fp.readline()
if not line:
break break
line = string.strip(line)
if not line:
break
errors.append(line + ': ' + sub)
# Empty transcripts are ok, others without an error are not.
if found_a_line and not (errors or warnings):
raise Unparseable
return errors
def emparse_cts(fp, sub):
while 1:
line = fp.readline()
if not line:
raise Unparseable
line = line[:-1]
# Check that we're not in the returned message yet
if string.lower(line)[:5] == 'from:':
raise Unparseable
line = string.split(line)
if len(line) > 3 and line[0][:2] == '|-' and line[1] == 'Failed' \
and line[2] == 'addresses':
# Yes, found it!
break
errors = []
while 1:
line = fp.readline()
if not line:
break
line = line[:-1]
if not line:
continue
if line[:2] == '|-':
break
errors.append(line)
return errors
def emparse_aol(fp, sub):
while 1:
line = fp.readline()
if not line:
raise Unparseable
line = line[:-1]
if line:
break
exp = 'The mail you sent could not be delivered to:'
if line[:len(exp)] != exp:
raise Unparseable
errors = []
while 1:
line = fp.readline()
if sendmail_pattern.match(line) == 4:
# Yes, an error/warning line. Ignore 4, remember 5, stop on rest
if line[0] == '5':
errors.append(line)
elif line[0] != '4':
raise Unparseable
elif line == '\n':
break
else: else:
raise Unparseable res = regexp.search(data, 0, from_index)
return errors if res is not None:
emails.append(res.group('email'))
def emparse_compuserve(fp, sub): try:
while 1: reason = res.group('reason')
line = fp.readline() except IndexError:
if not line: pass
raise Unparseable break
line = line[:-1] if not emails:
if line:
break
exp = 'Your message could not be delivered for the following reason:'
if line[:len(exp)] != exp:
raise Unparseable raise Unparseable
errors = [] if not reason:
while 1: reason = sub
line = fp.readline() if reason[:15] == 'returned mail: ':
if not line: break reason = reason[15:]
if line[:3] == '---': break for regexp in emparse_list_reason:
line = line[:-1] if type(regexp) is type(''):
if not line: continue for i in range(len(emails)-1,-1,-1):
if line == 'Please resend your message at a later time.': email = emails[i]
continue exp = re.compile(string.join(string.split(regexp, '<>'), re.escape(email)), re.MULTILINE)
line = 'Compuserve: ' + line res = exp.search(data)
errors.append(line) if res is not None:
errors.append(string.join(string.split(string.strip(email)+': '+res.group('reason'))))
del emails[i]
continue
res = regexp.search(data)
if res is not None:
reason = res.group('reason')
break
for email in emails:
errors.append(string.join(string.split(string.strip(email)+': '+reason)))
return errors return errors
prov_pattern = regex.compile('.* | \(.*\)') EMPARSERS = [emparse_list, ]
def emparse_providence(fp, sub):
while 1:
line = fp.readline()
if not line:
raise Unparseable
line = line[:-1]
# Check that we're not in the returned message yet
if string.lower(line)[:5] == 'from:':
raise Unparseable
exp = 'The following errors occurred'
if line[:len(exp)] == exp:
break
errors = []
while 1:
line = fp.readline()
if not line:
break
line = line[:-1]
if not line:
continue
if line[:4] == '----':
break
if prov_pattern.match(line) > 0:
errors.append(prov_pattern.group(1))
if not errors:
raise Unparseable
return errors
def emparse_x400(fp, sub):
exp = 'This report relates to your message:'
while 1:
line = fp.readline()
if not line:
raise Unparseable
line = line[:-1]
# Check that we're not in the returned message yet
if string.lower(line)[:5] == 'from:':
raise Unparseable
if line[:len(exp)] == exp:
break
errors = []
exp = 'Your message was not delivered to'
while 1:
line = fp.readline()
if not line:
break
line = line[:-1]
if not line:
continue
if line[:len(exp)] == exp:
error = string.strip(line[len(exp):])
sep = ': '
while 1:
line = fp.readline()
if not line:
break
line = line[:-1]
if not line:
break
if line[0] == ' ' and line[-1] != ':':
error = error + sep + string.strip(line)
sep = '; '
errors.append(error)
return errors
raise Unparseable
def emparse_passau(fp, sub):
exp = 'Unable to deliver message because'
while 1:
line = fp.readline()
if not line:
raise Unparseable
if string.lower(line)[:5] == 'from:':
raise Unparseable
if line[:len(exp)] == exp:
break
errors = []
exp = 'Returned Text follows'
while 1:
line = fp.readline()
if not line:
raise Unparseable
line = line[:-1]
# Check that we're not in the returned message yet
if string.lower(line)[:5] == 'from:':
raise Unparseable
if not line:
continue
if line[:len(exp)] == exp:
return errors
errors.append(string.strip(line))
EMPARSERS = [emparse_sendmail, emparse_aol, emparse_cts, emparse_compuserve,
emparse_providence, emparse_x400, emparse_passau]
def sort_numeric(a, b): def sort_numeric(a, b):
a = string.atoi(a) a = string.atoi(a)
@ -281,14 +157,14 @@ def sort_numeric(a, b):
def parsedir(dir, modify): def parsedir(dir, modify):
os.chdir(dir) os.chdir(dir)
pat = regex.compile('^[0-9]*$') pat = re.compile('^[0-9]*$')
errordict = {} errordict = {}
errorfirst = {} errorfirst = {}
errorlast = {} errorlast = {}
nok = nwarn = nbad = 0 nok = nwarn = nbad = 0
# find all numeric file names and sort them # find all numeric file names and sort them
files = filter(lambda fn, pat=pat: pat.match(fn) > 0, os.listdir('.')) files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.'))
files.sort(sort_numeric) files.sort(sort_numeric)
for fn in files: for fn in files:
@ -299,10 +175,12 @@ def parsedir(dir, modify):
print '%s\t%-40s\t'%(fn, sender[1]), print '%s\t%-40s\t'%(fn, sender[1]),
if m.is_warning(): if m.is_warning():
fp.close()
print 'warning only' print 'warning only'
nwarn = nwarn + 1 nwarn = nwarn + 1
if modify: if modify:
os.unlink(fn) os.rename(fn, ','+fn)
## os.unlink(fn)
continue continue
try: try:
@ -310,6 +188,7 @@ def parsedir(dir, modify):
except Unparseable: except Unparseable:
print '** Not parseable' print '** Not parseable'
nbad = nbad + 1 nbad = nbad + 1
fp.close()
continue continue
print len(errors), 'errors' print len(errors), 'errors'
@ -327,16 +206,22 @@ def parsedir(dir, modify):
errordict[e] = errordict[e] + 1 errordict[e] = errordict[e] + 1
errorlast[e] = '%s (%s)' % (fn, date) errorlast[e] = '%s (%s)' % (fn, date)
fp.close()
nok = nok + 1 nok = nok + 1
if modify: if modify:
os.unlink(fn) os.rename(fn, ','+fn)
## os.unlink(fn)
print '--------------' print '--------------'
print nok, 'files parsed,',nwarn,'files warning-only,', print nok, 'files parsed,',nwarn,'files warning-only,',
print nbad,'files unparseable' print nbad,'files unparseable'
print '--------------' print '--------------'
list = []
for e in errordict.keys(): for e in errordict.keys():
print errordict[e], errorfirst[e], '-', errorlast[e], '\t', e list.append(errordict[e], errorfirst[e], errorlast[e], e)
list.sort()
for num, first, last, e in list:
print '%d %s - %s\t%s' % (num, first, last, e)
def main(): def main():
modify = 0 modify = 0