#!/usr/bin/env python3
"""Classes to parse mailer-daemon messages."""

import calendar
import email
import email.message
import email.utils
import io
import os
import re
import sys


class Unparseable(Exception):
    """Raised when none of the known bounce formats matches a message."""


class ErrorMessage(email.message.Message):
    """A mail message that knows how to extract delivery errors from itself."""

    def __init__(self, *args, **kwargs):
        # Accept (and forward) the optional ``policy`` argument so the class
        # can be used directly as a parser factory.
        email.message.Message.__init__(self, *args, **kwargs)
        # Lower-cased subject; filled in by is_warning() for non-warnings and
        # used by get_errors() as the fallback error reason.
        self.sub = ''

    def is_warning(self):
        """Return 1 if the subject marks this as a warning-only message.

        For any other message with a subject, the lower-cased subject is
        remembered in ``self.sub`` and 0 is returned.  A message without a
        subject returns 0 and leaves ``self.sub`` untouched.
        """
        sub = self.get('Subject')
        if not sub:
            return 0
        sub = str(sub).lower()
        if sub.startswith('waiting mail') or 'warning' in sub:
            return 1
        self.sub = sub
        return 0

    def _body_text(self):
        """Return the text that follows the top-level header block.

        For multipart messages the sub-part headers are kept, because some
        patterns (``Original-Recipient:``, ``Diagnostic-Code:``) match them.
        """
        if not self.is_multipart():
            payload = self.get_payload()
            return payload if isinstance(payload, str) else ''
        # Re-serialize the message and drop everything up to the first blank
        # line, i.e. the top-level headers.
        return self.as_string().partition('\n\n')[2]

    def get_errors(self):
        """Return a list of 'address: reason' strings for this message.

        Each parser in EMPARSERS is tried in turn on the message body.

        Raises:
            Unparseable: if no parser recognises the message.
        """
        body = self._body_text()
        for parser in EMPARSERS:
            try:
                # Every parser gets its own stream positioned at the start
                # of the body.
                return parser(io.StringIO(body), self.sub)
            except Unparseable:
                pass
        raise Unparseable

# List of re's or tuples of re's.
# If a re, it should contain at least a group (?P<email>...) which
# should refer to the email address.  The re can also contain a group
# (?P<reason>...) which should refer to the reason (error message).
# If no reason is present, the emparse_list_reason list is used to
# find a reason.
# If a tuple, the tuple should contain 2 re's.  The first re finds a
# location, the second re is repeated one or more times to find
# multiple email addresses.  The second re is matched (not searched)
# where the previous match ended.
# The re's are compiled using the re module.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# compile the re's in the list and store them in-place.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE) if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]

# list of re's used to find reasons (error messages).
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order.  After the first match,
# no more expressions are searched for.  So, order is important.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)


def emparse_list(fp, sub):
    """Extract delivery errors from a bounce-message body.

    Args:
        fp: readable text stream holding the message body.
        sub: lower-cased subject, used as the reason when the body gives none.

    Returns:
        A list of whitespace-normalised 'address: reason' strings.

    Raises:
        Unparseable: if no address pattern matches the body.
    """
    data = fp.read()
    # Only look for addresses before the first "From:" line in the body.
    found = emparse_list_from.search(data)
    from_index = len(data) if found is None else found.start(0)

    errors = []
    emails = []
    reason = None
    for regexp in emparse_list_list:
        if isinstance(regexp, tuple):
            locator, repeated = regexp
            res = locator.search(data, 0, from_index)
            if res is None:
                continue
            # groupdict() yields None when the pattern has no 'reason' group
            # or the group did not take part in the match.
            reason = res.groupdict().get('reason')
            pos = res.end(0)
            while True:
                res = repeated.match(data, pos, from_index)
                if res is None:
                    break
                emails.append(res.group('email'))
                pos = res.end(0)
            break
        res = regexp.search(data, 0, from_index)
        if res is not None:
            emails.append(res.group('email'))
            reason = res.groupdict().get('reason')
            break
    if not emails:
        raise Unparseable

    if not reason:
        reason = sub or ''
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for regexp in emparse_list_reason:
            if isinstance(regexp, str):
                # Per-address pattern: substitute each address for "<>".
                # Walk backwards so matched addresses can be removed in place
                # (this also fixes the order in which errors are reported).
                for i in range(len(emails) - 1, -1, -1):
                    addr = emails[i]
                    exp = re.compile(re.escape(addr).join(regexp.split('<>')),
                                     re.MULTILINE)
                    res = exp.search(data)
                    if res is not None:
                        errors.append(' '.join(
                            (addr.strip() + ': ' + res.group('reason')).split()))
                        del emails[i]
                continue
            # Generic pattern: the first hit is the reason for all addresses
            # that are still unexplained.
            res = regexp.search(data)
            if res is not None:
                reason = res.group('reason')
                break
    errors.extend(' '.join((addr.strip() + ': ' + reason).split())
                  for addr in emails)
    return errors


EMPARSERS = [emparse_list]


def sort_numeric(a, b):
    """Old-style comparison function ordering numeric strings by value.

    Returns -1, 0 or 1.  Use functools.cmp_to_key() to sort with it, or
    simply sort with ``key=int``.
    """
    a = int(a)
    b = int(b)
    return (a > b) - (a < b)


def _short_date(msg):
    """Return the 'Mon DD' form of msg's Date header, or '??????'."""
    parsed = email.utils.parsedate(str(msg.get('date', '')))
    if parsed is None:
        return '??????'
    mm, dd = parsed[1:3]
    return '%s %02d' % (calendar.month_abbr[mm], dd)


def parsedir(dir, modify):
    """Parse every numerically named file in *dir* and print a report.

    Args:
        dir: folder containing one message per file, named by number.
        modify: if true, each handled file (warning or parsed) is renamed
            to ',' + name; unparseable files are left alone.

    The current working directory is not changed, so relative folder names
    keep working when several folders are processed in one run.
    """
    pat = re.compile('[0-9]+')
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them by numeric value
    files = sorted((fn for fn in os.listdir(dir) if pat.fullmatch(fn)),
                   key=int)

    for fn in files:
        path = os.path.join(dir, fn)
        # Lets try to parse the file.  Mail may contain arbitrary bytes, so
        # undecodable ones are replaced rather than aborting the whole run.
        with open(path, errors='replace') as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
        sender = email.utils.parseaddr(str(m.get('From', '')))
        print('%s\t%-40s\t'%(fn, sender[1]), end=' ')

        if m.is_warning():
            print('warning only')
            nwarn = nwarn + 1
            if modify:
                os.rename(path, os.path.join(dir, ','+fn))
            continue

        try:
            errors = m.get_errors()
        except Unparseable:
            print('** Not parseable')
            nbad = nbad + 1
            continue
        print(len(errors), 'errors')

        # Remember them
        stamp = '%s (%s)' % (fn, _short_date(m))
        for e in errors:
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = stamp
            else:
                errordict[e] = errordict[e] + 1
            errorlast[e] = stamp

        nok = nok + 1
        if modify:
            os.rename(path, os.path.join(dir, ','+fn))

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    report = sorted((errordict[e], errorfirst[e], errorlast[e], e)
                    for e in errordict)
    for num, first, last, e in report:
        print('%d %s - %s\t%s' % (num, first, last, e))


def main():
    """Command-line entry point: [-d] [folder ...].

    ``-d`` renames handled files; without folder arguments the built-in
    default folder is used.
    """
    modify = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-d':
        modify = 1
        del sys.argv[1]
    if len(sys.argv) > 1:
        for folder in sys.argv[1:]:
            parsedir(folder, modify)
    else:
        parsedir('/ufs/jack/Mail/errorsinbox', modify)


if __name__ == '__main__' or sys.argv[0] == __name__:
    main()