17db96d56Sopenharmony_ci#!/usr/bin/env python3
27db96d56Sopenharmony_ci"""Classes to parse mailer-daemon messages."""
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ciimport calendar
57db96d56Sopenharmony_ciimport email.message
67db96d56Sopenharmony_ciimport re
77db96d56Sopenharmony_ciimport os
87db96d56Sopenharmony_ciimport sys
97db96d56Sopenharmony_ci
107db96d56Sopenharmony_ci
class Unparseable(Exception):
    """Raised when no bounced address can be extracted from a message."""
137db96d56Sopenharmony_ci
147db96d56Sopenharmony_ci
class ErrorMessage(email.message.Message):
    """An e-mail message that is expected to be a mailer-daemon report.

    Adds two operations on top of ``email.message.Message``:
    ``is_warning()`` classifies the message by its Subject, and
    ``get_errors()`` runs the registered parsers over the message body.
    """

    def __init__(self):
        super().__init__()
        # Lower-cased Subject; filled in by is_warning() when the message
        # is not a warning, and passed to the parsers by get_errors().
        self.sub = ''

    def is_warning(self):
        """Return 1 if the Subject marks this as a mere warning, else 0.

        A message counts as a warning when its lower-cased Subject starts
        with 'waiting mail' or contains 'warning'.  A message without a
        Subject is not a warning.  For a non-warning with a Subject, the
        lower-cased Subject is stored in ``self.sub``.
        """
        sub = self.get('Subject')
        if not sub:
            return 0
        sub = sub.lower()
        if sub.startswith('waiting mail') or 'warning' in sub:
            return 1
        self.sub = sub
        return 0

    def _body_text(self):
        """Return the text that follows this message's own headers."""
        payload = self.get_payload()
        if isinstance(payload, list):
            # Multipart: the parser has split the body into sub-messages,
            # so serialise the whole message again and drop everything up
            # to the first blank line (the end of our own headers).
            return self.as_string().partition('\n\n')[2]
        if payload is None:
            return ''
        return payload

    def get_errors(self):
        """Return the result of the first parser that accepts the body.

        Each callable in EMPARSERS is given a fresh file-like object
        positioned at the start of the body, plus ``self.sub``.  The
        previous implementation relied on ``rewindbody()`` and ``fp``,
        which ``email.message.Message`` does not provide.

        Raises:
            Unparseable: if every parser raised Unparseable.
        """
        import io

        body = self._body_text()
        for p in EMPARSERS:
            try:
                return p(io.StringIO(body), self.sub)
            except Unparseable:
                pass
        raise Unparseable
407db96d56Sopenharmony_ci
# Table of patterns used to locate the bounced address(es).
#
# Each entry is either a single regular expression or a 2-tuple of
# regular expressions.
#
# * Single expression: it must define a group (?P<email>...) matching the
#   e-mail address, and may define a group (?P<reason>...) matching the
#   error message.  When no reason is captured, emparse_list_reason is
#   consulted instead.
# * 2-tuple: the first expression is searched for to find a location; the
#   second is then matched (not searched) repeatedly, each time starting
#   where the previous match ended, to collect several addresses.
#
# The entries are written as strings and compiled with the re module
# right below.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Swap every string for its compiled form (tuples become tuples of
# compiled patterns).  Slice assignment keeps the same list object.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE)
    if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]
847db96d56Sopenharmony_ci
# Matches the start of a "From:" header line, in any letter case.
emparse_list_from = re.compile('^From:', re.IGNORECASE | re.MULTILINE)

# Patterns used to find a reason (error message) when the address
# pattern did not capture one.
# A plain string is a template: every "<>" in it is replaced by a copy of
# the e-mail address before it is compiled.  Compiled entries are used
# as they are.
# The entries are tried in order and searching stops after the first
# compiled entry that matches, so the order matters.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
def emparse_list(fp, sub):
    """Extract the bounced addresses and their reasons from a message body.

    fp is read in full.  Addresses are looked for with the patterns in
    emparse_list_list, but only in the text before the first "From:"
    line (the whole text if there is none).  The first pattern that
    matches decides the outcome; later patterns are not tried.

    sub is the fallback reason; a leading 'returned mail: ' is removed
    from it.  It is only used when the address pattern captured no
    reason and none of the emparse_list_reason patterns finds one.

    Returns a list of 'address: reason' strings with every run of
    whitespace collapsed to a single space.

    Raises Unparseable when no address was found.
    """
    text = fp.read()
    from_line = emparse_list_from.search(text)
    limit = len(text) if from_line is None else from_line.start(0)

    found = []
    addresses = []
    reason = None

    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            hit = pattern[0].search(text, 0, limit)
            if hit is None:
                continue
            try:
                reason = hit.group('reason')
            except IndexError:
                # The locating pattern defines no 'reason' group.
                pass
            # Collect one address per consecutive match of the second
            # pattern, each anchored where the previous match ended.
            while True:
                hit = pattern[1].match(text, hit.end(0), limit)
                if hit is None:
                    break
                addresses.append(hit.group('email'))
            break

        hit = pattern.search(text, 0, limit)
        if hit is None:
            continue
        addresses.append(hit.group('email'))
        try:
            reason = hit.group('reason')
        except IndexError:
            # This pattern defines no 'reason' group.
            pass
        break

    if not addresses:
        raise Unparseable

    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for pattern in emparse_list_reason:
            if isinstance(pattern, str):
                # Template entry: look for an address-specific reason.
                # Walk backwards so deleting an entry does not shift the
                # positions still to be visited.
                for idx in reversed(range(len(addresses))):
                    addr = addresses[idx]
                    specific = re.compile(
                        pattern.replace('<>', re.escape(addr)), re.MULTILINE)
                    hit = specific.search(text)
                    if hit is not None:
                        line = addr.strip() + ': ' + hit.group('reason')
                        found.append(' '.join(line.split()))
                        del addresses[idx]
                continue
            hit = pattern.search(text)
            if hit is not None:
                reason = hit.group('reason')
                break

    # Whatever addresses are left share the general reason.
    found.extend(
        ' '.join((addr.strip() + ': ' + reason).split()) for addr in addresses)
    return found

# Parsers tried, in order, by ErrorMessage.get_errors().
EMPARSERS = [emparse_list]
1557db96d56Sopenharmony_ci
def sort_numeric(a, b):
    """Three-way compare a and b by their integer value.

    Both arguments are converted with int().  Returns -1, 0 or 1 when a
    is respectively smaller than, equal to, or greater than b.
    """
    left, right = int(a), int(b)
    return (left > right) - (left < right)
1657db96d56Sopenharmony_ci
def parsedir(dir, modify):
    """Parse every numerically named message file in dir and print a report.

    The process changes its working directory to dir.  Files whose names
    consist only of digits are handled in numeric order.  For each file
    one status line is printed: 'warning only', '** Not parseable', or
    the number of errors found.  A totals line and a per-error summary
    (count, first and last file with date, error text) follow.

    Args:
        dir: directory holding the message files.
        modify: when true, every file that was a warning or was parsed
            successfully is renamed by prefixing its name with ','.
            Unparseable files are left alone.
    """
    # Function-scope import: these helpers replace the getaddr()/getdate()
    # methods that email.message.Message does not have.
    from email.utils import parseaddr, parsedate

    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them by numeric value
    files = [fn for fn in os.listdir('.') if pat.match(fn) is not None]
    files.sort(key=int)

    for fn in files:
        # The parser consumes the whole file, so it can be closed before
        # anything else (in particular before a rename) happens.
        # errors='replace' keeps undecodable bytes from aborting the run.
        with open(fn, errors='replace') as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
        sender = parseaddr(m.get('From', ''))
        print('%s\t%-40s\t'%(fn, sender[1]), end=' ')

        if m.is_warning():
            print('warning only')
            nwarn += 1
            if modify:
                os.rename(fn, ','+fn)
            continue

        try:
            errors = m.get_errors()
        except Unparseable:
            print('** Not parseable')
            nbad += 1
            continue
        print(len(errors), 'errors')

        # The date belongs to the message, not to the individual error,
        # so it is worked out once per file.
        try:
            parsed = parsedate(m.get('Date', ''))
            date = '%s %02d' % (calendar.month_abbr[parsed[1]], parsed[2])
        except (TypeError, ValueError, IndexError):
            # Missing or malformed Date header.
            date = '??????'
        where = '%s (%s)' % (fn, date)

        # Remember them
        for e in errors:
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = where
            else:
                errordict[e] += 1
            errorlast[e] = where

        nok += 1
        if modify:
            os.rename(fn, ','+fn)

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    # Named 'summary' rather than 'list': a local called 'list' would
    # shadow the builtin for the whole function.
    summary = sorted(
        (errordict[e], errorfirst[e], errorlast[e], e) for e in errordict)
    for num, first, last, e in summary:
        print('%d %s - %s\t%s' % (num, first, last, e))
2337db96d56Sopenharmony_ci
def main():
    """Command-line entry point.

    An optional leading '-d' argument switches on modify mode and is
    removed from sys.argv.  Every remaining argument is a folder handed
    to parsedir(); without any, the built-in default folder is used.
    """
    modify = 0
    if sys.argv[1:2] == ['-d']:
        modify = 1
        del sys.argv[1]
    folders = sys.argv[1:]
    if not folders:
        folders = ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)

if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
247