# Copyright (C) 2002-2007 Python Software Foundation
# Contact: email-sig@python.org

"""Email address parsing code.

Lifted directly from rfc822.py.  This should eventually be rewritten.
"""

__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]

import calendar
import time

SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
# Abbreviated names occupy indexes 0-11 and full names 12-23, so a full
# name maps to its month number after subtracting 12 (see _parsedate_tz).
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are written in +-HHMM form (e.g. -500 means five hours west of
# UTC); _parsedate_tz converts them to seconds.
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Returns a 10-tuple whose first nine items can be passed to
    time.mktime() and whose last item is the timezone offset from UTC in
    seconds, or None if the string cannot be parsed.

    Named zones are resolved through the _timezones table (which, of the
    military zones, only knows 'Z').  A missing, unrecognised or -0000
    zone is reported as an offset of 0.
    """
    res = _parsedate_tz(data)
    if not res:
        return
    # _parsedate_tz() uses None for "no usable zone information"; the public
    # function flattens that to 0.
    if res[9] is None:
        res[9] = 0
    return tuple(res)

def _parsedate_tz(data):
    """Convert date to extended time tuple.

    The last (additional) element is the time zone offset in seconds, except if
    the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
    the source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.

    The result is a 10-item list, or None if `data' cannot be parsed.  The
    last element is also None when the zone is missing or unrecognised.
    """
    if not data:
        return None
    data = data.split()
    if not data:  # This happens for whitespace-only input.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("12:00:00+0100"); split it off.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not (dd and mm and yy):
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Accept "Apr 3" as well as "3 Apr".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # A full month name was matched; fold it back to 1..12.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are transposed.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
        if not yy:
            return None
    if not yy[0].isdigit():
        # Year and zone are transposed.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
        # "-0000" means UTC with no knowledge of the source timezone.
        if tzoffset==0 and tz.startswith('-'):
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]


def parsedate(data):
    """Convert a time string to a time tuple.

    Returns the first nine items of parsedate_tz()'s result, or None if the
    string cannot be parsed.
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    else:
        return t


def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.

    `data' may be any sequence of ten items; item 9 is the zone offset in
    seconds, or None when the zone is unknown.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        # tuple() lets list input work too; list + tuple would raise.
        return time.mktime(tuple(data[:8]) + (-1,))
    else:
        t = calendar.timegm(data)
        return t - data[9]


def quote(str):
    """Prepare string to be used in a quoted string.

    Turns backslash and double quote characters into quoted pairs.  These
    are the only characters that need to be quoted inside a quoted string.
    Does not add the surrounding double quotes.
    """
    # Backslashes must be doubled first, or the ones added for the double
    # quotes would themselves be doubled.
    return str.replace('\\', '\\\\').replace('"', '\\"')


class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser is a cursor (`pos') over `field'; every get*() method
    consumes input by advancing the cursor.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Comments are appended to self.commentlist.  Returns the skipped
        blanks and tabs as a string (CR and LF are skipped but not returned).
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return EMPTYSTRING.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  Each entry is a
        (name, address) pair; input that yields no address contributes
        ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs: one pair for a single
        mailbox, any number for a group, none if nothing could be parsed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a snapshot, not a reference: getphraselist() appends to
        # self.commentlist in place, and a bare addr-spec is re-parsed from
        # oldpos below, which would otherwise record its comments twice.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller's loop progresses.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # Domain of an "@domain" route element; parsed and dropped.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec (the
                # closing `>' in well-formed input).
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the address as a string: the local part alone if no `@'
        follows it, or the empty string if the domain is invalid.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Drop whitespace recorded just before the dot.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                # End of the local part; drop trailing whitespace.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain, return an empty address instead of returning a
            # local part to denote failed parsing.
            return EMPTYSTRING
        return EMPTYSTRING.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second `@' is found in the domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # bpo-34155: Don't parse domains with two `@` like
                # `a@malicious.org@important.com`.
                return EMPTYSTRING
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Returns the fragment without its delimiters and with the backslash
        of each quoted pair removed.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True when the previous character was a backslash, so the current
        # one is taken literally.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist

class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (name, address) pairs are held in self.addresslist.  The
    arithmetic operators treat that list as an ordered set.
    """
    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false `field' (None or '') gives an empty list without parsing.
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]