17db96d56Sopenharmony_cir"""HTTP cookie handling for web clients. 27db96d56Sopenharmony_ci 37db96d56Sopenharmony_ciThis module has (now fairly distant) origins in Gisle Aas' Perl module 47db96d56Sopenharmony_ciHTTP::Cookies, from the libwww-perl library. 57db96d56Sopenharmony_ci 67db96d56Sopenharmony_ciDocstrings, comments and debug strings in this code refer to the 77db96d56Sopenharmony_ciattributes of the HTTP cookie system as cookie-attributes, to distinguish 87db96d56Sopenharmony_cithem clearly from Python attributes. 97db96d56Sopenharmony_ci 107db96d56Sopenharmony_ciClass diagram (note that BSDDBCookieJar and the MSIE* classes are not 117db96d56Sopenharmony_cidistributed with the Python standard library, but are available from 127db96d56Sopenharmony_cihttp://wwwsearch.sf.net/): 137db96d56Sopenharmony_ci 147db96d56Sopenharmony_ci CookieJar____ 157db96d56Sopenharmony_ci / \ \ 167db96d56Sopenharmony_ci FileCookieJar \ \ 177db96d56Sopenharmony_ci / | \ \ \ 187db96d56Sopenharmony_ci MozillaCookieJar | LWPCookieJar \ \ 197db96d56Sopenharmony_ci | | \ 207db96d56Sopenharmony_ci | ---MSIEBase | \ 217db96d56Sopenharmony_ci | / | | \ 227db96d56Sopenharmony_ci | / MSIEDBCookieJar BSDDBCookieJar 237db96d56Sopenharmony_ci |/ 247db96d56Sopenharmony_ci MSIECookieJar 257db96d56Sopenharmony_ci 267db96d56Sopenharmony_ci""" 277db96d56Sopenharmony_ci 287db96d56Sopenharmony_ci__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', 297db96d56Sopenharmony_ci 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] 307db96d56Sopenharmony_ci 317db96d56Sopenharmony_ciimport os 327db96d56Sopenharmony_ciimport copy 337db96d56Sopenharmony_ciimport datetime 347db96d56Sopenharmony_ciimport re 357db96d56Sopenharmony_ciimport time 367db96d56Sopenharmony_ciimport urllib.parse, urllib.request 377db96d56Sopenharmony_ciimport threading as _threading 387db96d56Sopenharmony_ciimport http.client # only for the default HTTP port 397db96d56Sopenharmony_cifrom calendar import timegm 407db96d56Sopenharmony_ci 417db96d56Sopenharmony_cidebug = False # set to True to enable debugging via the logging module 427db96d56Sopenharmony_cilogger = None 437db96d56Sopenharmony_ci 447db96d56Sopenharmony_cidef _debug(*args): 457db96d56Sopenharmony_ci if not debug: 467db96d56Sopenharmony_ci return 477db96d56Sopenharmony_ci global logger 487db96d56Sopenharmony_ci if not logger: 497db96d56Sopenharmony_ci import logging 507db96d56Sopenharmony_ci logger = logging.getLogger("http.cookiejar") 517db96d56Sopenharmony_ci return logger.debug(*args) 527db96d56Sopenharmony_ci 537db96d56Sopenharmony_ciHTTPONLY_ATTR = "HTTPOnly" 547db96d56Sopenharmony_ciHTTPONLY_PREFIX = "#HttpOnly_" 557db96d56Sopenharmony_ciDEFAULT_HTTP_PORT = str(http.client.HTTP_PORT) 567db96d56Sopenharmony_ciNETSCAPE_MAGIC_RGX = re.compile("#( Netscape)? HTTP Cookie File") 577db96d56Sopenharmony_ciMISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " 587db96d56Sopenharmony_ci "instance initialised with one)") 597db96d56Sopenharmony_ciNETSCAPE_HEADER_TEXT = """\ 607db96d56Sopenharmony_ci# Netscape HTTP Cookie File 617db96d56Sopenharmony_ci# http://curl.haxx.se/rfc/cookie_spec.html 627db96d56Sopenharmony_ci# This is a generated file! Do not edit. 637db96d56Sopenharmony_ci 647db96d56Sopenharmony_ci""" 657db96d56Sopenharmony_ci 667db96d56Sopenharmony_cidef _warn_unhandled_exception(): 677db96d56Sopenharmony_ci # There are a few catch-all except: statements in this module, for 687db96d56Sopenharmony_ci # catching input that's bad in unexpected ways. Warn if any 697db96d56Sopenharmony_ci # exceptions are caught there. 707db96d56Sopenharmony_ci import io, warnings, traceback 717db96d56Sopenharmony_ci f = io.StringIO() 727db96d56Sopenharmony_ci traceback.print_exc(None, f) 737db96d56Sopenharmony_ci msg = f.getvalue() 747db96d56Sopenharmony_ci warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2) 757db96d56Sopenharmony_ci 767db96d56Sopenharmony_ci 777db96d56Sopenharmony_ci# Date/time conversion 787db96d56Sopenharmony_ci# ----------------------------------------------------------------------------- 797db96d56Sopenharmony_ci 807db96d56Sopenharmony_ciEPOCH_YEAR = 1970 817db96d56Sopenharmony_cidef _timegm(tt): 827db96d56Sopenharmony_ci year, month, mday, hour, min, sec = tt[:6] 837db96d56Sopenharmony_ci if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and 847db96d56Sopenharmony_ci (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): 857db96d56Sopenharmony_ci return timegm(tt) 867db96d56Sopenharmony_ci else: 877db96d56Sopenharmony_ci return None 887db96d56Sopenharmony_ci 897db96d56Sopenharmony_ciDAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] 907db96d56Sopenharmony_ciMONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", 917db96d56Sopenharmony_ci "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] 927db96d56Sopenharmony_ciMONTHS_LOWER = [month.lower() for month in MONTHS] 937db96d56Sopenharmony_ci 947db96d56Sopenharmony_cidef time2isoz(t=None): 957db96d56Sopenharmony_ci """Return a string representing time in seconds since epoch, t. 967db96d56Sopenharmony_ci 977db96d56Sopenharmony_ci If the function is called without an argument, it will use the current 987db96d56Sopenharmony_ci time. 997db96d56Sopenharmony_ci 1007db96d56Sopenharmony_ci The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", 1017db96d56Sopenharmony_ci representing Universal Time (UTC, aka GMT). An example of this format is: 1027db96d56Sopenharmony_ci 1037db96d56Sopenharmony_ci 1994-11-24 08:49:37Z 1047db96d56Sopenharmony_ci 1057db96d56Sopenharmony_ci """ 1067db96d56Sopenharmony_ci if t is None: 1077db96d56Sopenharmony_ci dt = datetime.datetime.utcnow() 1087db96d56Sopenharmony_ci else: 1097db96d56Sopenharmony_ci dt = datetime.datetime.utcfromtimestamp(t) 1107db96d56Sopenharmony_ci return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( 1117db96d56Sopenharmony_ci dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second) 1127db96d56Sopenharmony_ci 1137db96d56Sopenharmony_cidef time2netscape(t=None): 1147db96d56Sopenharmony_ci """Return a string representing time in seconds since epoch, t. 1157db96d56Sopenharmony_ci 1167db96d56Sopenharmony_ci If the function is called without an argument, it will use the current 1177db96d56Sopenharmony_ci time. 1187db96d56Sopenharmony_ci 1197db96d56Sopenharmony_ci The format of the returned string is like this: 1207db96d56Sopenharmony_ci 1217db96d56Sopenharmony_ci Wed, DD-Mon-YYYY HH:MM:SS GMT 1227db96d56Sopenharmony_ci 1237db96d56Sopenharmony_ci """ 1247db96d56Sopenharmony_ci if t is None: 1257db96d56Sopenharmony_ci dt = datetime.datetime.utcnow() 1267db96d56Sopenharmony_ci else: 1277db96d56Sopenharmony_ci dt = datetime.datetime.utcfromtimestamp(t) 1287db96d56Sopenharmony_ci return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % ( 1297db96d56Sopenharmony_ci DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1], 1307db96d56Sopenharmony_ci dt.year, dt.hour, dt.minute, dt.second) 1317db96d56Sopenharmony_ci 1327db96d56Sopenharmony_ci 1337db96d56Sopenharmony_ciUTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} 1347db96d56Sopenharmony_ci 1357db96d56Sopenharmony_ciTIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) 1367db96d56Sopenharmony_cidef offset_from_tz_string(tz): 1377db96d56Sopenharmony_ci offset = None 1387db96d56Sopenharmony_ci if tz in UTC_ZONES: 1397db96d56Sopenharmony_ci offset = 0 1407db96d56Sopenharmony_ci else: 1417db96d56Sopenharmony_ci m = TIMEZONE_RE.search(tz) 1427db96d56Sopenharmony_ci if m: 1437db96d56Sopenharmony_ci offset = 3600 * int(m.group(2)) 1447db96d56Sopenharmony_ci if m.group(3): 1457db96d56Sopenharmony_ci offset = offset + 60 * int(m.group(3)) 1467db96d56Sopenharmony_ci if m.group(1) == '-': 1477db96d56Sopenharmony_ci offset = -offset 1487db96d56Sopenharmony_ci return offset 1497db96d56Sopenharmony_ci 1507db96d56Sopenharmony_cidef _str2time(day, mon, yr, hr, min, sec, tz): 1517db96d56Sopenharmony_ci yr = int(yr) 1527db96d56Sopenharmony_ci if yr > datetime.MAXYEAR: 1537db96d56Sopenharmony_ci return None 1547db96d56Sopenharmony_ci 1557db96d56Sopenharmony_ci # translate month name to number 1567db96d56Sopenharmony_ci # month numbers start with 1 (January) 1577db96d56Sopenharmony_ci try: 1587db96d56Sopenharmony_ci mon = MONTHS_LOWER.index(mon.lower())+1 1597db96d56Sopenharmony_ci except ValueError: 1607db96d56Sopenharmony_ci # maybe it's already a number 1617db96d56Sopenharmony_ci try: 1627db96d56Sopenharmony_ci imon = int(mon) 1637db96d56Sopenharmony_ci except ValueError: 1647db96d56Sopenharmony_ci return None 1657db96d56Sopenharmony_ci if 1 <= imon <= 12: 1667db96d56Sopenharmony_ci mon = imon 1677db96d56Sopenharmony_ci else: 1687db96d56Sopenharmony_ci return None 1697db96d56Sopenharmony_ci 1707db96d56Sopenharmony_ci # make sure clock elements are defined 1717db96d56Sopenharmony_ci if hr is None: hr = 0 1727db96d56Sopenharmony_ci if min is None: min = 0 1737db96d56Sopenharmony_ci if sec is None: sec = 0 1747db96d56Sopenharmony_ci 1757db96d56Sopenharmony_ci day = int(day) 1767db96d56Sopenharmony_ci hr = int(hr) 1777db96d56Sopenharmony_ci min = int(min) 1787db96d56Sopenharmony_ci sec = int(sec) 1797db96d56Sopenharmony_ci 1807db96d56Sopenharmony_ci if yr < 1000: 1817db96d56Sopenharmony_ci # find "obvious" year 1827db96d56Sopenharmony_ci cur_yr = time.localtime(time.time())[0] 1837db96d56Sopenharmony_ci m = cur_yr % 100 1847db96d56Sopenharmony_ci tmp = yr 1857db96d56Sopenharmony_ci yr = yr + cur_yr - m 1867db96d56Sopenharmony_ci m = m - tmp 1877db96d56Sopenharmony_ci if abs(m) > 50: 1887db96d56Sopenharmony_ci if m > 0: yr = yr + 100 1897db96d56Sopenharmony_ci else: yr = yr - 100 1907db96d56Sopenharmony_ci 1917db96d56Sopenharmony_ci # convert UTC time tuple to seconds since epoch (not timezone-adjusted) 1927db96d56Sopenharmony_ci t = _timegm((yr, mon, day, hr, min, sec, tz)) 1937db96d56Sopenharmony_ci 1947db96d56Sopenharmony_ci if t is not None: 1957db96d56Sopenharmony_ci # adjust time using timezone string, to get absolute time since epoch 1967db96d56Sopenharmony_ci if tz is None: 1977db96d56Sopenharmony_ci tz = "UTC" 1987db96d56Sopenharmony_ci tz = tz.upper() 1997db96d56Sopenharmony_ci offset = offset_from_tz_string(tz) 2007db96d56Sopenharmony_ci if offset is None: 2017db96d56Sopenharmony_ci return None 2027db96d56Sopenharmony_ci t = t - offset 2037db96d56Sopenharmony_ci 2047db96d56Sopenharmony_ci return t 2057db96d56Sopenharmony_ci 2067db96d56Sopenharmony_ciSTRICT_DATE_RE = re.compile( 2077db96d56Sopenharmony_ci r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " 2087db96d56Sopenharmony_ci r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) 2097db96d56Sopenharmony_ciWEEKDAY_RE = re.compile( 2107db96d56Sopenharmony_ci r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) 2117db96d56Sopenharmony_ciLOOSE_HTTP_DATE_RE = re.compile( 2127db96d56Sopenharmony_ci r"""^ 2137db96d56Sopenharmony_ci (\d\d?) # day 2147db96d56Sopenharmony_ci (?:\s+|[-\/]) 2157db96d56Sopenharmony_ci (\w+) # month 2167db96d56Sopenharmony_ci (?:\s+|[-\/]) 2177db96d56Sopenharmony_ci (\d+) # year 2187db96d56Sopenharmony_ci (?: 2197db96d56Sopenharmony_ci (?:\s+|:) # separator before clock 2207db96d56Sopenharmony_ci (\d\d?):(\d\d) # hour:min 2217db96d56Sopenharmony_ci (?::(\d\d))? # optional seconds 2227db96d56Sopenharmony_ci )? # optional clock 2237db96d56Sopenharmony_ci \s* 2247db96d56Sopenharmony_ci (?: 2257db96d56Sopenharmony_ci ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+) # timezone 2267db96d56Sopenharmony_ci \s* 2277db96d56Sopenharmony_ci )? 2287db96d56Sopenharmony_ci (?: 2297db96d56Sopenharmony_ci \(\w+\) # ASCII representation of timezone in parens. 2307db96d56Sopenharmony_ci \s* 2317db96d56Sopenharmony_ci )?$""", re.X | re.ASCII) 2327db96d56Sopenharmony_cidef http2time(text): 2337db96d56Sopenharmony_ci """Returns time in seconds since epoch of time represented by a string. 2347db96d56Sopenharmony_ci 2357db96d56Sopenharmony_ci Return value is an integer. 2367db96d56Sopenharmony_ci 2377db96d56Sopenharmony_ci None is returned if the format of str is unrecognized, the time is outside 2387db96d56Sopenharmony_ci the representable range, or the timezone string is not recognized. If the 2397db96d56Sopenharmony_ci string contains no timezone, UTC is assumed. 2407db96d56Sopenharmony_ci 2417db96d56Sopenharmony_ci The timezone in the string may be numerical (like "-0800" or "+0100") or a 2427db96d56Sopenharmony_ci string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the 2437db96d56Sopenharmony_ci timezone strings equivalent to UTC (zero offset) are known to the function. 2447db96d56Sopenharmony_ci 2457db96d56Sopenharmony_ci The function loosely parses the following formats: 2467db96d56Sopenharmony_ci 2477db96d56Sopenharmony_ci Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format 2487db96d56Sopenharmony_ci Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format 2497db96d56Sopenharmony_ci Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format 2507db96d56Sopenharmony_ci 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) 2517db96d56Sopenharmony_ci 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) 2527db96d56Sopenharmony_ci 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) 2537db96d56Sopenharmony_ci 2547db96d56Sopenharmony_ci The parser ignores leading and trailing whitespace. The time may be 2557db96d56Sopenharmony_ci absent. 2567db96d56Sopenharmony_ci 2577db96d56Sopenharmony_ci If the year is given with only 2 digits, the function will select the 2587db96d56Sopenharmony_ci century that makes the year closest to the current date. 2597db96d56Sopenharmony_ci 2607db96d56Sopenharmony_ci """ 2617db96d56Sopenharmony_ci # fast exit for strictly conforming string 2627db96d56Sopenharmony_ci m = STRICT_DATE_RE.search(text) 2637db96d56Sopenharmony_ci if m: 2647db96d56Sopenharmony_ci g = m.groups() 2657db96d56Sopenharmony_ci mon = MONTHS_LOWER.index(g[1].lower()) + 1 2667db96d56Sopenharmony_ci tt = (int(g[2]), mon, int(g[0]), 2677db96d56Sopenharmony_ci int(g[3]), int(g[4]), float(g[5])) 2687db96d56Sopenharmony_ci return _timegm(tt) 2697db96d56Sopenharmony_ci 2707db96d56Sopenharmony_ci # No, we need some messy parsing... 2717db96d56Sopenharmony_ci 2727db96d56Sopenharmony_ci # clean up 2737db96d56Sopenharmony_ci text = text.lstrip() 2747db96d56Sopenharmony_ci text = WEEKDAY_RE.sub("", text, 1) # Useless weekday 2757db96d56Sopenharmony_ci 2767db96d56Sopenharmony_ci # tz is time zone specifier string 2777db96d56Sopenharmony_ci day, mon, yr, hr, min, sec, tz = [None]*7 2787db96d56Sopenharmony_ci 2797db96d56Sopenharmony_ci # loose regexp parse 2807db96d56Sopenharmony_ci m = LOOSE_HTTP_DATE_RE.search(text) 2817db96d56Sopenharmony_ci if m is not None: 2827db96d56Sopenharmony_ci day, mon, yr, hr, min, sec, tz = m.groups() 2837db96d56Sopenharmony_ci else: 2847db96d56Sopenharmony_ci return None # bad format 2857db96d56Sopenharmony_ci 2867db96d56Sopenharmony_ci return _str2time(day, mon, yr, hr, min, sec, tz) 2877db96d56Sopenharmony_ci 2887db96d56Sopenharmony_ciISO_DATE_RE = re.compile( 2897db96d56Sopenharmony_ci r"""^ 2907db96d56Sopenharmony_ci (\d{4}) # year 2917db96d56Sopenharmony_ci [-\/]? 2927db96d56Sopenharmony_ci (\d\d?) # numerical month 2937db96d56Sopenharmony_ci [-\/]? 2947db96d56Sopenharmony_ci (\d\d?) # day 2957db96d56Sopenharmony_ci (?: 2967db96d56Sopenharmony_ci (?:\s+|[-:Tt]) # separator before clock 2977db96d56Sopenharmony_ci (\d\d?):?(\d\d) # hour:min 2987db96d56Sopenharmony_ci (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) 2997db96d56Sopenharmony_ci )? # optional clock 3007db96d56Sopenharmony_ci \s* 3017db96d56Sopenharmony_ci (?: 3027db96d56Sopenharmony_ci ([-+]?\d\d?:?(:?\d\d)? 3037db96d56Sopenharmony_ci |Z|z) # timezone (Z is "zero meridian", i.e. GMT) 3047db96d56Sopenharmony_ci \s* 3057db96d56Sopenharmony_ci )?$""", re.X | re. ASCII) 3067db96d56Sopenharmony_cidef iso2time(text): 3077db96d56Sopenharmony_ci """ 3087db96d56Sopenharmony_ci As for http2time, but parses the ISO 8601 formats: 3097db96d56Sopenharmony_ci 3107db96d56Sopenharmony_ci 1994-02-03 14:15:29 -0100 -- ISO 8601 format 3117db96d56Sopenharmony_ci 1994-02-03 14:15:29 -- zone is optional 3127db96d56Sopenharmony_ci 1994-02-03 -- only date 3137db96d56Sopenharmony_ci 1994-02-03T14:15:29 -- Use T as separator 3147db96d56Sopenharmony_ci 19940203T141529Z -- ISO 8601 compact format 3157db96d56Sopenharmony_ci 19940203 -- only date 3167db96d56Sopenharmony_ci 3177db96d56Sopenharmony_ci """ 3187db96d56Sopenharmony_ci # clean up 3197db96d56Sopenharmony_ci text = text.lstrip() 3207db96d56Sopenharmony_ci 3217db96d56Sopenharmony_ci # tz is time zone specifier string 3227db96d56Sopenharmony_ci day, mon, yr, hr, min, sec, tz = [None]*7 3237db96d56Sopenharmony_ci 3247db96d56Sopenharmony_ci # loose regexp parse 3257db96d56Sopenharmony_ci m = ISO_DATE_RE.search(text) 3267db96d56Sopenharmony_ci if m is not None: 3277db96d56Sopenharmony_ci # XXX there's an extra bit of the timezone I'm ignoring here: is 3287db96d56Sopenharmony_ci # this the right thing to do? 3297db96d56Sopenharmony_ci yr, mon, day, hr, min, sec, tz, _ = m.groups() 3307db96d56Sopenharmony_ci else: 3317db96d56Sopenharmony_ci return None # bad format 3327db96d56Sopenharmony_ci 3337db96d56Sopenharmony_ci return _str2time(day, mon, yr, hr, min, sec, tz) 3347db96d56Sopenharmony_ci 3357db96d56Sopenharmony_ci 3367db96d56Sopenharmony_ci# Header parsing 3377db96d56Sopenharmony_ci# ----------------------------------------------------------------------------- 3387db96d56Sopenharmony_ci 3397db96d56Sopenharmony_cidef unmatched(match): 3407db96d56Sopenharmony_ci """Return unmatched part of re.Match object.""" 3417db96d56Sopenharmony_ci start, end = match.span(0) 3427db96d56Sopenharmony_ci return match.string[:start]+match.string[end:] 3437db96d56Sopenharmony_ci 3447db96d56Sopenharmony_ciHEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") 3457db96d56Sopenharmony_ciHEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") 3467db96d56Sopenharmony_ciHEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") 3477db96d56Sopenharmony_ciHEADER_ESCAPE_RE = re.compile(r"\\(.)") 3487db96d56Sopenharmony_cidef split_header_words(header_values): 3497db96d56Sopenharmony_ci r"""Parse header values into a list of lists containing key,value pairs. 3507db96d56Sopenharmony_ci 3517db96d56Sopenharmony_ci The function knows how to deal with ",", ";" and "=" as well as quoted 3527db96d56Sopenharmony_ci values after "=". A list of space separated tokens are parsed as if they 3537db96d56Sopenharmony_ci were separated by ";". 3547db96d56Sopenharmony_ci 3557db96d56Sopenharmony_ci If the header_values passed as argument contains multiple values, then they 3567db96d56Sopenharmony_ci are treated as if they were a single value separated by comma ",". 3577db96d56Sopenharmony_ci 3587db96d56Sopenharmony_ci This means that this function is useful for parsing header fields that 3597db96d56Sopenharmony_ci follow this syntax (BNF as from the HTTP/1.1 specification, but we relax 3607db96d56Sopenharmony_ci the requirement for tokens). 3617db96d56Sopenharmony_ci 3627db96d56Sopenharmony_ci headers = #header 3637db96d56Sopenharmony_ci header = (token | parameter) *( [";"] (token | parameter)) 3647db96d56Sopenharmony_ci 3657db96d56Sopenharmony_ci token = 1*<any CHAR except CTLs or separators> 3667db96d56Sopenharmony_ci separators = "(" | ")" | "<" | ">" | "@" 3677db96d56Sopenharmony_ci | "," | ";" | ":" | "\" | <"> 3687db96d56Sopenharmony_ci | "/" | "[" | "]" | "?" | "=" 3697db96d56Sopenharmony_ci | "{" | "}" | SP | HT 3707db96d56Sopenharmony_ci 3717db96d56Sopenharmony_ci quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) 3727db96d56Sopenharmony_ci qdtext = <any TEXT except <">> 3737db96d56Sopenharmony_ci quoted-pair = "\" CHAR 3747db96d56Sopenharmony_ci 3757db96d56Sopenharmony_ci parameter = attribute "=" value 3767db96d56Sopenharmony_ci attribute = token 3777db96d56Sopenharmony_ci value = token | quoted-string 3787db96d56Sopenharmony_ci 3797db96d56Sopenharmony_ci Each header is represented by a list of key/value pairs. The value for a 3807db96d56Sopenharmony_ci simple token (not part of a parameter) is None. Syntactically incorrect 3817db96d56Sopenharmony_ci headers will not necessarily be parsed as you would want. 3827db96d56Sopenharmony_ci 3837db96d56Sopenharmony_ci This is easier to describe with some examples: 3847db96d56Sopenharmony_ci 3857db96d56Sopenharmony_ci >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) 3867db96d56Sopenharmony_ci [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] 3877db96d56Sopenharmony_ci >>> split_header_words(['text/html; charset="iso-8859-1"']) 3887db96d56Sopenharmony_ci [[('text/html', None), ('charset', 'iso-8859-1')]] 3897db96d56Sopenharmony_ci >>> split_header_words([r'Basic realm="\"foo\bar\""']) 3907db96d56Sopenharmony_ci [[('Basic', None), ('realm', '"foobar"')]] 3917db96d56Sopenharmony_ci 3927db96d56Sopenharmony_ci """ 3937db96d56Sopenharmony_ci assert not isinstance(header_values, str) 3947db96d56Sopenharmony_ci result = [] 3957db96d56Sopenharmony_ci for text in header_values: 3967db96d56Sopenharmony_ci orig_text = text 3977db96d56Sopenharmony_ci pairs = [] 3987db96d56Sopenharmony_ci while text: 3997db96d56Sopenharmony_ci m = HEADER_TOKEN_RE.search(text) 4007db96d56Sopenharmony_ci if m: 4017db96d56Sopenharmony_ci text = unmatched(m) 4027db96d56Sopenharmony_ci name = m.group(1) 4037db96d56Sopenharmony_ci m = HEADER_QUOTED_VALUE_RE.search(text) 4047db96d56Sopenharmony_ci if m: # quoted value 4057db96d56Sopenharmony_ci text = unmatched(m) 4067db96d56Sopenharmony_ci value = m.group(1) 4077db96d56Sopenharmony_ci value = HEADER_ESCAPE_RE.sub(r"\1", value) 4087db96d56Sopenharmony_ci else: 4097db96d56Sopenharmony_ci m = HEADER_VALUE_RE.search(text) 4107db96d56Sopenharmony_ci if m: # unquoted value 4117db96d56Sopenharmony_ci text = unmatched(m) 4127db96d56Sopenharmony_ci value = m.group(1) 4137db96d56Sopenharmony_ci value = value.rstrip() 4147db96d56Sopenharmony_ci else: 4157db96d56Sopenharmony_ci # no value, a lone token 4167db96d56Sopenharmony_ci value = None 4177db96d56Sopenharmony_ci pairs.append((name, value)) 4187db96d56Sopenharmony_ci elif text.lstrip().startswith(","): 4197db96d56Sopenharmony_ci # concatenated headers, as per RFC 2616 section 4.2 4207db96d56Sopenharmony_ci text = text.lstrip()[1:] 4217db96d56Sopenharmony_ci if pairs: result.append(pairs) 4227db96d56Sopenharmony_ci pairs = [] 4237db96d56Sopenharmony_ci else: 4247db96d56Sopenharmony_ci # skip junk 4257db96d56Sopenharmony_ci non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text) 4267db96d56Sopenharmony_ci assert nr_junk_chars > 0, ( 4277db96d56Sopenharmony_ci "split_header_words bug: '%s', '%s', %s" % 4287db96d56Sopenharmony_ci (orig_text, text, pairs)) 4297db96d56Sopenharmony_ci text = non_junk 4307db96d56Sopenharmony_ci if pairs: result.append(pairs) 4317db96d56Sopenharmony_ci return result 4327db96d56Sopenharmony_ci 4337db96d56Sopenharmony_ciHEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") 4347db96d56Sopenharmony_cidef join_header_words(lists): 4357db96d56Sopenharmony_ci """Do the inverse (almost) of the conversion done by split_header_words. 4367db96d56Sopenharmony_ci 4377db96d56Sopenharmony_ci Takes a list of lists of (key, value) pairs and produces a single header 4387db96d56Sopenharmony_ci value. Attribute values are quoted if needed. 4397db96d56Sopenharmony_ci 4407db96d56Sopenharmony_ci >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]]) 4417db96d56Sopenharmony_ci 'text/plain; charset="iso-8859-1"' 4427db96d56Sopenharmony_ci >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]]) 4437db96d56Sopenharmony_ci 'text/plain, charset="iso-8859-1"' 4447db96d56Sopenharmony_ci 4457db96d56Sopenharmony_ci """ 4467db96d56Sopenharmony_ci headers = [] 4477db96d56Sopenharmony_ci for pairs in lists: 4487db96d56Sopenharmony_ci attr = [] 4497db96d56Sopenharmony_ci for k, v in pairs: 4507db96d56Sopenharmony_ci if v is not None: 4517db96d56Sopenharmony_ci if not re.search(r"^\w+$", v): 4527db96d56Sopenharmony_ci v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ 4537db96d56Sopenharmony_ci v = '"%s"' % v 4547db96d56Sopenharmony_ci k = "%s=%s" % (k, v) 4557db96d56Sopenharmony_ci attr.append(k) 4567db96d56Sopenharmony_ci if attr: headers.append("; ".join(attr)) 4577db96d56Sopenharmony_ci return ", ".join(headers) 4587db96d56Sopenharmony_ci 4597db96d56Sopenharmony_cidef strip_quotes(text): 4607db96d56Sopenharmony_ci if text.startswith('"'): 4617db96d56Sopenharmony_ci text = text[1:] 4627db96d56Sopenharmony_ci if text.endswith('"'): 4637db96d56Sopenharmony_ci text = text[:-1] 4647db96d56Sopenharmony_ci return text 4657db96d56Sopenharmony_ci 4667db96d56Sopenharmony_cidef parse_ns_headers(ns_headers): 4677db96d56Sopenharmony_ci """Ad-hoc parser for Netscape protocol cookie-attributes. 4687db96d56Sopenharmony_ci 4697db96d56Sopenharmony_ci The old Netscape cookie format for Set-Cookie can for instance contain 4707db96d56Sopenharmony_ci an unquoted "," in the expires field, so we have to use this ad-hoc 4717db96d56Sopenharmony_ci parser instead of split_header_words. 4727db96d56Sopenharmony_ci 4737db96d56Sopenharmony_ci XXX This may not make the best possible effort to parse all the crap 4747db96d56Sopenharmony_ci that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient 4757db96d56Sopenharmony_ci parser is probably better, so could do worse than following that if 4767db96d56Sopenharmony_ci this ever gives any trouble. 4777db96d56Sopenharmony_ci 4787db96d56Sopenharmony_ci Currently, this is also used for parsing RFC 2109 cookies. 4797db96d56Sopenharmony_ci 4807db96d56Sopenharmony_ci """ 4817db96d56Sopenharmony_ci known_attrs = ("expires", "domain", "path", "secure", 4827db96d56Sopenharmony_ci # RFC 2109 attrs (may turn up in Netscape cookies, too) 4837db96d56Sopenharmony_ci "version", "port", "max-age") 4847db96d56Sopenharmony_ci 4857db96d56Sopenharmony_ci result = [] 4867db96d56Sopenharmony_ci for ns_header in ns_headers: 4877db96d56Sopenharmony_ci pairs = [] 4887db96d56Sopenharmony_ci version_set = False 4897db96d56Sopenharmony_ci 4907db96d56Sopenharmony_ci # XXX: The following does not strictly adhere to RFCs in that empty 4917db96d56Sopenharmony_ci # names and values are legal (the former will only appear once and will 4927db96d56Sopenharmony_ci # be overwritten if multiple occurrences are present). This is 4937db96d56Sopenharmony_ci # mostly to deal with backwards compatibility. 4947db96d56Sopenharmony_ci for ii, param in enumerate(ns_header.split(';')): 4957db96d56Sopenharmony_ci param = param.strip() 4967db96d56Sopenharmony_ci 4977db96d56Sopenharmony_ci key, sep, val = param.partition('=') 4987db96d56Sopenharmony_ci key = key.strip() 4997db96d56Sopenharmony_ci 5007db96d56Sopenharmony_ci if not key: 5017db96d56Sopenharmony_ci if ii == 0: 5027db96d56Sopenharmony_ci break 5037db96d56Sopenharmony_ci else: 5047db96d56Sopenharmony_ci continue 5057db96d56Sopenharmony_ci 5067db96d56Sopenharmony_ci # allow for a distinction between present and empty and missing 5077db96d56Sopenharmony_ci # altogether 5087db96d56Sopenharmony_ci val = val.strip() if sep else None 5097db96d56Sopenharmony_ci 5107db96d56Sopenharmony_ci if ii != 0: 5117db96d56Sopenharmony_ci lc = key.lower() 5127db96d56Sopenharmony_ci if lc in known_attrs: 5137db96d56Sopenharmony_ci key = lc 5147db96d56Sopenharmony_ci 5157db96d56Sopenharmony_ci if key == "version": 5167db96d56Sopenharmony_ci # This is an RFC 2109 cookie. 5177db96d56Sopenharmony_ci if val is not None: 5187db96d56Sopenharmony_ci val = strip_quotes(val) 5197db96d56Sopenharmony_ci version_set = True 5207db96d56Sopenharmony_ci elif key == "expires": 5217db96d56Sopenharmony_ci # convert expires date to seconds since epoch 5227db96d56Sopenharmony_ci if val is not None: 5237db96d56Sopenharmony_ci val = http2time(strip_quotes(val)) # None if invalid 5247db96d56Sopenharmony_ci pairs.append((key, val)) 5257db96d56Sopenharmony_ci 5267db96d56Sopenharmony_ci if pairs: 5277db96d56Sopenharmony_ci if not version_set: 5287db96d56Sopenharmony_ci pairs.append(("version", "0")) 5297db96d56Sopenharmony_ci result.append(pairs) 5307db96d56Sopenharmony_ci 5317db96d56Sopenharmony_ci return result 5327db96d56Sopenharmony_ci 5337db96d56Sopenharmony_ci 5347db96d56Sopenharmony_ciIPV4_RE = re.compile(r"\.\d+$", re.ASCII) 5357db96d56Sopenharmony_cidef is_HDN(text): 5367db96d56Sopenharmony_ci """Return True if text is a host domain name.""" 5377db96d56Sopenharmony_ci # XXX 5387db96d56Sopenharmony_ci # This may well be wrong. Which RFC is HDN defined in, if any (for 5397db96d56Sopenharmony_ci # the purposes of RFC 2965)? 5407db96d56Sopenharmony_ci # For the current implementation, what about IPv6? Remember to look 5417db96d56Sopenharmony_ci # at other uses of IPV4_RE also, if change this. 5427db96d56Sopenharmony_ci if IPV4_RE.search(text): 5437db96d56Sopenharmony_ci return False 5447db96d56Sopenharmony_ci if text == "": 5457db96d56Sopenharmony_ci return False 5467db96d56Sopenharmony_ci if text[0] == "." or text[-1] == ".": 5477db96d56Sopenharmony_ci return False 5487db96d56Sopenharmony_ci return True 5497db96d56Sopenharmony_ci 5507db96d56Sopenharmony_cidef domain_match(A, B): 5517db96d56Sopenharmony_ci """Return True if domain A domain-matches domain B, according to RFC 2965. 5527db96d56Sopenharmony_ci 5537db96d56Sopenharmony_ci A and B may be host domain names or IP addresses. 5547db96d56Sopenharmony_ci 5557db96d56Sopenharmony_ci RFC 2965, section 1: 5567db96d56Sopenharmony_ci 5577db96d56Sopenharmony_ci Host names can be specified either as an IP address or a HDN string. 5587db96d56Sopenharmony_ci Sometimes we compare one host name with another. (Such comparisons SHALL 5597db96d56Sopenharmony_ci be case-insensitive.) Host A's name domain-matches host B's if 5607db96d56Sopenharmony_ci 5617db96d56Sopenharmony_ci * their host name strings string-compare equal; or 5627db96d56Sopenharmony_ci 5637db96d56Sopenharmony_ci * A is a HDN string and has the form NB, where N is a non-empty 5647db96d56Sopenharmony_ci name string, B has the form .B', and B' is a HDN string. (So, 5657db96d56Sopenharmony_ci x.y.com domain-matches .Y.com but not Y.com.) 5667db96d56Sopenharmony_ci 5677db96d56Sopenharmony_ci Note that domain-match is not a commutative operation: a.b.c.com 5687db96d56Sopenharmony_ci domain-matches .c.com, but not the reverse. 5697db96d56Sopenharmony_ci 5707db96d56Sopenharmony_ci """ 5717db96d56Sopenharmony_ci # Note that, if A or B are IP addresses, the only relevant part of the 5727db96d56Sopenharmony_ci # definition of the domain-match algorithm is the direct string-compare. 5737db96d56Sopenharmony_ci A = A.lower() 5747db96d56Sopenharmony_ci B = B.lower() 5757db96d56Sopenharmony_ci if A == B: 5767db96d56Sopenharmony_ci return True 5777db96d56Sopenharmony_ci if not is_HDN(A): 5787db96d56Sopenharmony_ci return False 5797db96d56Sopenharmony_ci i = A.rfind(B) 5807db96d56Sopenharmony_ci if i == -1 or i == 0: 5817db96d56Sopenharmony_ci # A does not have form NB, or N is the empty string 5827db96d56Sopenharmony_ci return False 5837db96d56Sopenharmony_ci if not B.startswith("."): 5847db96d56Sopenharmony_ci return False 5857db96d56Sopenharmony_ci if not is_HDN(B[1:]): 5867db96d56Sopenharmony_ci return False 5877db96d56Sopenharmony_ci return True 5887db96d56Sopenharmony_ci 5897db96d56Sopenharmony_cidef liberal_is_HDN(text): 5907db96d56Sopenharmony_ci """Return True if text is a sort-of-like a host domain name. 5917db96d56Sopenharmony_ci 5927db96d56Sopenharmony_ci For accepting/blocking domains. 5937db96d56Sopenharmony_ci 5947db96d56Sopenharmony_ci """ 5957db96d56Sopenharmony_ci if IPV4_RE.search(text): 5967db96d56Sopenharmony_ci return False 5977db96d56Sopenharmony_ci return True 5987db96d56Sopenharmony_ci 5997db96d56Sopenharmony_cidef user_domain_match(A, B): 6007db96d56Sopenharmony_ci """For blocking/accepting domains. 6017db96d56Sopenharmony_ci 6027db96d56Sopenharmony_ci A and B may be host domain names or IP addresses. 6037db96d56Sopenharmony_ci 6047db96d56Sopenharmony_ci """ 6057db96d56Sopenharmony_ci A = A.lower() 6067db96d56Sopenharmony_ci B = B.lower() 6077db96d56Sopenharmony_ci if not (liberal_is_HDN(A) and liberal_is_HDN(B)): 6087db96d56Sopenharmony_ci if A == B: 6097db96d56Sopenharmony_ci # equal IP addresses 6107db96d56Sopenharmony_ci return True 6117db96d56Sopenharmony_ci return False 6127db96d56Sopenharmony_ci initial_dot = B.startswith(".") 6137db96d56Sopenharmony_ci if initial_dot and A.endswith(B): 6147db96d56Sopenharmony_ci return True 6157db96d56Sopenharmony_ci if not initial_dot and A == B: 6167db96d56Sopenharmony_ci return True 6177db96d56Sopenharmony_ci return False 6187db96d56Sopenharmony_ci 6197db96d56Sopenharmony_cicut_port_re = re.compile(r":\d+$", re.ASCII) 6207db96d56Sopenharmony_cidef request_host(request): 6217db96d56Sopenharmony_ci """Return request-host, as defined by RFC 2965. 6227db96d56Sopenharmony_ci 6237db96d56Sopenharmony_ci Variation from RFC: returned value is lowercased, for convenient 6247db96d56Sopenharmony_ci comparison. 6257db96d56Sopenharmony_ci 6267db96d56Sopenharmony_ci """ 6277db96d56Sopenharmony_ci url = request.get_full_url() 6287db96d56Sopenharmony_ci host = urllib.parse.urlparse(url)[1] 6297db96d56Sopenharmony_ci if host == "": 6307db96d56Sopenharmony_ci host = request.get_header("Host", "") 6317db96d56Sopenharmony_ci 6327db96d56Sopenharmony_ci # remove port, if present 6337db96d56Sopenharmony_ci host = cut_port_re.sub("", host, 1) 6347db96d56Sopenharmony_ci return host.lower() 6357db96d56Sopenharmony_ci 6367db96d56Sopenharmony_cidef eff_request_host(request): 6377db96d56Sopenharmony_ci """Return a tuple (request-host, effective request-host name). 6387db96d56Sopenharmony_ci 6397db96d56Sopenharmony_ci As defined by RFC 2965, except both are lowercased. 6407db96d56Sopenharmony_ci 6417db96d56Sopenharmony_ci """ 6427db96d56Sopenharmony_ci erhn = req_host = request_host(request) 6437db96d56Sopenharmony_ci if req_host.find(".") == -1 and not IPV4_RE.search(req_host): 6447db96d56Sopenharmony_ci erhn = req_host + ".local" 6457db96d56Sopenharmony_ci return req_host, erhn 6467db96d56Sopenharmony_ci 6477db96d56Sopenharmony_cidef request_path(request): 6487db96d56Sopenharmony_ci """Path component of request-URI, as defined by RFC 2965.""" 6497db96d56Sopenharmony_ci url = request.get_full_url() 6507db96d56Sopenharmony_ci parts = urllib.parse.urlsplit(url) 6517db96d56Sopenharmony_ci path = escape_path(parts.path) 6527db96d56Sopenharmony_ci if not path.startswith("/"): 6537db96d56Sopenharmony_ci # fix bad RFC 2396 absoluteURI 6547db96d56Sopenharmony_ci path = "/" + path 6557db96d56Sopenharmony_ci return path 6567db96d56Sopenharmony_ci 6577db96d56Sopenharmony_cidef request_port(request): 6587db96d56Sopenharmony_ci host = request.host 6597db96d56Sopenharmony_ci i = host.find(':') 6607db96d56Sopenharmony_ci if i >= 0: 6617db96d56Sopenharmony_ci port = host[i+1:] 6627db96d56Sopenharmony_ci try: 6637db96d56Sopenharmony_ci int(port) 6647db96d56Sopenharmony_ci except ValueError: 6657db96d56Sopenharmony_ci _debug("nonnumeric port: '%s'", port) 6667db96d56Sopenharmony_ci return None 6677db96d56Sopenharmony_ci else: 6687db96d56Sopenharmony_ci port = DEFAULT_HTTP_PORT 6697db96d56Sopenharmony_ci return port 6707db96d56Sopenharmony_ci 6717db96d56Sopenharmony_ci# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't 6727db96d56Sopenharmony_ci# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). 6737db96d56Sopenharmony_ciHTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" 6747db96d56Sopenharmony_ciESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") 6757db96d56Sopenharmony_cidef uppercase_escaped_char(match): 6767db96d56Sopenharmony_ci return "%%%s" % match.group(1).upper() 6777db96d56Sopenharmony_cidef escape_path(path): 6787db96d56Sopenharmony_ci """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" 6797db96d56Sopenharmony_ci # There's no knowing what character encoding was used to create URLs 6807db96d56Sopenharmony_ci # containing %-escapes, but since we have to pick one to escape invalid 6817db96d56Sopenharmony_ci # path characters, we pick UTF-8, as recommended in the HTML 4.0 6827db96d56Sopenharmony_ci # specification: 6837db96d56Sopenharmony_ci # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 6847db96d56Sopenharmony_ci # And here, kind of: draft-fielding-uri-rfc2396bis-03 6857db96d56Sopenharmony_ci # (And in draft IRI specification: draft-duerst-iri-05) 6867db96d56Sopenharmony_ci # (And here, for new URI schemes: RFC 2718) 6877db96d56Sopenharmony_ci path = urllib.parse.quote(path, HTTP_PATH_SAFE) 6887db96d56Sopenharmony_ci path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) 6897db96d56Sopenharmony_ci return path 6907db96d56Sopenharmony_ci 6917db96d56Sopenharmony_cidef reach(h): 6927db96d56Sopenharmony_ci """Return reach of host h, as defined by RFC 2965, section 1. 6937db96d56Sopenharmony_ci 6947db96d56Sopenharmony_ci The reach R of a host name H is defined as follows: 6957db96d56Sopenharmony_ci 6967db96d56Sopenharmony_ci * If 6977db96d56Sopenharmony_ci 6987db96d56Sopenharmony_ci - H is the host domain name of a host; and, 6997db96d56Sopenharmony_ci 7007db96d56Sopenharmony_ci - H has the form A.B; and 7017db96d56Sopenharmony_ci 7027db96d56Sopenharmony_ci - A has no embedded (that is, interior) dots; and 7037db96d56Sopenharmony_ci 7047db96d56Sopenharmony_ci - B has at least one embedded dot, or B is the string "local". 7057db96d56Sopenharmony_ci then the reach of H is .B. 7067db96d56Sopenharmony_ci 7077db96d56Sopenharmony_ci * Otherwise, the reach of H is H. 7087db96d56Sopenharmony_ci 7097db96d56Sopenharmony_ci >>> reach("www.acme.com") 7107db96d56Sopenharmony_ci '.acme.com' 7117db96d56Sopenharmony_ci >>> reach("acme.com") 7127db96d56Sopenharmony_ci 'acme.com' 7137db96d56Sopenharmony_ci >>> reach("acme.local") 7147db96d56Sopenharmony_ci '.local' 7157db96d56Sopenharmony_ci 7167db96d56Sopenharmony_ci """ 7177db96d56Sopenharmony_ci i = h.find(".") 7187db96d56Sopenharmony_ci if i >= 0: 7197db96d56Sopenharmony_ci #a = h[:i] # this line is only here to show what a is 7207db96d56Sopenharmony_ci b = h[i+1:] 7217db96d56Sopenharmony_ci i = b.find(".") 7227db96d56Sopenharmony_ci if is_HDN(h) and (i >= 0 or b == "local"): 7237db96d56Sopenharmony_ci return "."+b 7247db96d56Sopenharmony_ci return h 7257db96d56Sopenharmony_ci 7267db96d56Sopenharmony_cidef is_third_party(request): 7277db96d56Sopenharmony_ci """ 7287db96d56Sopenharmony_ci 7297db96d56Sopenharmony_ci RFC 2965, section 3.3.6: 7307db96d56Sopenharmony_ci 7317db96d56Sopenharmony_ci An unverifiable transaction is to a third-party host if its request- 7327db96d56Sopenharmony_ci host U does not domain-match the reach R of the request-host O in the 7337db96d56Sopenharmony_ci origin transaction. 7347db96d56Sopenharmony_ci 7357db96d56Sopenharmony_ci """ 7367db96d56Sopenharmony_ci req_host = request_host(request) 7377db96d56Sopenharmony_ci if not domain_match(req_host, reach(request.origin_req_host)): 7387db96d56Sopenharmony_ci return True 7397db96d56Sopenharmony_ci else: 7407db96d56Sopenharmony_ci return False 7417db96d56Sopenharmony_ci 7427db96d56Sopenharmony_ci 7437db96d56Sopenharmony_ciclass Cookie: 7447db96d56Sopenharmony_ci """HTTP Cookie. 7457db96d56Sopenharmony_ci 7467db96d56Sopenharmony_ci This class represents both Netscape and RFC 2965 cookies. 7477db96d56Sopenharmony_ci 7487db96d56Sopenharmony_ci This is deliberately a very simple class. It just holds attributes. It's 7497db96d56Sopenharmony_ci possible to construct Cookie instances that don't comply with the cookie 7507db96d56Sopenharmony_ci standards. CookieJar.make_cookies is the factory function for Cookie 7517db96d56Sopenharmony_ci objects -- it deals with cookie parsing, supplying defaults, and 7527db96d56Sopenharmony_ci normalising to the representation used in this class. CookiePolicy is 7537db96d56Sopenharmony_ci responsible for checking them to see whether they should be accepted from 7547db96d56Sopenharmony_ci and returned to the server. 7557db96d56Sopenharmony_ci 7567db96d56Sopenharmony_ci Note that the port may be present in the headers, but unspecified ("Port" 7577db96d56Sopenharmony_ci rather than"Port=80", for example); if this is the case, port is None. 7587db96d56Sopenharmony_ci 7597db96d56Sopenharmony_ci """ 7607db96d56Sopenharmony_ci 7617db96d56Sopenharmony_ci def __init__(self, version, name, value, 7627db96d56Sopenharmony_ci port, port_specified, 7637db96d56Sopenharmony_ci domain, domain_specified, domain_initial_dot, 7647db96d56Sopenharmony_ci path, path_specified, 7657db96d56Sopenharmony_ci secure, 7667db96d56Sopenharmony_ci expires, 7677db96d56Sopenharmony_ci discard, 7687db96d56Sopenharmony_ci comment, 7697db96d56Sopenharmony_ci comment_url, 7707db96d56Sopenharmony_ci rest, 7717db96d56Sopenharmony_ci rfc2109=False, 7727db96d56Sopenharmony_ci ): 7737db96d56Sopenharmony_ci 7747db96d56Sopenharmony_ci if version is not None: version = int(version) 7757db96d56Sopenharmony_ci if expires is not None: expires = int(float(expires)) 7767db96d56Sopenharmony_ci if port is None and port_specified is True: 7777db96d56Sopenharmony_ci raise ValueError("if port is None, port_specified must be false") 7787db96d56Sopenharmony_ci 7797db96d56Sopenharmony_ci self.version = version 7807db96d56Sopenharmony_ci self.name = name 7817db96d56Sopenharmony_ci self.value = value 7827db96d56Sopenharmony_ci self.port = port 7837db96d56Sopenharmony_ci self.port_specified = port_specified 7847db96d56Sopenharmony_ci # normalise case, as per RFC 2965 section 3.3.3 7857db96d56Sopenharmony_ci self.domain = domain.lower() 7867db96d56Sopenharmony_ci self.domain_specified = domain_specified 7877db96d56Sopenharmony_ci # Sigh. We need to know whether the domain given in the 7887db96d56Sopenharmony_ci # cookie-attribute had an initial dot, in order to follow RFC 2965 7897db96d56Sopenharmony_ci # (as clarified in draft errata). Needed for the returned $Domain 7907db96d56Sopenharmony_ci # value. 7917db96d56Sopenharmony_ci self.domain_initial_dot = domain_initial_dot 7927db96d56Sopenharmony_ci self.path = path 7937db96d56Sopenharmony_ci self.path_specified = path_specified 7947db96d56Sopenharmony_ci self.secure = secure 7957db96d56Sopenharmony_ci self.expires = expires 7967db96d56Sopenharmony_ci self.discard = discard 7977db96d56Sopenharmony_ci self.comment = comment 7987db96d56Sopenharmony_ci self.comment_url = comment_url 7997db96d56Sopenharmony_ci self.rfc2109 = rfc2109 8007db96d56Sopenharmony_ci 8017db96d56Sopenharmony_ci self._rest = copy.copy(rest) 8027db96d56Sopenharmony_ci 8037db96d56Sopenharmony_ci def has_nonstandard_attr(self, name): 8047db96d56Sopenharmony_ci return name in self._rest 8057db96d56Sopenharmony_ci def get_nonstandard_attr(self, name, default=None): 8067db96d56Sopenharmony_ci return self._rest.get(name, default) 8077db96d56Sopenharmony_ci def set_nonstandard_attr(self, name, value): 8087db96d56Sopenharmony_ci self._rest[name] = value 8097db96d56Sopenharmony_ci 8107db96d56Sopenharmony_ci def is_expired(self, now=None): 8117db96d56Sopenharmony_ci if now is None: now = time.time() 8127db96d56Sopenharmony_ci if (self.expires is not None) and (self.expires <= now): 8137db96d56Sopenharmony_ci return True 8147db96d56Sopenharmony_ci return False 8157db96d56Sopenharmony_ci 8167db96d56Sopenharmony_ci def __str__(self): 8177db96d56Sopenharmony_ci if self.port is None: p = "" 8187db96d56Sopenharmony_ci else: p = ":"+self.port 8197db96d56Sopenharmony_ci limit = self.domain + p + self.path 8207db96d56Sopenharmony_ci if self.value is not None: 8217db96d56Sopenharmony_ci namevalue = "%s=%s" % (self.name, self.value) 8227db96d56Sopenharmony_ci else: 8237db96d56Sopenharmony_ci namevalue = self.name 8247db96d56Sopenharmony_ci return "<Cookie %s for %s>" % (namevalue, limit) 8257db96d56Sopenharmony_ci 8267db96d56Sopenharmony_ci def __repr__(self): 8277db96d56Sopenharmony_ci args = [] 8287db96d56Sopenharmony_ci for name in ("version", "name", "value", 8297db96d56Sopenharmony_ci "port", "port_specified", 8307db96d56Sopenharmony_ci "domain", "domain_specified", "domain_initial_dot", 8317db96d56Sopenharmony_ci "path", "path_specified", 8327db96d56Sopenharmony_ci "secure", "expires", "discard", "comment", "comment_url", 8337db96d56Sopenharmony_ci ): 8347db96d56Sopenharmony_ci attr = getattr(self, name) 8357db96d56Sopenharmony_ci args.append("%s=%s" % (name, repr(attr))) 8367db96d56Sopenharmony_ci args.append("rest=%s" % repr(self._rest)) 8377db96d56Sopenharmony_ci args.append("rfc2109=%s" % repr(self.rfc2109)) 8387db96d56Sopenharmony_ci return "%s(%s)" % (self.__class__.__name__, ", ".join(args)) 8397db96d56Sopenharmony_ci 8407db96d56Sopenharmony_ci 8417db96d56Sopenharmony_ciclass CookiePolicy: 8427db96d56Sopenharmony_ci """Defines which cookies get accepted from and returned to server. 8437db96d56Sopenharmony_ci 8447db96d56Sopenharmony_ci May also modify cookies, though this is probably a bad idea. 8457db96d56Sopenharmony_ci 8467db96d56Sopenharmony_ci The subclass DefaultCookiePolicy defines the standard rules for Netscape 8477db96d56Sopenharmony_ci and RFC 2965 cookies -- override that if you want a customized policy. 8487db96d56Sopenharmony_ci 8497db96d56Sopenharmony_ci """ 8507db96d56Sopenharmony_ci def set_ok(self, cookie, request): 8517db96d56Sopenharmony_ci """Return true if (and only if) cookie should be accepted from server. 8527db96d56Sopenharmony_ci 8537db96d56Sopenharmony_ci Currently, pre-expired cookies never get this far -- the CookieJar 8547db96d56Sopenharmony_ci class deletes such cookies itself. 8557db96d56Sopenharmony_ci 8567db96d56Sopenharmony_ci """ 8577db96d56Sopenharmony_ci raise NotImplementedError() 8587db96d56Sopenharmony_ci 8597db96d56Sopenharmony_ci def return_ok(self, cookie, request): 8607db96d56Sopenharmony_ci """Return true if (and only if) cookie should be returned to server.""" 8617db96d56Sopenharmony_ci raise NotImplementedError() 8627db96d56Sopenharmony_ci 8637db96d56Sopenharmony_ci def domain_return_ok(self, domain, request): 8647db96d56Sopenharmony_ci """Return false if cookies should not be returned, given cookie domain. 8657db96d56Sopenharmony_ci """ 8667db96d56Sopenharmony_ci return True 8677db96d56Sopenharmony_ci 8687db96d56Sopenharmony_ci def path_return_ok(self, path, request): 8697db96d56Sopenharmony_ci """Return false if cookies should not be returned, given cookie path. 8707db96d56Sopenharmony_ci """ 8717db96d56Sopenharmony_ci return True 8727db96d56Sopenharmony_ci 8737db96d56Sopenharmony_ci 8747db96d56Sopenharmony_ciclass DefaultCookiePolicy(CookiePolicy): 8757db96d56Sopenharmony_ci """Implements the standard rules for accepting and returning cookies.""" 8767db96d56Sopenharmony_ci 8777db96d56Sopenharmony_ci DomainStrictNoDots = 1 8787db96d56Sopenharmony_ci DomainStrictNonDomain = 2 8797db96d56Sopenharmony_ci DomainRFC2965Match = 4 8807db96d56Sopenharmony_ci 8817db96d56Sopenharmony_ci DomainLiberal = 0 8827db96d56Sopenharmony_ci DomainStrict = DomainStrictNoDots|DomainStrictNonDomain 8837db96d56Sopenharmony_ci 8847db96d56Sopenharmony_ci def __init__(self, 8857db96d56Sopenharmony_ci blocked_domains=None, allowed_domains=None, 8867db96d56Sopenharmony_ci netscape=True, rfc2965=False, 8877db96d56Sopenharmony_ci rfc2109_as_netscape=None, 8887db96d56Sopenharmony_ci hide_cookie2=False, 8897db96d56Sopenharmony_ci strict_domain=False, 8907db96d56Sopenharmony_ci strict_rfc2965_unverifiable=True, 8917db96d56Sopenharmony_ci strict_ns_unverifiable=False, 8927db96d56Sopenharmony_ci strict_ns_domain=DomainLiberal, 8937db96d56Sopenharmony_ci strict_ns_set_initial_dollar=False, 8947db96d56Sopenharmony_ci strict_ns_set_path=False, 8957db96d56Sopenharmony_ci secure_protocols=("https", "wss") 8967db96d56Sopenharmony_ci ): 8977db96d56Sopenharmony_ci """Constructor arguments should be passed as keyword arguments only.""" 8987db96d56Sopenharmony_ci self.netscape = netscape 8997db96d56Sopenharmony_ci self.rfc2965 = rfc2965 9007db96d56Sopenharmony_ci self.rfc2109_as_netscape = rfc2109_as_netscape 9017db96d56Sopenharmony_ci self.hide_cookie2 = hide_cookie2 9027db96d56Sopenharmony_ci self.strict_domain = strict_domain 9037db96d56Sopenharmony_ci self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable 9047db96d56Sopenharmony_ci self.strict_ns_unverifiable = strict_ns_unverifiable 9057db96d56Sopenharmony_ci self.strict_ns_domain = strict_ns_domain 9067db96d56Sopenharmony_ci self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar 9077db96d56Sopenharmony_ci self.strict_ns_set_path = strict_ns_set_path 9087db96d56Sopenharmony_ci self.secure_protocols = secure_protocols 9097db96d56Sopenharmony_ci 9107db96d56Sopenharmony_ci if blocked_domains is not None: 9117db96d56Sopenharmony_ci self._blocked_domains = tuple(blocked_domains) 9127db96d56Sopenharmony_ci else: 9137db96d56Sopenharmony_ci self._blocked_domains = () 9147db96d56Sopenharmony_ci 9157db96d56Sopenharmony_ci if allowed_domains is not None: 9167db96d56Sopenharmony_ci allowed_domains = tuple(allowed_domains) 9177db96d56Sopenharmony_ci self._allowed_domains = allowed_domains 9187db96d56Sopenharmony_ci 9197db96d56Sopenharmony_ci def blocked_domains(self): 9207db96d56Sopenharmony_ci """Return the sequence of blocked domains (as a tuple).""" 9217db96d56Sopenharmony_ci return self._blocked_domains 9227db96d56Sopenharmony_ci def set_blocked_domains(self, blocked_domains): 9237db96d56Sopenharmony_ci """Set the sequence of blocked domains.""" 9247db96d56Sopenharmony_ci self._blocked_domains = tuple(blocked_domains) 9257db96d56Sopenharmony_ci 9267db96d56Sopenharmony_ci def is_blocked(self, domain): 9277db96d56Sopenharmony_ci for blocked_domain in self._blocked_domains: 9287db96d56Sopenharmony_ci if user_domain_match(domain, blocked_domain): 9297db96d56Sopenharmony_ci return True 9307db96d56Sopenharmony_ci return False 9317db96d56Sopenharmony_ci 9327db96d56Sopenharmony_ci def allowed_domains(self): 9337db96d56Sopenharmony_ci """Return None, or the sequence of allowed domains (as a tuple).""" 9347db96d56Sopenharmony_ci return self._allowed_domains 9357db96d56Sopenharmony_ci def set_allowed_domains(self, allowed_domains): 9367db96d56Sopenharmony_ci """Set the sequence of allowed domains, or None.""" 9377db96d56Sopenharmony_ci if allowed_domains is not None: 9387db96d56Sopenharmony_ci allowed_domains = tuple(allowed_domains) 9397db96d56Sopenharmony_ci self._allowed_domains = allowed_domains 9407db96d56Sopenharmony_ci 9417db96d56Sopenharmony_ci def is_not_allowed(self, domain): 9427db96d56Sopenharmony_ci if self._allowed_domains is None: 9437db96d56Sopenharmony_ci return False 9447db96d56Sopenharmony_ci for allowed_domain in self._allowed_domains: 9457db96d56Sopenharmony_ci if user_domain_match(domain, allowed_domain): 9467db96d56Sopenharmony_ci return False 9477db96d56Sopenharmony_ci return True 9487db96d56Sopenharmony_ci 9497db96d56Sopenharmony_ci def set_ok(self, cookie, request): 9507db96d56Sopenharmony_ci """ 9517db96d56Sopenharmony_ci If you override .set_ok(), be sure to call this method. If it returns 9527db96d56Sopenharmony_ci false, so should your subclass (assuming your subclass wants to be more 9537db96d56Sopenharmony_ci strict about which cookies to accept). 9547db96d56Sopenharmony_ci 9557db96d56Sopenharmony_ci """ 9567db96d56Sopenharmony_ci _debug(" - checking cookie %s=%s", cookie.name, cookie.value) 9577db96d56Sopenharmony_ci 9587db96d56Sopenharmony_ci assert cookie.name is not None 9597db96d56Sopenharmony_ci 9607db96d56Sopenharmony_ci for n in "version", "verifiability", "name", "path", "domain", "port": 9617db96d56Sopenharmony_ci fn_name = "set_ok_"+n 9627db96d56Sopenharmony_ci fn = getattr(self, fn_name) 9637db96d56Sopenharmony_ci if not fn(cookie, request): 9647db96d56Sopenharmony_ci return False 9657db96d56Sopenharmony_ci 9667db96d56Sopenharmony_ci return True 9677db96d56Sopenharmony_ci 9687db96d56Sopenharmony_ci def set_ok_version(self, cookie, request): 9697db96d56Sopenharmony_ci if cookie.version is None: 9707db96d56Sopenharmony_ci # Version is always set to 0 by parse_ns_headers if it's a Netscape 9717db96d56Sopenharmony_ci # cookie, so this must be an invalid RFC 2965 cookie. 9727db96d56Sopenharmony_ci _debug(" Set-Cookie2 without version attribute (%s=%s)", 9737db96d56Sopenharmony_ci cookie.name, cookie.value) 9747db96d56Sopenharmony_ci return False 9757db96d56Sopenharmony_ci if cookie.version > 0 and not self.rfc2965: 9767db96d56Sopenharmony_ci _debug(" RFC 2965 cookies are switched off") 9777db96d56Sopenharmony_ci return False 9787db96d56Sopenharmony_ci elif cookie.version == 0 and not self.netscape: 9797db96d56Sopenharmony_ci _debug(" Netscape cookies are switched off") 9807db96d56Sopenharmony_ci return False 9817db96d56Sopenharmony_ci return True 9827db96d56Sopenharmony_ci 9837db96d56Sopenharmony_ci def set_ok_verifiability(self, cookie, request): 9847db96d56Sopenharmony_ci if request.unverifiable and is_third_party(request): 9857db96d56Sopenharmony_ci if cookie.version > 0 and self.strict_rfc2965_unverifiable: 9867db96d56Sopenharmony_ci _debug(" third-party RFC 2965 cookie during " 9877db96d56Sopenharmony_ci "unverifiable transaction") 9887db96d56Sopenharmony_ci return False 9897db96d56Sopenharmony_ci elif cookie.version == 0 and self.strict_ns_unverifiable: 9907db96d56Sopenharmony_ci _debug(" third-party Netscape cookie during " 9917db96d56Sopenharmony_ci "unverifiable transaction") 9927db96d56Sopenharmony_ci return False 9937db96d56Sopenharmony_ci return True 9947db96d56Sopenharmony_ci 9957db96d56Sopenharmony_ci def set_ok_name(self, cookie, request): 9967db96d56Sopenharmony_ci # Try and stop servers setting V0 cookies designed to hack other 9977db96d56Sopenharmony_ci # servers that know both V0 and V1 protocols. 9987db96d56Sopenharmony_ci if (cookie.version == 0 and self.strict_ns_set_initial_dollar and 9997db96d56Sopenharmony_ci cookie.name.startswith("$")): 10007db96d56Sopenharmony_ci _debug(" illegal name (starts with '$'): '%s'", cookie.name) 10017db96d56Sopenharmony_ci return False 10027db96d56Sopenharmony_ci return True 10037db96d56Sopenharmony_ci 10047db96d56Sopenharmony_ci def set_ok_path(self, cookie, request): 10057db96d56Sopenharmony_ci if cookie.path_specified: 10067db96d56Sopenharmony_ci req_path = request_path(request) 10077db96d56Sopenharmony_ci if ((cookie.version > 0 or 10087db96d56Sopenharmony_ci (cookie.version == 0 and self.strict_ns_set_path)) and 10097db96d56Sopenharmony_ci not self.path_return_ok(cookie.path, request)): 10107db96d56Sopenharmony_ci _debug(" path attribute %s is not a prefix of request " 10117db96d56Sopenharmony_ci "path %s", cookie.path, req_path) 10127db96d56Sopenharmony_ci return False 10137db96d56Sopenharmony_ci return True 10147db96d56Sopenharmony_ci 10157db96d56Sopenharmony_ci def set_ok_domain(self, cookie, request): 10167db96d56Sopenharmony_ci if self.is_blocked(cookie.domain): 10177db96d56Sopenharmony_ci _debug(" domain %s is in user block-list", cookie.domain) 10187db96d56Sopenharmony_ci return False 10197db96d56Sopenharmony_ci if self.is_not_allowed(cookie.domain): 10207db96d56Sopenharmony_ci _debug(" domain %s is not in user allow-list", cookie.domain) 10217db96d56Sopenharmony_ci return False 10227db96d56Sopenharmony_ci if cookie.domain_specified: 10237db96d56Sopenharmony_ci req_host, erhn = eff_request_host(request) 10247db96d56Sopenharmony_ci domain = cookie.domain 10257db96d56Sopenharmony_ci if self.strict_domain and (domain.count(".") >= 2): 10267db96d56Sopenharmony_ci # XXX This should probably be compared with the Konqueror 10277db96d56Sopenharmony_ci # (kcookiejar.cpp) and Mozilla implementations, but it's a 10287db96d56Sopenharmony_ci # losing battle. 10297db96d56Sopenharmony_ci i = domain.rfind(".") 10307db96d56Sopenharmony_ci j = domain.rfind(".", 0, i) 10317db96d56Sopenharmony_ci if j == 0: # domain like .foo.bar 10327db96d56Sopenharmony_ci tld = domain[i+1:] 10337db96d56Sopenharmony_ci sld = domain[j+1:i] 10347db96d56Sopenharmony_ci if sld.lower() in ("co", "ac", "com", "edu", "org", "net", 10357db96d56Sopenharmony_ci "gov", "mil", "int", "aero", "biz", "cat", "coop", 10367db96d56Sopenharmony_ci "info", "jobs", "mobi", "museum", "name", "pro", 10377db96d56Sopenharmony_ci "travel", "eu") and len(tld) == 2: 10387db96d56Sopenharmony_ci # domain like .co.uk 10397db96d56Sopenharmony_ci _debug(" country-code second level domain %s", domain) 10407db96d56Sopenharmony_ci return False 10417db96d56Sopenharmony_ci if domain.startswith("."): 10427db96d56Sopenharmony_ci undotted_domain = domain[1:] 10437db96d56Sopenharmony_ci else: 10447db96d56Sopenharmony_ci undotted_domain = domain 10457db96d56Sopenharmony_ci embedded_dots = (undotted_domain.find(".") >= 0) 10467db96d56Sopenharmony_ci if not embedded_dots and not erhn.endswith(".local"): 10477db96d56Sopenharmony_ci _debug(" non-local domain %s contains no embedded dot", 10487db96d56Sopenharmony_ci domain) 10497db96d56Sopenharmony_ci return False 10507db96d56Sopenharmony_ci if cookie.version == 0: 10517db96d56Sopenharmony_ci if (not (erhn.endswith(domain) or 10527db96d56Sopenharmony_ci erhn.endswith(f"{undotted_domain}.local")) and 10537db96d56Sopenharmony_ci (not erhn.startswith(".") and 10547db96d56Sopenharmony_ci not ("."+erhn).endswith(domain))): 10557db96d56Sopenharmony_ci _debug(" effective request-host %s (even with added " 10567db96d56Sopenharmony_ci "initial dot) does not end with %s", 10577db96d56Sopenharmony_ci erhn, domain) 10587db96d56Sopenharmony_ci return False 10597db96d56Sopenharmony_ci if (cookie.version > 0 or 10607db96d56Sopenharmony_ci (self.strict_ns_domain & self.DomainRFC2965Match)): 10617db96d56Sopenharmony_ci if not domain_match(erhn, domain): 10627db96d56Sopenharmony_ci _debug(" effective request-host %s does not domain-match " 10637db96d56Sopenharmony_ci "%s", erhn, domain) 10647db96d56Sopenharmony_ci return False 10657db96d56Sopenharmony_ci if (cookie.version > 0 or 10667db96d56Sopenharmony_ci (self.strict_ns_domain & self.DomainStrictNoDots)): 10677db96d56Sopenharmony_ci host_prefix = req_host[:-len(domain)] 10687db96d56Sopenharmony_ci if (host_prefix.find(".") >= 0 and 10697db96d56Sopenharmony_ci not IPV4_RE.search(req_host)): 10707db96d56Sopenharmony_ci _debug(" host prefix %s for domain %s contains a dot", 10717db96d56Sopenharmony_ci host_prefix, domain) 10727db96d56Sopenharmony_ci return False 10737db96d56Sopenharmony_ci return True 10747db96d56Sopenharmony_ci 10757db96d56Sopenharmony_ci def set_ok_port(self, cookie, request): 10767db96d56Sopenharmony_ci if cookie.port_specified: 10777db96d56Sopenharmony_ci req_port = request_port(request) 10787db96d56Sopenharmony_ci if req_port is None: 10797db96d56Sopenharmony_ci req_port = "80" 10807db96d56Sopenharmony_ci else: 10817db96d56Sopenharmony_ci req_port = str(req_port) 10827db96d56Sopenharmony_ci for p in cookie.port.split(","): 10837db96d56Sopenharmony_ci try: 10847db96d56Sopenharmony_ci int(p) 10857db96d56Sopenharmony_ci except ValueError: 10867db96d56Sopenharmony_ci _debug(" bad port %s (not numeric)", p) 10877db96d56Sopenharmony_ci return False 10887db96d56Sopenharmony_ci if p == req_port: 10897db96d56Sopenharmony_ci break 10907db96d56Sopenharmony_ci else: 10917db96d56Sopenharmony_ci _debug(" request port (%s) not found in %s", 10927db96d56Sopenharmony_ci req_port, cookie.port) 10937db96d56Sopenharmony_ci return False 10947db96d56Sopenharmony_ci return True 10957db96d56Sopenharmony_ci 10967db96d56Sopenharmony_ci def return_ok(self, cookie, request): 10977db96d56Sopenharmony_ci """ 10987db96d56Sopenharmony_ci If you override .return_ok(), be sure to call this method. If it 10997db96d56Sopenharmony_ci returns false, so should your subclass (assuming your subclass wants to 11007db96d56Sopenharmony_ci be more strict about which cookies to return). 11017db96d56Sopenharmony_ci 11027db96d56Sopenharmony_ci """ 11037db96d56Sopenharmony_ci # Path has already been checked by .path_return_ok(), and domain 11047db96d56Sopenharmony_ci # blocking done by .domain_return_ok(). 11057db96d56Sopenharmony_ci _debug(" - checking cookie %s=%s", cookie.name, cookie.value) 11067db96d56Sopenharmony_ci 11077db96d56Sopenharmony_ci for n in "version", "verifiability", "secure", "expires", "port", "domain": 11087db96d56Sopenharmony_ci fn_name = "return_ok_"+n 11097db96d56Sopenharmony_ci fn = getattr(self, fn_name) 11107db96d56Sopenharmony_ci if not fn(cookie, request): 11117db96d56Sopenharmony_ci return False 11127db96d56Sopenharmony_ci return True 11137db96d56Sopenharmony_ci 11147db96d56Sopenharmony_ci def return_ok_version(self, cookie, request): 11157db96d56Sopenharmony_ci if cookie.version > 0 and not self.rfc2965: 11167db96d56Sopenharmony_ci _debug(" RFC 2965 cookies are switched off") 11177db96d56Sopenharmony_ci return False 11187db96d56Sopenharmony_ci elif cookie.version == 0 and not self.netscape: 11197db96d56Sopenharmony_ci _debug(" Netscape cookies are switched off") 11207db96d56Sopenharmony_ci return False 11217db96d56Sopenharmony_ci return True 11227db96d56Sopenharmony_ci 11237db96d56Sopenharmony_ci def return_ok_verifiability(self, cookie, request): 11247db96d56Sopenharmony_ci if request.unverifiable and is_third_party(request): 11257db96d56Sopenharmony_ci if cookie.version > 0 and self.strict_rfc2965_unverifiable: 11267db96d56Sopenharmony_ci _debug(" third-party RFC 2965 cookie during unverifiable " 11277db96d56Sopenharmony_ci "transaction") 11287db96d56Sopenharmony_ci return False 11297db96d56Sopenharmony_ci elif cookie.version == 0 and self.strict_ns_unverifiable: 11307db96d56Sopenharmony_ci _debug(" third-party Netscape cookie during unverifiable " 11317db96d56Sopenharmony_ci "transaction") 11327db96d56Sopenharmony_ci return False 11337db96d56Sopenharmony_ci return True 11347db96d56Sopenharmony_ci 11357db96d56Sopenharmony_ci def return_ok_secure(self, cookie, request): 11367db96d56Sopenharmony_ci if cookie.secure and request.type not in self.secure_protocols: 11377db96d56Sopenharmony_ci _debug(" secure cookie with non-secure request") 11387db96d56Sopenharmony_ci return False 11397db96d56Sopenharmony_ci return True 11407db96d56Sopenharmony_ci 11417db96d56Sopenharmony_ci def return_ok_expires(self, cookie, request): 11427db96d56Sopenharmony_ci if cookie.is_expired(self._now): 11437db96d56Sopenharmony_ci _debug(" cookie expired") 11447db96d56Sopenharmony_ci return False 11457db96d56Sopenharmony_ci return True 11467db96d56Sopenharmony_ci 11477db96d56Sopenharmony_ci def return_ok_port(self, cookie, request): 11487db96d56Sopenharmony_ci if cookie.port: 11497db96d56Sopenharmony_ci req_port = request_port(request) 11507db96d56Sopenharmony_ci if req_port is None: 11517db96d56Sopenharmony_ci req_port = "80" 11527db96d56Sopenharmony_ci for p in cookie.port.split(","): 11537db96d56Sopenharmony_ci if p == req_port: 11547db96d56Sopenharmony_ci break 11557db96d56Sopenharmony_ci else: 11567db96d56Sopenharmony_ci _debug(" request port %s does not match cookie port %s", 11577db96d56Sopenharmony_ci req_port, cookie.port) 11587db96d56Sopenharmony_ci return False 11597db96d56Sopenharmony_ci return True 11607db96d56Sopenharmony_ci 11617db96d56Sopenharmony_ci def return_ok_domain(self, cookie, request): 11627db96d56Sopenharmony_ci req_host, erhn = eff_request_host(request) 11637db96d56Sopenharmony_ci domain = cookie.domain 11647db96d56Sopenharmony_ci 11657db96d56Sopenharmony_ci if domain and not domain.startswith("."): 11667db96d56Sopenharmony_ci dotdomain = "." + domain 11677db96d56Sopenharmony_ci else: 11687db96d56Sopenharmony_ci dotdomain = domain 11697db96d56Sopenharmony_ci 11707db96d56Sopenharmony_ci # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't 11717db96d56Sopenharmony_ci if (cookie.version == 0 and 11727db96d56Sopenharmony_ci (self.strict_ns_domain & self.DomainStrictNonDomain) and 11737db96d56Sopenharmony_ci not cookie.domain_specified and domain != erhn): 11747db96d56Sopenharmony_ci _debug(" cookie with unspecified domain does not string-compare " 11757db96d56Sopenharmony_ci "equal to request domain") 11767db96d56Sopenharmony_ci return False 11777db96d56Sopenharmony_ci 11787db96d56Sopenharmony_ci if cookie.version > 0 and not domain_match(erhn, domain): 11797db96d56Sopenharmony_ci _debug(" effective request-host name %s does not domain-match " 11807db96d56Sopenharmony_ci "RFC 2965 cookie domain %s", erhn, domain) 11817db96d56Sopenharmony_ci return False 11827db96d56Sopenharmony_ci if cookie.version == 0 and not ("."+erhn).endswith(dotdomain): 11837db96d56Sopenharmony_ci _debug(" request-host %s does not match Netscape cookie domain " 11847db96d56Sopenharmony_ci "%s", req_host, domain) 11857db96d56Sopenharmony_ci return False 11867db96d56Sopenharmony_ci return True 11877db96d56Sopenharmony_ci 11887db96d56Sopenharmony_ci def domain_return_ok(self, domain, request): 11897db96d56Sopenharmony_ci # Liberal check of. This is here as an optimization to avoid 11907db96d56Sopenharmony_ci # having to load lots of MSIE cookie files unless necessary. 11917db96d56Sopenharmony_ci req_host, erhn = eff_request_host(request) 11927db96d56Sopenharmony_ci if not req_host.startswith("."): 11937db96d56Sopenharmony_ci req_host = "."+req_host 11947db96d56Sopenharmony_ci if not erhn.startswith("."): 11957db96d56Sopenharmony_ci erhn = "."+erhn 11967db96d56Sopenharmony_ci if domain and not domain.startswith("."): 11977db96d56Sopenharmony_ci dotdomain = "." + domain 11987db96d56Sopenharmony_ci else: 11997db96d56Sopenharmony_ci dotdomain = domain 12007db96d56Sopenharmony_ci if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)): 12017db96d56Sopenharmony_ci #_debug(" request domain %s does not match cookie domain %s", 12027db96d56Sopenharmony_ci # req_host, domain) 12037db96d56Sopenharmony_ci return False 12047db96d56Sopenharmony_ci 12057db96d56Sopenharmony_ci if self.is_blocked(domain): 12067db96d56Sopenharmony_ci _debug(" domain %s is in user block-list", domain) 12077db96d56Sopenharmony_ci return False 12087db96d56Sopenharmony_ci if self.is_not_allowed(domain): 12097db96d56Sopenharmony_ci _debug(" domain %s is not in user allow-list", domain) 12107db96d56Sopenharmony_ci return False 12117db96d56Sopenharmony_ci 12127db96d56Sopenharmony_ci return True 12137db96d56Sopenharmony_ci 12147db96d56Sopenharmony_ci def path_return_ok(self, path, request): 12157db96d56Sopenharmony_ci _debug("- checking cookie path=%s", path) 12167db96d56Sopenharmony_ci req_path = request_path(request) 12177db96d56Sopenharmony_ci pathlen = len(path) 12187db96d56Sopenharmony_ci if req_path == path: 12197db96d56Sopenharmony_ci return True 12207db96d56Sopenharmony_ci elif (req_path.startswith(path) and 12217db96d56Sopenharmony_ci (path.endswith("/") or req_path[pathlen:pathlen+1] == "/")): 12227db96d56Sopenharmony_ci return True 12237db96d56Sopenharmony_ci 12247db96d56Sopenharmony_ci _debug(" %s does not path-match %s", req_path, path) 12257db96d56Sopenharmony_ci return False 12267db96d56Sopenharmony_ci 12277db96d56Sopenharmony_cidef deepvalues(mapping): 12287db96d56Sopenharmony_ci """Iterates over nested mapping, depth-first""" 12297db96d56Sopenharmony_ci for obj in list(mapping.values()): 12307db96d56Sopenharmony_ci mapping = False 12317db96d56Sopenharmony_ci try: 12327db96d56Sopenharmony_ci obj.items 12337db96d56Sopenharmony_ci except AttributeError: 12347db96d56Sopenharmony_ci pass 12357db96d56Sopenharmony_ci else: 12367db96d56Sopenharmony_ci mapping = True 12377db96d56Sopenharmony_ci yield from deepvalues(obj) 12387db96d56Sopenharmony_ci if not mapping: 12397db96d56Sopenharmony_ci yield obj 12407db96d56Sopenharmony_ci 12417db96d56Sopenharmony_ci 12427db96d56Sopenharmony_ci# Used as second parameter to dict.get() method, to distinguish absent 12437db96d56Sopenharmony_ci# dict key from one with a None value. 12447db96d56Sopenharmony_ciclass Absent: pass 12457db96d56Sopenharmony_ci 12467db96d56Sopenharmony_ciclass CookieJar: 12477db96d56Sopenharmony_ci """Collection of HTTP cookies. 12487db96d56Sopenharmony_ci 12497db96d56Sopenharmony_ci You may not need to know about this class: try 12507db96d56Sopenharmony_ci urllib.request.build_opener(HTTPCookieProcessor).open(url). 12517db96d56Sopenharmony_ci """ 12527db96d56Sopenharmony_ci 12537db96d56Sopenharmony_ci non_word_re = re.compile(r"\W") 12547db96d56Sopenharmony_ci quote_re = re.compile(r"([\"\\])") 12557db96d56Sopenharmony_ci strict_domain_re = re.compile(r"\.?[^.]*") 12567db96d56Sopenharmony_ci domain_re = re.compile(r"[^.]*") 12577db96d56Sopenharmony_ci dots_re = re.compile(r"^\.+") 12587db96d56Sopenharmony_ci 12597db96d56Sopenharmony_ci magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) 12607db96d56Sopenharmony_ci 12617db96d56Sopenharmony_ci def __init__(self, policy=None): 12627db96d56Sopenharmony_ci if policy is None: 12637db96d56Sopenharmony_ci policy = DefaultCookiePolicy() 12647db96d56Sopenharmony_ci self._policy = policy 12657db96d56Sopenharmony_ci 12667db96d56Sopenharmony_ci self._cookies_lock = _threading.RLock() 12677db96d56Sopenharmony_ci self._cookies = {} 12687db96d56Sopenharmony_ci 12697db96d56Sopenharmony_ci def set_policy(self, policy): 12707db96d56Sopenharmony_ci self._policy = policy 12717db96d56Sopenharmony_ci 12727db96d56Sopenharmony_ci def _cookies_for_domain(self, domain, request): 12737db96d56Sopenharmony_ci cookies = [] 12747db96d56Sopenharmony_ci if not self._policy.domain_return_ok(domain, request): 12757db96d56Sopenharmony_ci return [] 12767db96d56Sopenharmony_ci _debug("Checking %s for cookies to return", domain) 12777db96d56Sopenharmony_ci cookies_by_path = self._cookies[domain] 12787db96d56Sopenharmony_ci for path in cookies_by_path.keys(): 12797db96d56Sopenharmony_ci if not self._policy.path_return_ok(path, request): 12807db96d56Sopenharmony_ci continue 12817db96d56Sopenharmony_ci cookies_by_name = cookies_by_path[path] 12827db96d56Sopenharmony_ci for cookie in cookies_by_name.values(): 12837db96d56Sopenharmony_ci if not self._policy.return_ok(cookie, request): 12847db96d56Sopenharmony_ci _debug(" not returning cookie") 12857db96d56Sopenharmony_ci continue 12867db96d56Sopenharmony_ci _debug(" it's a match") 12877db96d56Sopenharmony_ci cookies.append(cookie) 12887db96d56Sopenharmony_ci return cookies 12897db96d56Sopenharmony_ci 12907db96d56Sopenharmony_ci def _cookies_for_request(self, request): 12917db96d56Sopenharmony_ci """Return a list of cookies to be returned to server.""" 12927db96d56Sopenharmony_ci cookies = [] 12937db96d56Sopenharmony_ci for domain in self._cookies.keys(): 12947db96d56Sopenharmony_ci cookies.extend(self._cookies_for_domain(domain, request)) 12957db96d56Sopenharmony_ci return cookies 12967db96d56Sopenharmony_ci 12977db96d56Sopenharmony_ci def _cookie_attrs(self, cookies): 12987db96d56Sopenharmony_ci """Return a list of cookie-attributes to be returned to server. 12997db96d56Sopenharmony_ci 13007db96d56Sopenharmony_ci like ['foo="bar"; $Path="/"', ...] 13017db96d56Sopenharmony_ci 13027db96d56Sopenharmony_ci The $Version attribute is also added when appropriate (currently only 13037db96d56Sopenharmony_ci once per request). 13047db96d56Sopenharmony_ci 13057db96d56Sopenharmony_ci """ 13067db96d56Sopenharmony_ci # add cookies in order of most specific (ie. longest) path first 13077db96d56Sopenharmony_ci cookies.sort(key=lambda a: len(a.path), reverse=True) 13087db96d56Sopenharmony_ci 13097db96d56Sopenharmony_ci version_set = False 13107db96d56Sopenharmony_ci 13117db96d56Sopenharmony_ci attrs = [] 13127db96d56Sopenharmony_ci for cookie in cookies: 13137db96d56Sopenharmony_ci # set version of Cookie header 13147db96d56Sopenharmony_ci # XXX 13157db96d56Sopenharmony_ci # What should it be if multiple matching Set-Cookie headers have 13167db96d56Sopenharmony_ci # different versions themselves? 13177db96d56Sopenharmony_ci # Answer: there is no answer; was supposed to be settled by 13187db96d56Sopenharmony_ci # RFC 2965 errata, but that may never appear... 13197db96d56Sopenharmony_ci version = cookie.version 13207db96d56Sopenharmony_ci if not version_set: 13217db96d56Sopenharmony_ci version_set = True 13227db96d56Sopenharmony_ci if version > 0: 13237db96d56Sopenharmony_ci attrs.append("$Version=%s" % version) 13247db96d56Sopenharmony_ci 13257db96d56Sopenharmony_ci # quote cookie value if necessary 13267db96d56Sopenharmony_ci # (not for Netscape protocol, which already has any quotes 13277db96d56Sopenharmony_ci # intact, due to the poorly-specified Netscape Cookie: syntax) 13287db96d56Sopenharmony_ci if ((cookie.value is not None) and 13297db96d56Sopenharmony_ci self.non_word_re.search(cookie.value) and version > 0): 13307db96d56Sopenharmony_ci value = self.quote_re.sub(r"\\\1", cookie.value) 13317db96d56Sopenharmony_ci else: 13327db96d56Sopenharmony_ci value = cookie.value 13337db96d56Sopenharmony_ci 13347db96d56Sopenharmony_ci # add cookie-attributes to be returned in Cookie header 13357db96d56Sopenharmony_ci if cookie.value is None: 13367db96d56Sopenharmony_ci attrs.append(cookie.name) 13377db96d56Sopenharmony_ci else: 13387db96d56Sopenharmony_ci attrs.append("%s=%s" % (cookie.name, value)) 13397db96d56Sopenharmony_ci if version > 0: 13407db96d56Sopenharmony_ci if cookie.path_specified: 13417db96d56Sopenharmony_ci attrs.append('$Path="%s"' % cookie.path) 13427db96d56Sopenharmony_ci if cookie.domain.startswith("."): 13437db96d56Sopenharmony_ci domain = cookie.domain 13447db96d56Sopenharmony_ci if (not cookie.domain_initial_dot and 13457db96d56Sopenharmony_ci domain.startswith(".")): 13467db96d56Sopenharmony_ci domain = domain[1:] 13477db96d56Sopenharmony_ci attrs.append('$Domain="%s"' % domain) 13487db96d56Sopenharmony_ci if cookie.port is not None: 13497db96d56Sopenharmony_ci p = "$Port" 13507db96d56Sopenharmony_ci if cookie.port_specified: 13517db96d56Sopenharmony_ci p = p + ('="%s"' % cookie.port) 13527db96d56Sopenharmony_ci attrs.append(p) 13537db96d56Sopenharmony_ci 13547db96d56Sopenharmony_ci return attrs 13557db96d56Sopenharmony_ci 13567db96d56Sopenharmony_ci def add_cookie_header(self, request): 13577db96d56Sopenharmony_ci """Add correct Cookie: header to request (urllib.request.Request object). 13587db96d56Sopenharmony_ci 13597db96d56Sopenharmony_ci The Cookie2 header is also added unless policy.hide_cookie2 is true. 13607db96d56Sopenharmony_ci 13617db96d56Sopenharmony_ci """ 13627db96d56Sopenharmony_ci _debug("add_cookie_header") 13637db96d56Sopenharmony_ci self._cookies_lock.acquire() 13647db96d56Sopenharmony_ci try: 13657db96d56Sopenharmony_ci 13667db96d56Sopenharmony_ci self._policy._now = self._now = int(time.time()) 13677db96d56Sopenharmony_ci 13687db96d56Sopenharmony_ci cookies = self._cookies_for_request(request) 13697db96d56Sopenharmony_ci 13707db96d56Sopenharmony_ci attrs = self._cookie_attrs(cookies) 13717db96d56Sopenharmony_ci if attrs: 13727db96d56Sopenharmony_ci if not request.has_header("Cookie"): 13737db96d56Sopenharmony_ci request.add_unredirected_header( 13747db96d56Sopenharmony_ci "Cookie", "; ".join(attrs)) 13757db96d56Sopenharmony_ci 13767db96d56Sopenharmony_ci # if necessary, advertise that we know RFC 2965 13777db96d56Sopenharmony_ci if (self._policy.rfc2965 and not self._policy.hide_cookie2 and 13787db96d56Sopenharmony_ci not request.has_header("Cookie2")): 13797db96d56Sopenharmony_ci for cookie in cookies: 13807db96d56Sopenharmony_ci if cookie.version != 1: 13817db96d56Sopenharmony_ci request.add_unredirected_header("Cookie2", '$Version="1"') 13827db96d56Sopenharmony_ci break 13837db96d56Sopenharmony_ci 13847db96d56Sopenharmony_ci finally: 13857db96d56Sopenharmony_ci self._cookies_lock.release() 13867db96d56Sopenharmony_ci 13877db96d56Sopenharmony_ci self.clear_expired_cookies() 13887db96d56Sopenharmony_ci 13897db96d56Sopenharmony_ci def _normalized_cookie_tuples(self, attrs_set): 13907db96d56Sopenharmony_ci """Return list of tuples containing normalised cookie information. 13917db96d56Sopenharmony_ci 13927db96d56Sopenharmony_ci attrs_set is the list of lists of key,value pairs extracted from 13937db96d56Sopenharmony_ci the Set-Cookie or Set-Cookie2 headers. 13947db96d56Sopenharmony_ci 13957db96d56Sopenharmony_ci Tuples are name, value, standard, rest, where name and value are the 13967db96d56Sopenharmony_ci cookie name and value, standard is a dictionary containing the standard 13977db96d56Sopenharmony_ci cookie-attributes (discard, secure, version, expires or max-age, 13987db96d56Sopenharmony_ci domain, path and port) and rest is a dictionary containing the rest of 13997db96d56Sopenharmony_ci the cookie-attributes. 14007db96d56Sopenharmony_ci 14017db96d56Sopenharmony_ci """ 14027db96d56Sopenharmony_ci cookie_tuples = [] 14037db96d56Sopenharmony_ci 14047db96d56Sopenharmony_ci boolean_attrs = "discard", "secure" 14057db96d56Sopenharmony_ci value_attrs = ("version", 14067db96d56Sopenharmony_ci "expires", "max-age", 14077db96d56Sopenharmony_ci "domain", "path", "port", 14087db96d56Sopenharmony_ci "comment", "commenturl") 14097db96d56Sopenharmony_ci 14107db96d56Sopenharmony_ci for cookie_attrs in attrs_set: 14117db96d56Sopenharmony_ci name, value = cookie_attrs[0] 14127db96d56Sopenharmony_ci 14137db96d56Sopenharmony_ci # Build dictionary of standard cookie-attributes (standard) and 14147db96d56Sopenharmony_ci # dictionary of other cookie-attributes (rest). 14157db96d56Sopenharmony_ci 14167db96d56Sopenharmony_ci # Note: expiry time is normalised to seconds since epoch. V0 14177db96d56Sopenharmony_ci # cookies should have the Expires cookie-attribute, and V1 cookies 14187db96d56Sopenharmony_ci # should have Max-Age, but since V1 includes RFC 2109 cookies (and 14197db96d56Sopenharmony_ci # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we 14207db96d56Sopenharmony_ci # accept either (but prefer Max-Age). 14217db96d56Sopenharmony_ci max_age_set = False 14227db96d56Sopenharmony_ci 14237db96d56Sopenharmony_ci bad_cookie = False 14247db96d56Sopenharmony_ci 14257db96d56Sopenharmony_ci standard = {} 14267db96d56Sopenharmony_ci rest = {} 14277db96d56Sopenharmony_ci for k, v in cookie_attrs[1:]: 14287db96d56Sopenharmony_ci lc = k.lower() 14297db96d56Sopenharmony_ci # don't lose case distinction for unknown fields 14307db96d56Sopenharmony_ci if lc in value_attrs or lc in boolean_attrs: 14317db96d56Sopenharmony_ci k = lc 14327db96d56Sopenharmony_ci if k in boolean_attrs and v is None: 14337db96d56Sopenharmony_ci # boolean cookie-attribute is present, but has no value 14347db96d56Sopenharmony_ci # (like "discard", rather than "port=80") 14357db96d56Sopenharmony_ci v = True 14367db96d56Sopenharmony_ci if k in standard: 14377db96d56Sopenharmony_ci # only first value is significant 14387db96d56Sopenharmony_ci continue 14397db96d56Sopenharmony_ci if k == "domain": 14407db96d56Sopenharmony_ci if v is None: 14417db96d56Sopenharmony_ci _debug(" missing value for domain attribute") 14427db96d56Sopenharmony_ci bad_cookie = True 14437db96d56Sopenharmony_ci break 14447db96d56Sopenharmony_ci # RFC 2965 section 3.3.3 14457db96d56Sopenharmony_ci v = v.lower() 14467db96d56Sopenharmony_ci if k == "expires": 14477db96d56Sopenharmony_ci if max_age_set: 14487db96d56Sopenharmony_ci # Prefer max-age to expires (like Mozilla) 14497db96d56Sopenharmony_ci continue 14507db96d56Sopenharmony_ci if v is None: 14517db96d56Sopenharmony_ci _debug(" missing or invalid value for expires " 14527db96d56Sopenharmony_ci "attribute: treating as session cookie") 14537db96d56Sopenharmony_ci continue 14547db96d56Sopenharmony_ci if k == "max-age": 14557db96d56Sopenharmony_ci max_age_set = True 14567db96d56Sopenharmony_ci try: 14577db96d56Sopenharmony_ci v = int(v) 14587db96d56Sopenharmony_ci except ValueError: 14597db96d56Sopenharmony_ci _debug(" missing or invalid (non-numeric) value for " 14607db96d56Sopenharmony_ci "max-age attribute") 14617db96d56Sopenharmony_ci bad_cookie = True 14627db96d56Sopenharmony_ci break 14637db96d56Sopenharmony_ci # convert RFC 2965 Max-Age to seconds since epoch 14647db96d56Sopenharmony_ci # XXX Strictly you're supposed to follow RFC 2616 14657db96d56Sopenharmony_ci # age-calculation rules. Remember that zero Max-Age 14667db96d56Sopenharmony_ci # is a request to discard (old and new) cookie, though. 14677db96d56Sopenharmony_ci k = "expires" 14687db96d56Sopenharmony_ci v = self._now + v 14697db96d56Sopenharmony_ci if (k in value_attrs) or (k in boolean_attrs): 14707db96d56Sopenharmony_ci if (v is None and 14717db96d56Sopenharmony_ci k not in ("port", "comment", "commenturl")): 14727db96d56Sopenharmony_ci _debug(" missing value for %s attribute" % k) 14737db96d56Sopenharmony_ci bad_cookie = True 14747db96d56Sopenharmony_ci break 14757db96d56Sopenharmony_ci standard[k] = v 14767db96d56Sopenharmony_ci else: 14777db96d56Sopenharmony_ci rest[k] = v 14787db96d56Sopenharmony_ci 14797db96d56Sopenharmony_ci if bad_cookie: 14807db96d56Sopenharmony_ci continue 14817db96d56Sopenharmony_ci 14827db96d56Sopenharmony_ci cookie_tuples.append((name, value, standard, rest)) 14837db96d56Sopenharmony_ci 14847db96d56Sopenharmony_ci return cookie_tuples 14857db96d56Sopenharmony_ci 14867db96d56Sopenharmony_ci def _cookie_from_cookie_tuple(self, tup, request): 14877db96d56Sopenharmony_ci # standard is dict of standard cookie-attributes, rest is dict of the 14887db96d56Sopenharmony_ci # rest of them 14897db96d56Sopenharmony_ci name, value, standard, rest = tup 14907db96d56Sopenharmony_ci 14917db96d56Sopenharmony_ci domain = standard.get("domain", Absent) 14927db96d56Sopenharmony_ci path = standard.get("path", Absent) 14937db96d56Sopenharmony_ci port = standard.get("port", Absent) 14947db96d56Sopenharmony_ci expires = standard.get("expires", Absent) 14957db96d56Sopenharmony_ci 14967db96d56Sopenharmony_ci # set the easy defaults 14977db96d56Sopenharmony_ci version = standard.get("version", None) 14987db96d56Sopenharmony_ci if version is not None: 14997db96d56Sopenharmony_ci try: 15007db96d56Sopenharmony_ci version = int(version) 15017db96d56Sopenharmony_ci except ValueError: 15027db96d56Sopenharmony_ci return None # invalid version, ignore cookie 15037db96d56Sopenharmony_ci secure = standard.get("secure", False) 15047db96d56Sopenharmony_ci # (discard is also set if expires is Absent) 15057db96d56Sopenharmony_ci discard = standard.get("discard", False) 15067db96d56Sopenharmony_ci comment = standard.get("comment", None) 15077db96d56Sopenharmony_ci comment_url = standard.get("commenturl", None) 15087db96d56Sopenharmony_ci 15097db96d56Sopenharmony_ci # set default path 15107db96d56Sopenharmony_ci if path is not Absent and path != "": 15117db96d56Sopenharmony_ci path_specified = True 15127db96d56Sopenharmony_ci path = escape_path(path) 15137db96d56Sopenharmony_ci else: 15147db96d56Sopenharmony_ci path_specified = False 15157db96d56Sopenharmony_ci path = request_path(request) 15167db96d56Sopenharmony_ci i = path.rfind("/") 15177db96d56Sopenharmony_ci if i != -1: 15187db96d56Sopenharmony_ci if version == 0: 15197db96d56Sopenharmony_ci # Netscape spec parts company from reality here 15207db96d56Sopenharmony_ci path = path[:i] 15217db96d56Sopenharmony_ci else: 15227db96d56Sopenharmony_ci path = path[:i+1] 15237db96d56Sopenharmony_ci if len(path) == 0: path = "/" 15247db96d56Sopenharmony_ci 15257db96d56Sopenharmony_ci # set default domain 15267db96d56Sopenharmony_ci domain_specified = domain is not Absent 15277db96d56Sopenharmony_ci # but first we have to remember whether it starts with a dot 15287db96d56Sopenharmony_ci domain_initial_dot = False 15297db96d56Sopenharmony_ci if domain_specified: 15307db96d56Sopenharmony_ci domain_initial_dot = bool(domain.startswith(".")) 15317db96d56Sopenharmony_ci if domain is Absent: 15327db96d56Sopenharmony_ci req_host, erhn = eff_request_host(request) 15337db96d56Sopenharmony_ci domain = erhn 15347db96d56Sopenharmony_ci elif not domain.startswith("."): 15357db96d56Sopenharmony_ci domain = "."+domain 15367db96d56Sopenharmony_ci 15377db96d56Sopenharmony_ci # set default port 15387db96d56Sopenharmony_ci port_specified = False 15397db96d56Sopenharmony_ci if port is not Absent: 15407db96d56Sopenharmony_ci if port is None: 15417db96d56Sopenharmony_ci # Port attr present, but has no value: default to request port. 15427db96d56Sopenharmony_ci # Cookie should then only be sent back on that port. 15437db96d56Sopenharmony_ci port = request_port(request) 15447db96d56Sopenharmony_ci else: 15457db96d56Sopenharmony_ci port_specified = True 15467db96d56Sopenharmony_ci port = re.sub(r"\s+", "", port) 15477db96d56Sopenharmony_ci else: 15487db96d56Sopenharmony_ci # No port attr present. Cookie can be sent back on any port. 15497db96d56Sopenharmony_ci port = None 15507db96d56Sopenharmony_ci 15517db96d56Sopenharmony_ci # set default expires and discard 15527db96d56Sopenharmony_ci if expires is Absent: 15537db96d56Sopenharmony_ci expires = None 15547db96d56Sopenharmony_ci discard = True 15557db96d56Sopenharmony_ci elif expires <= self._now: 15567db96d56Sopenharmony_ci # Expiry date in past is request to delete cookie. This can't be 15577db96d56Sopenharmony_ci # in DefaultCookiePolicy, because can't delete cookies there. 15587db96d56Sopenharmony_ci try: 15597db96d56Sopenharmony_ci self.clear(domain, path, name) 15607db96d56Sopenharmony_ci except KeyError: 15617db96d56Sopenharmony_ci pass 15627db96d56Sopenharmony_ci _debug("Expiring cookie, domain='%s', path='%s', name='%s'", 15637db96d56Sopenharmony_ci domain, path, name) 15647db96d56Sopenharmony_ci return None 15657db96d56Sopenharmony_ci 15667db96d56Sopenharmony_ci return Cookie(version, 15677db96d56Sopenharmony_ci name, value, 15687db96d56Sopenharmony_ci port, port_specified, 15697db96d56Sopenharmony_ci domain, domain_specified, domain_initial_dot, 15707db96d56Sopenharmony_ci path, path_specified, 15717db96d56Sopenharmony_ci secure, 15727db96d56Sopenharmony_ci expires, 15737db96d56Sopenharmony_ci discard, 15747db96d56Sopenharmony_ci comment, 15757db96d56Sopenharmony_ci comment_url, 15767db96d56Sopenharmony_ci rest) 15777db96d56Sopenharmony_ci 15787db96d56Sopenharmony_ci def _cookies_from_attrs_set(self, attrs_set, request): 15797db96d56Sopenharmony_ci cookie_tuples = self._normalized_cookie_tuples(attrs_set) 15807db96d56Sopenharmony_ci 15817db96d56Sopenharmony_ci cookies = [] 15827db96d56Sopenharmony_ci for tup in cookie_tuples: 15837db96d56Sopenharmony_ci cookie = self._cookie_from_cookie_tuple(tup, request) 15847db96d56Sopenharmony_ci if cookie: cookies.append(cookie) 15857db96d56Sopenharmony_ci return cookies 15867db96d56Sopenharmony_ci 15877db96d56Sopenharmony_ci def _process_rfc2109_cookies(self, cookies): 15887db96d56Sopenharmony_ci rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) 15897db96d56Sopenharmony_ci if rfc2109_as_ns is None: 15907db96d56Sopenharmony_ci rfc2109_as_ns = not self._policy.rfc2965 15917db96d56Sopenharmony_ci for cookie in cookies: 15927db96d56Sopenharmony_ci if cookie.version == 1: 15937db96d56Sopenharmony_ci cookie.rfc2109 = True 15947db96d56Sopenharmony_ci if rfc2109_as_ns: 15957db96d56Sopenharmony_ci # treat 2109 cookies as Netscape cookies rather than 15967db96d56Sopenharmony_ci # as RFC2965 cookies 15977db96d56Sopenharmony_ci cookie.version = 0 15987db96d56Sopenharmony_ci 15997db96d56Sopenharmony_ci def make_cookies(self, response, request): 16007db96d56Sopenharmony_ci """Return sequence of Cookie objects extracted from response object.""" 16017db96d56Sopenharmony_ci # get cookie-attributes for RFC 2965 and Netscape protocols 16027db96d56Sopenharmony_ci headers = response.info() 16037db96d56Sopenharmony_ci rfc2965_hdrs = headers.get_all("Set-Cookie2", []) 16047db96d56Sopenharmony_ci ns_hdrs = headers.get_all("Set-Cookie", []) 16057db96d56Sopenharmony_ci self._policy._now = self._now = int(time.time()) 16067db96d56Sopenharmony_ci 16077db96d56Sopenharmony_ci rfc2965 = self._policy.rfc2965 16087db96d56Sopenharmony_ci netscape = self._policy.netscape 16097db96d56Sopenharmony_ci 16107db96d56Sopenharmony_ci if ((not rfc2965_hdrs and not ns_hdrs) or 16117db96d56Sopenharmony_ci (not ns_hdrs and not rfc2965) or 16127db96d56Sopenharmony_ci (not rfc2965_hdrs and not netscape) or 16137db96d56Sopenharmony_ci (not netscape and not rfc2965)): 16147db96d56Sopenharmony_ci return [] # no relevant cookie headers: quick exit 16157db96d56Sopenharmony_ci 16167db96d56Sopenharmony_ci try: 16177db96d56Sopenharmony_ci cookies = self._cookies_from_attrs_set( 16187db96d56Sopenharmony_ci split_header_words(rfc2965_hdrs), request) 16197db96d56Sopenharmony_ci except Exception: 16207db96d56Sopenharmony_ci _warn_unhandled_exception() 16217db96d56Sopenharmony_ci cookies = [] 16227db96d56Sopenharmony_ci 16237db96d56Sopenharmony_ci if ns_hdrs and netscape: 16247db96d56Sopenharmony_ci try: 16257db96d56Sopenharmony_ci # RFC 2109 and Netscape cookies 16267db96d56Sopenharmony_ci ns_cookies = self._cookies_from_attrs_set( 16277db96d56Sopenharmony_ci parse_ns_headers(ns_hdrs), request) 16287db96d56Sopenharmony_ci except Exception: 16297db96d56Sopenharmony_ci _warn_unhandled_exception() 16307db96d56Sopenharmony_ci ns_cookies = [] 16317db96d56Sopenharmony_ci self._process_rfc2109_cookies(ns_cookies) 16327db96d56Sopenharmony_ci 16337db96d56Sopenharmony_ci # Look for Netscape cookies (from Set-Cookie headers) that match 16347db96d56Sopenharmony_ci # corresponding RFC 2965 cookies (from Set-Cookie2 headers). 16357db96d56Sopenharmony_ci # For each match, keep the RFC 2965 cookie and ignore the Netscape 16367db96d56Sopenharmony_ci # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are 16377db96d56Sopenharmony_ci # bundled in with the Netscape cookies for this purpose, which is 16387db96d56Sopenharmony_ci # reasonable behaviour. 16397db96d56Sopenharmony_ci if rfc2965: 16407db96d56Sopenharmony_ci lookup = {} 16417db96d56Sopenharmony_ci for cookie in cookies: 16427db96d56Sopenharmony_ci lookup[(cookie.domain, cookie.path, cookie.name)] = None 16437db96d56Sopenharmony_ci 16447db96d56Sopenharmony_ci def no_matching_rfc2965(ns_cookie, lookup=lookup): 16457db96d56Sopenharmony_ci key = ns_cookie.domain, ns_cookie.path, ns_cookie.name 16467db96d56Sopenharmony_ci return key not in lookup 16477db96d56Sopenharmony_ci ns_cookies = filter(no_matching_rfc2965, ns_cookies) 16487db96d56Sopenharmony_ci 16497db96d56Sopenharmony_ci if ns_cookies: 16507db96d56Sopenharmony_ci cookies.extend(ns_cookies) 16517db96d56Sopenharmony_ci 16527db96d56Sopenharmony_ci return cookies 16537db96d56Sopenharmony_ci 16547db96d56Sopenharmony_ci def set_cookie_if_ok(self, cookie, request): 16557db96d56Sopenharmony_ci """Set a cookie if policy says it's OK to do so.""" 16567db96d56Sopenharmony_ci self._cookies_lock.acquire() 16577db96d56Sopenharmony_ci try: 16587db96d56Sopenharmony_ci self._policy._now = self._now = int(time.time()) 16597db96d56Sopenharmony_ci 16607db96d56Sopenharmony_ci if self._policy.set_ok(cookie, request): 16617db96d56Sopenharmony_ci self.set_cookie(cookie) 16627db96d56Sopenharmony_ci 16637db96d56Sopenharmony_ci 16647db96d56Sopenharmony_ci finally: 16657db96d56Sopenharmony_ci self._cookies_lock.release() 16667db96d56Sopenharmony_ci 16677db96d56Sopenharmony_ci def set_cookie(self, cookie): 16687db96d56Sopenharmony_ci """Set a cookie, without checking whether or not it should be set.""" 16697db96d56Sopenharmony_ci c = self._cookies 16707db96d56Sopenharmony_ci self._cookies_lock.acquire() 16717db96d56Sopenharmony_ci try: 16727db96d56Sopenharmony_ci if cookie.domain not in c: c[cookie.domain] = {} 16737db96d56Sopenharmony_ci c2 = c[cookie.domain] 16747db96d56Sopenharmony_ci if cookie.path not in c2: c2[cookie.path] = {} 16757db96d56Sopenharmony_ci c3 = c2[cookie.path] 16767db96d56Sopenharmony_ci c3[cookie.name] = cookie 16777db96d56Sopenharmony_ci finally: 16787db96d56Sopenharmony_ci self._cookies_lock.release() 16797db96d56Sopenharmony_ci 16807db96d56Sopenharmony_ci def extract_cookies(self, response, request): 16817db96d56Sopenharmony_ci """Extract cookies from response, where allowable given the request.""" 16827db96d56Sopenharmony_ci _debug("extract_cookies: %s", response.info()) 16837db96d56Sopenharmony_ci self._cookies_lock.acquire() 16847db96d56Sopenharmony_ci try: 16857db96d56Sopenharmony_ci for cookie in self.make_cookies(response, request): 16867db96d56Sopenharmony_ci if self._policy.set_ok(cookie, request): 16877db96d56Sopenharmony_ci _debug(" setting cookie: %s", cookie) 16887db96d56Sopenharmony_ci self.set_cookie(cookie) 16897db96d56Sopenharmony_ci finally: 16907db96d56Sopenharmony_ci self._cookies_lock.release() 16917db96d56Sopenharmony_ci 16927db96d56Sopenharmony_ci def clear(self, domain=None, path=None, name=None): 16937db96d56Sopenharmony_ci """Clear some cookies. 16947db96d56Sopenharmony_ci 16957db96d56Sopenharmony_ci Invoking this method without arguments will clear all cookies. If 16967db96d56Sopenharmony_ci given a single argument, only cookies belonging to that domain will be 16977db96d56Sopenharmony_ci removed. If given two arguments, cookies belonging to the specified 16987db96d56Sopenharmony_ci path within that domain are removed. If given three arguments, then 16997db96d56Sopenharmony_ci the cookie with the specified name, path and domain is removed. 17007db96d56Sopenharmony_ci 17017db96d56Sopenharmony_ci Raises KeyError if no matching cookie exists. 17027db96d56Sopenharmony_ci 17037db96d56Sopenharmony_ci """ 17047db96d56Sopenharmony_ci if name is not None: 17057db96d56Sopenharmony_ci if (domain is None) or (path is None): 17067db96d56Sopenharmony_ci raise ValueError( 17077db96d56Sopenharmony_ci "domain and path must be given to remove a cookie by name") 17087db96d56Sopenharmony_ci del self._cookies[domain][path][name] 17097db96d56Sopenharmony_ci elif path is not None: 17107db96d56Sopenharmony_ci if domain is None: 17117db96d56Sopenharmony_ci raise ValueError( 17127db96d56Sopenharmony_ci "domain must be given to remove cookies by path") 17137db96d56Sopenharmony_ci del self._cookies[domain][path] 17147db96d56Sopenharmony_ci elif domain is not None: 17157db96d56Sopenharmony_ci del self._cookies[domain] 17167db96d56Sopenharmony_ci else: 17177db96d56Sopenharmony_ci self._cookies = {} 17187db96d56Sopenharmony_ci 17197db96d56Sopenharmony_ci def clear_session_cookies(self): 17207db96d56Sopenharmony_ci """Discard all session cookies. 17217db96d56Sopenharmony_ci 17227db96d56Sopenharmony_ci Note that the .save() method won't save session cookies anyway, unless 17237db96d56Sopenharmony_ci you ask otherwise by passing a true ignore_discard argument. 17247db96d56Sopenharmony_ci 17257db96d56Sopenharmony_ci """ 17267db96d56Sopenharmony_ci self._cookies_lock.acquire() 17277db96d56Sopenharmony_ci try: 17287db96d56Sopenharmony_ci for cookie in self: 17297db96d56Sopenharmony_ci if cookie.discard: 17307db96d56Sopenharmony_ci self.clear(cookie.domain, cookie.path, cookie.name) 17317db96d56Sopenharmony_ci finally: 17327db96d56Sopenharmony_ci self._cookies_lock.release() 17337db96d56Sopenharmony_ci 17347db96d56Sopenharmony_ci def clear_expired_cookies(self): 17357db96d56Sopenharmony_ci """Discard all expired cookies. 17367db96d56Sopenharmony_ci 17377db96d56Sopenharmony_ci You probably don't need to call this method: expired cookies are never 17387db96d56Sopenharmony_ci sent back to the server (provided you're using DefaultCookiePolicy), 17397db96d56Sopenharmony_ci this method is called by CookieJar itself every so often, and the 17407db96d56Sopenharmony_ci .save() method won't save expired cookies anyway (unless you ask 17417db96d56Sopenharmony_ci otherwise by passing a true ignore_expires argument). 17427db96d56Sopenharmony_ci 17437db96d56Sopenharmony_ci """ 17447db96d56Sopenharmony_ci self._cookies_lock.acquire() 17457db96d56Sopenharmony_ci try: 17467db96d56Sopenharmony_ci now = time.time() 17477db96d56Sopenharmony_ci for cookie in self: 17487db96d56Sopenharmony_ci if cookie.is_expired(now): 17497db96d56Sopenharmony_ci self.clear(cookie.domain, cookie.path, cookie.name) 17507db96d56Sopenharmony_ci finally: 17517db96d56Sopenharmony_ci self._cookies_lock.release() 17527db96d56Sopenharmony_ci 17537db96d56Sopenharmony_ci def __iter__(self): 17547db96d56Sopenharmony_ci return deepvalues(self._cookies) 17557db96d56Sopenharmony_ci 17567db96d56Sopenharmony_ci def __len__(self): 17577db96d56Sopenharmony_ci """Return number of contained cookies.""" 17587db96d56Sopenharmony_ci i = 0 17597db96d56Sopenharmony_ci for cookie in self: i = i + 1 17607db96d56Sopenharmony_ci return i 17617db96d56Sopenharmony_ci 17627db96d56Sopenharmony_ci def __repr__(self): 17637db96d56Sopenharmony_ci r = [] 17647db96d56Sopenharmony_ci for cookie in self: r.append(repr(cookie)) 17657db96d56Sopenharmony_ci return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) 17667db96d56Sopenharmony_ci 17677db96d56Sopenharmony_ci def __str__(self): 17687db96d56Sopenharmony_ci r = [] 17697db96d56Sopenharmony_ci for cookie in self: r.append(str(cookie)) 17707db96d56Sopenharmony_ci return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r)) 17717db96d56Sopenharmony_ci 17727db96d56Sopenharmony_ci 17737db96d56Sopenharmony_ci# derives from OSError for backwards-compatibility with Python 2.4.0 17747db96d56Sopenharmony_ciclass LoadError(OSError): pass 17757db96d56Sopenharmony_ci 17767db96d56Sopenharmony_ciclass FileCookieJar(CookieJar): 17777db96d56Sopenharmony_ci """CookieJar that can be loaded from and saved to a file.""" 17787db96d56Sopenharmony_ci 17797db96d56Sopenharmony_ci def __init__(self, filename=None, delayload=False, policy=None): 17807db96d56Sopenharmony_ci """ 17817db96d56Sopenharmony_ci Cookies are NOT loaded from the named file until either the .load() or 17827db96d56Sopenharmony_ci .revert() method is called. 17837db96d56Sopenharmony_ci 17847db96d56Sopenharmony_ci """ 17857db96d56Sopenharmony_ci CookieJar.__init__(self, policy) 17867db96d56Sopenharmony_ci if filename is not None: 17877db96d56Sopenharmony_ci filename = os.fspath(filename) 17887db96d56Sopenharmony_ci self.filename = filename 17897db96d56Sopenharmony_ci self.delayload = bool(delayload) 17907db96d56Sopenharmony_ci 17917db96d56Sopenharmony_ci def save(self, filename=None, ignore_discard=False, ignore_expires=False): 17927db96d56Sopenharmony_ci """Save cookies to a file.""" 17937db96d56Sopenharmony_ci raise NotImplementedError() 17947db96d56Sopenharmony_ci 17957db96d56Sopenharmony_ci def load(self, filename=None, ignore_discard=False, ignore_expires=False): 17967db96d56Sopenharmony_ci """Load cookies from a file.""" 17977db96d56Sopenharmony_ci if filename is None: 17987db96d56Sopenharmony_ci if self.filename is not None: filename = self.filename 17997db96d56Sopenharmony_ci else: raise ValueError(MISSING_FILENAME_TEXT) 18007db96d56Sopenharmony_ci 18017db96d56Sopenharmony_ci with open(filename) as f: 18027db96d56Sopenharmony_ci self._really_load(f, filename, ignore_discard, ignore_expires) 18037db96d56Sopenharmony_ci 18047db96d56Sopenharmony_ci def revert(self, filename=None, 18057db96d56Sopenharmony_ci ignore_discard=False, ignore_expires=False): 18067db96d56Sopenharmony_ci """Clear all cookies and reload cookies from a saved file. 18077db96d56Sopenharmony_ci 18087db96d56Sopenharmony_ci Raises LoadError (or OSError) if reversion is not successful; the 18097db96d56Sopenharmony_ci object's state will not be altered if this happens. 18107db96d56Sopenharmony_ci 18117db96d56Sopenharmony_ci """ 18127db96d56Sopenharmony_ci if filename is None: 18137db96d56Sopenharmony_ci if self.filename is not None: filename = self.filename 18147db96d56Sopenharmony_ci else: raise ValueError(MISSING_FILENAME_TEXT) 18157db96d56Sopenharmony_ci 18167db96d56Sopenharmony_ci self._cookies_lock.acquire() 18177db96d56Sopenharmony_ci try: 18187db96d56Sopenharmony_ci 18197db96d56Sopenharmony_ci old_state = copy.deepcopy(self._cookies) 18207db96d56Sopenharmony_ci self._cookies = {} 18217db96d56Sopenharmony_ci try: 18227db96d56Sopenharmony_ci self.load(filename, ignore_discard, ignore_expires) 18237db96d56Sopenharmony_ci except OSError: 18247db96d56Sopenharmony_ci self._cookies = old_state 18257db96d56Sopenharmony_ci raise 18267db96d56Sopenharmony_ci 18277db96d56Sopenharmony_ci finally: 18287db96d56Sopenharmony_ci self._cookies_lock.release() 18297db96d56Sopenharmony_ci 18307db96d56Sopenharmony_ci 18317db96d56Sopenharmony_cidef lwp_cookie_str(cookie): 18327db96d56Sopenharmony_ci """Return string representation of Cookie in the LWP cookie file format. 18337db96d56Sopenharmony_ci 18347db96d56Sopenharmony_ci Actually, the format is extended a bit -- see module docstring. 18357db96d56Sopenharmony_ci 18367db96d56Sopenharmony_ci """ 18377db96d56Sopenharmony_ci h = [(cookie.name, cookie.value), 18387db96d56Sopenharmony_ci ("path", cookie.path), 18397db96d56Sopenharmony_ci ("domain", cookie.domain)] 18407db96d56Sopenharmony_ci if cookie.port is not None: h.append(("port", cookie.port)) 18417db96d56Sopenharmony_ci if cookie.path_specified: h.append(("path_spec", None)) 18427db96d56Sopenharmony_ci if cookie.port_specified: h.append(("port_spec", None)) 18437db96d56Sopenharmony_ci if cookie.domain_initial_dot: h.append(("domain_dot", None)) 18447db96d56Sopenharmony_ci if cookie.secure: h.append(("secure", None)) 18457db96d56Sopenharmony_ci if cookie.expires: h.append(("expires", 18467db96d56Sopenharmony_ci time2isoz(float(cookie.expires)))) 18477db96d56Sopenharmony_ci if cookie.discard: h.append(("discard", None)) 18487db96d56Sopenharmony_ci if cookie.comment: h.append(("comment", cookie.comment)) 18497db96d56Sopenharmony_ci if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) 18507db96d56Sopenharmony_ci 18517db96d56Sopenharmony_ci keys = sorted(cookie._rest.keys()) 18527db96d56Sopenharmony_ci for k in keys: 18537db96d56Sopenharmony_ci h.append((k, str(cookie._rest[k]))) 18547db96d56Sopenharmony_ci 18557db96d56Sopenharmony_ci h.append(("version", str(cookie.version))) 18567db96d56Sopenharmony_ci 18577db96d56Sopenharmony_ci return join_header_words([h]) 18587db96d56Sopenharmony_ci 18597db96d56Sopenharmony_ciclass LWPCookieJar(FileCookieJar): 18607db96d56Sopenharmony_ci """ 18617db96d56Sopenharmony_ci The LWPCookieJar saves a sequence of "Set-Cookie3" lines. 18627db96d56Sopenharmony_ci "Set-Cookie3" is the format used by the libwww-perl library, not known 18637db96d56Sopenharmony_ci to be compatible with any browser, but which is easy to read and 18647db96d56Sopenharmony_ci doesn't lose information about RFC 2965 cookies. 18657db96d56Sopenharmony_ci 18667db96d56Sopenharmony_ci Additional methods 18677db96d56Sopenharmony_ci 18687db96d56Sopenharmony_ci as_lwp_str(ignore_discard=True, ignore_expired=True) 18697db96d56Sopenharmony_ci 18707db96d56Sopenharmony_ci """ 18717db96d56Sopenharmony_ci 18727db96d56Sopenharmony_ci def as_lwp_str(self, ignore_discard=True, ignore_expires=True): 18737db96d56Sopenharmony_ci """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. 18747db96d56Sopenharmony_ci 18757db96d56Sopenharmony_ci ignore_discard and ignore_expires: see docstring for FileCookieJar.save 18767db96d56Sopenharmony_ci 18777db96d56Sopenharmony_ci """ 18787db96d56Sopenharmony_ci now = time.time() 18797db96d56Sopenharmony_ci r = [] 18807db96d56Sopenharmony_ci for cookie in self: 18817db96d56Sopenharmony_ci if not ignore_discard and cookie.discard: 18827db96d56Sopenharmony_ci continue 18837db96d56Sopenharmony_ci if not ignore_expires and cookie.is_expired(now): 18847db96d56Sopenharmony_ci continue 18857db96d56Sopenharmony_ci r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) 18867db96d56Sopenharmony_ci return "\n".join(r+[""]) 18877db96d56Sopenharmony_ci 18887db96d56Sopenharmony_ci def save(self, filename=None, ignore_discard=False, ignore_expires=False): 18897db96d56Sopenharmony_ci if filename is None: 18907db96d56Sopenharmony_ci if self.filename is not None: filename = self.filename 18917db96d56Sopenharmony_ci else: raise ValueError(MISSING_FILENAME_TEXT) 18927db96d56Sopenharmony_ci 18937db96d56Sopenharmony_ci with os.fdopen( 18947db96d56Sopenharmony_ci os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600), 18957db96d56Sopenharmony_ci 'w', 18967db96d56Sopenharmony_ci ) as f: 18977db96d56Sopenharmony_ci # There really isn't an LWP Cookies 2.0 format, but this indicates 18987db96d56Sopenharmony_ci # that there is extra information in here (domain_dot and 18997db96d56Sopenharmony_ci # port_spec) while still being compatible with libwww-perl, I hope. 19007db96d56Sopenharmony_ci f.write("#LWP-Cookies-2.0\n") 19017db96d56Sopenharmony_ci f.write(self.as_lwp_str(ignore_discard, ignore_expires)) 19027db96d56Sopenharmony_ci 19037db96d56Sopenharmony_ci def _really_load(self, f, filename, ignore_discard, ignore_expires): 19047db96d56Sopenharmony_ci magic = f.readline() 19057db96d56Sopenharmony_ci if not self.magic_re.search(magic): 19067db96d56Sopenharmony_ci msg = ("%r does not look like a Set-Cookie3 (LWP) format " 19077db96d56Sopenharmony_ci "file" % filename) 19087db96d56Sopenharmony_ci raise LoadError(msg) 19097db96d56Sopenharmony_ci 19107db96d56Sopenharmony_ci now = time.time() 19117db96d56Sopenharmony_ci 19127db96d56Sopenharmony_ci header = "Set-Cookie3:" 19137db96d56Sopenharmony_ci boolean_attrs = ("port_spec", "path_spec", "domain_dot", 19147db96d56Sopenharmony_ci "secure", "discard") 19157db96d56Sopenharmony_ci value_attrs = ("version", 19167db96d56Sopenharmony_ci "port", "path", "domain", 19177db96d56Sopenharmony_ci "expires", 19187db96d56Sopenharmony_ci "comment", "commenturl") 19197db96d56Sopenharmony_ci 19207db96d56Sopenharmony_ci try: 19217db96d56Sopenharmony_ci while 1: 19227db96d56Sopenharmony_ci line = f.readline() 19237db96d56Sopenharmony_ci if line == "": break 19247db96d56Sopenharmony_ci if not line.startswith(header): 19257db96d56Sopenharmony_ci continue 19267db96d56Sopenharmony_ci line = line[len(header):].strip() 19277db96d56Sopenharmony_ci 19287db96d56Sopenharmony_ci for data in split_header_words([line]): 19297db96d56Sopenharmony_ci name, value = data[0] 19307db96d56Sopenharmony_ci standard = {} 19317db96d56Sopenharmony_ci rest = {} 19327db96d56Sopenharmony_ci for k in boolean_attrs: 19337db96d56Sopenharmony_ci standard[k] = False 19347db96d56Sopenharmony_ci for k, v in data[1:]: 19357db96d56Sopenharmony_ci if k is not None: 19367db96d56Sopenharmony_ci lc = k.lower() 19377db96d56Sopenharmony_ci else: 19387db96d56Sopenharmony_ci lc = None 19397db96d56Sopenharmony_ci # don't lose case distinction for unknown fields 19407db96d56Sopenharmony_ci if (lc in value_attrs) or (lc in boolean_attrs): 19417db96d56Sopenharmony_ci k = lc 19427db96d56Sopenharmony_ci if k in boolean_attrs: 19437db96d56Sopenharmony_ci if v is None: v = True 19447db96d56Sopenharmony_ci standard[k] = v 19457db96d56Sopenharmony_ci elif k in value_attrs: 19467db96d56Sopenharmony_ci standard[k] = v 19477db96d56Sopenharmony_ci else: 19487db96d56Sopenharmony_ci rest[k] = v 19497db96d56Sopenharmony_ci 19507db96d56Sopenharmony_ci h = standard.get 19517db96d56Sopenharmony_ci expires = h("expires") 19527db96d56Sopenharmony_ci discard = h("discard") 19537db96d56Sopenharmony_ci if expires is not None: 19547db96d56Sopenharmony_ci expires = iso2time(expires) 19557db96d56Sopenharmony_ci if expires is None: 19567db96d56Sopenharmony_ci discard = True 19577db96d56Sopenharmony_ci domain = h("domain") 19587db96d56Sopenharmony_ci domain_specified = domain.startswith(".") 19597db96d56Sopenharmony_ci c = Cookie(h("version"), name, value, 19607db96d56Sopenharmony_ci h("port"), h("port_spec"), 19617db96d56Sopenharmony_ci domain, domain_specified, h("domain_dot"), 19627db96d56Sopenharmony_ci h("path"), h("path_spec"), 19637db96d56Sopenharmony_ci h("secure"), 19647db96d56Sopenharmony_ci expires, 19657db96d56Sopenharmony_ci discard, 19667db96d56Sopenharmony_ci h("comment"), 19677db96d56Sopenharmony_ci h("commenturl"), 19687db96d56Sopenharmony_ci rest) 19697db96d56Sopenharmony_ci if not ignore_discard and c.discard: 19707db96d56Sopenharmony_ci continue 19717db96d56Sopenharmony_ci if not ignore_expires and c.is_expired(now): 19727db96d56Sopenharmony_ci continue 19737db96d56Sopenharmony_ci self.set_cookie(c) 19747db96d56Sopenharmony_ci except OSError: 19757db96d56Sopenharmony_ci raise 19767db96d56Sopenharmony_ci except Exception: 19777db96d56Sopenharmony_ci _warn_unhandled_exception() 19787db96d56Sopenharmony_ci raise LoadError("invalid Set-Cookie3 format file %r: %r" % 19797db96d56Sopenharmony_ci (filename, line)) 19807db96d56Sopenharmony_ci 19817db96d56Sopenharmony_ci 19827db96d56Sopenharmony_ciclass MozillaCookieJar(FileCookieJar): 19837db96d56Sopenharmony_ci """ 19847db96d56Sopenharmony_ci 19857db96d56Sopenharmony_ci WARNING: you may want to backup your browser's cookies file if you use 19867db96d56Sopenharmony_ci this class to save cookies. I *think* it works, but there have been 19877db96d56Sopenharmony_ci bugs in the past! 19887db96d56Sopenharmony_ci 19897db96d56Sopenharmony_ci This class differs from CookieJar only in the format it uses to save and 19907db96d56Sopenharmony_ci load cookies to and from a file. This class uses the Mozilla/Netscape 19917db96d56Sopenharmony_ci `cookies.txt' format. curl and lynx use this file format, too. 19927db96d56Sopenharmony_ci 19937db96d56Sopenharmony_ci Don't expect cookies saved while the browser is running to be noticed by 19947db96d56Sopenharmony_ci the browser (in fact, Mozilla on unix will overwrite your saved cookies if 19957db96d56Sopenharmony_ci you change them on disk while it's running; on Windows, you probably can't 19967db96d56Sopenharmony_ci save at all while the browser is running). 19977db96d56Sopenharmony_ci 19987db96d56Sopenharmony_ci Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to 19997db96d56Sopenharmony_ci Netscape cookies on saving. 20007db96d56Sopenharmony_ci 20017db96d56Sopenharmony_ci In particular, the cookie version and port number information is lost, 20027db96d56Sopenharmony_ci together with information about whether or not Path, Port and Discard were 20037db96d56Sopenharmony_ci specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the 20047db96d56Sopenharmony_ci domain as set in the HTTP header started with a dot (yes, I'm aware some 20057db96d56Sopenharmony_ci domains in Netscape files start with a dot and some don't -- trust me, you 20067db96d56Sopenharmony_ci really don't want to know any more about this). 20077db96d56Sopenharmony_ci 20087db96d56Sopenharmony_ci Note that though Mozilla and Netscape use the same format, they use 20097db96d56Sopenharmony_ci slightly different headers. The class saves cookies using the Netscape 20107db96d56Sopenharmony_ci header by default (Mozilla can cope with that). 20117db96d56Sopenharmony_ci 20127db96d56Sopenharmony_ci """ 20137db96d56Sopenharmony_ci 20147db96d56Sopenharmony_ci def _really_load(self, f, filename, ignore_discard, ignore_expires): 20157db96d56Sopenharmony_ci now = time.time() 20167db96d56Sopenharmony_ci 20177db96d56Sopenharmony_ci if not NETSCAPE_MAGIC_RGX.match(f.readline()): 20187db96d56Sopenharmony_ci raise LoadError( 20197db96d56Sopenharmony_ci "%r does not look like a Netscape format cookies file" % 20207db96d56Sopenharmony_ci filename) 20217db96d56Sopenharmony_ci 20227db96d56Sopenharmony_ci try: 20237db96d56Sopenharmony_ci while 1: 20247db96d56Sopenharmony_ci line = f.readline() 20257db96d56Sopenharmony_ci rest = {} 20267db96d56Sopenharmony_ci 20277db96d56Sopenharmony_ci if line == "": break 20287db96d56Sopenharmony_ci 20297db96d56Sopenharmony_ci # httponly is a cookie flag as defined in rfc6265 20307db96d56Sopenharmony_ci # when encoded in a netscape cookie file, 20317db96d56Sopenharmony_ci # the line is prepended with "#HttpOnly_" 20327db96d56Sopenharmony_ci if line.startswith(HTTPONLY_PREFIX): 20337db96d56Sopenharmony_ci rest[HTTPONLY_ATTR] = "" 20347db96d56Sopenharmony_ci line = line[len(HTTPONLY_PREFIX):] 20357db96d56Sopenharmony_ci 20367db96d56Sopenharmony_ci # last field may be absent, so keep any trailing tab 20377db96d56Sopenharmony_ci if line.endswith("\n"): line = line[:-1] 20387db96d56Sopenharmony_ci 20397db96d56Sopenharmony_ci # skip comments and blank lines XXX what is $ for? 20407db96d56Sopenharmony_ci if (line.strip().startswith(("#", "$")) or 20417db96d56Sopenharmony_ci line.strip() == ""): 20427db96d56Sopenharmony_ci continue 20437db96d56Sopenharmony_ci 20447db96d56Sopenharmony_ci domain, domain_specified, path, secure, expires, name, value = \ 20457db96d56Sopenharmony_ci line.split("\t") 20467db96d56Sopenharmony_ci secure = (secure == "TRUE") 20477db96d56Sopenharmony_ci domain_specified = (domain_specified == "TRUE") 20487db96d56Sopenharmony_ci if name == "": 20497db96d56Sopenharmony_ci # cookies.txt regards 'Set-Cookie: foo' as a cookie 20507db96d56Sopenharmony_ci # with no name, whereas http.cookiejar regards it as a 20517db96d56Sopenharmony_ci # cookie with no value. 20527db96d56Sopenharmony_ci name = value 20537db96d56Sopenharmony_ci value = None 20547db96d56Sopenharmony_ci 20557db96d56Sopenharmony_ci initial_dot = domain.startswith(".") 20567db96d56Sopenharmony_ci assert domain_specified == initial_dot 20577db96d56Sopenharmony_ci 20587db96d56Sopenharmony_ci discard = False 20597db96d56Sopenharmony_ci if expires == "": 20607db96d56Sopenharmony_ci expires = None 20617db96d56Sopenharmony_ci discard = True 20627db96d56Sopenharmony_ci 20637db96d56Sopenharmony_ci # assume path_specified is false 20647db96d56Sopenharmony_ci c = Cookie(0, name, value, 20657db96d56Sopenharmony_ci None, False, 20667db96d56Sopenharmony_ci domain, domain_specified, initial_dot, 20677db96d56Sopenharmony_ci path, False, 20687db96d56Sopenharmony_ci secure, 20697db96d56Sopenharmony_ci expires, 20707db96d56Sopenharmony_ci discard, 20717db96d56Sopenharmony_ci None, 20727db96d56Sopenharmony_ci None, 20737db96d56Sopenharmony_ci rest) 20747db96d56Sopenharmony_ci if not ignore_discard and c.discard: 20757db96d56Sopenharmony_ci continue 20767db96d56Sopenharmony_ci if not ignore_expires and c.is_expired(now): 20777db96d56Sopenharmony_ci continue 20787db96d56Sopenharmony_ci self.set_cookie(c) 20797db96d56Sopenharmony_ci 20807db96d56Sopenharmony_ci except OSError: 20817db96d56Sopenharmony_ci raise 20827db96d56Sopenharmony_ci except Exception: 20837db96d56Sopenharmony_ci _warn_unhandled_exception() 20847db96d56Sopenharmony_ci raise LoadError("invalid Netscape format cookies file %r: %r" % 20857db96d56Sopenharmony_ci (filename, line)) 20867db96d56Sopenharmony_ci 20877db96d56Sopenharmony_ci def save(self, filename=None, ignore_discard=False, ignore_expires=False): 20887db96d56Sopenharmony_ci if filename is None: 20897db96d56Sopenharmony_ci if self.filename is not None: filename = self.filename 20907db96d56Sopenharmony_ci else: raise ValueError(MISSING_FILENAME_TEXT) 20917db96d56Sopenharmony_ci 20927db96d56Sopenharmony_ci with os.fdopen( 20937db96d56Sopenharmony_ci os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600), 20947db96d56Sopenharmony_ci 'w', 20957db96d56Sopenharmony_ci ) as f: 20967db96d56Sopenharmony_ci f.write(NETSCAPE_HEADER_TEXT) 20977db96d56Sopenharmony_ci now = time.time() 20987db96d56Sopenharmony_ci for cookie in self: 20997db96d56Sopenharmony_ci domain = cookie.domain 21007db96d56Sopenharmony_ci if not ignore_discard and cookie.discard: 21017db96d56Sopenharmony_ci continue 21027db96d56Sopenharmony_ci if not ignore_expires and cookie.is_expired(now): 21037db96d56Sopenharmony_ci continue 21047db96d56Sopenharmony_ci if cookie.secure: secure = "TRUE" 21057db96d56Sopenharmony_ci else: secure = "FALSE" 21067db96d56Sopenharmony_ci if domain.startswith("."): initial_dot = "TRUE" 21077db96d56Sopenharmony_ci else: initial_dot = "FALSE" 21087db96d56Sopenharmony_ci if cookie.expires is not None: 21097db96d56Sopenharmony_ci expires = str(cookie.expires) 21107db96d56Sopenharmony_ci else: 21117db96d56Sopenharmony_ci expires = "" 21127db96d56Sopenharmony_ci if cookie.value is None: 21137db96d56Sopenharmony_ci # cookies.txt regards 'Set-Cookie: foo' as a cookie 21147db96d56Sopenharmony_ci # with no name, whereas http.cookiejar regards it as a 21157db96d56Sopenharmony_ci # cookie with no value. 21167db96d56Sopenharmony_ci name = "" 21177db96d56Sopenharmony_ci value = cookie.name 21187db96d56Sopenharmony_ci else: 21197db96d56Sopenharmony_ci name = cookie.name 21207db96d56Sopenharmony_ci value = cookie.value 21217db96d56Sopenharmony_ci if cookie.has_nonstandard_attr(HTTPONLY_ATTR): 21227db96d56Sopenharmony_ci domain = HTTPONLY_PREFIX + domain 21237db96d56Sopenharmony_ci f.write( 21247db96d56Sopenharmony_ci "\t".join([domain, initial_dot, cookie.path, 21257db96d56Sopenharmony_ci secure, expires, name, value])+ 21267db96d56Sopenharmony_ci "\n") 2127