"""This will be the home for the policy that hooks in the new
code that adds all the email6 features.
"""

import re
import sys
from email._policybase import Policy, Compat32, compat32, _extend_docstrings
from email.utils import _has_surrogates
from email.headerregistry import HeaderRegistry as HeaderRegistry
from email.contentmanager import raw_data_manager
from email.message import EmailMessage

__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]

# Splits on every individual CR or LF.  Only used where the pieces are joined
# straight back together, so the empty string produced between the CR and LF
# of a CRLF pair is harmless there.
linesep_splitter = re.compile(r'\n|\r')

# Matches one complete line break.  CRLF is listed first so that it is
# consumed as a single break rather than as two.
_source_linebreak = re.compile(r'\r\n|\r|\n')


def _split_source_lines(value):
    """Split a source header value into lines on CR, LF and CRLF only.

    This mirrors str.splitlines (no line endings kept, no trailing empty
    element for a value that ends in a line break, empty list for an empty
    value) but ignores the additional characters that splitlines treats as
    line boundaries (VT, FF, FS, GS, RS, NEL, LS, PS).  Those characters are
    ordinary data in a header value and must not be turned into line breaks.
    """
    lines = _source_linebreak.split(value)
    # re.split always returns at least one element; a final empty string
    # means the value was empty or ended with a line break.
    if not lines[-1]:
        lines.pop()
    return lines


@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # Bypass any attribute-setting hook of the base class by going
            # straight to object.__setattr__.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value is a
        string that str.splitlines breaks into more than one line, which
        includes any value with an embedded CR or LF.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines())>1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            # splitlines also breaks on characters other than CR and LF, and
            # a single trailing line break yields only one line, so it is
            # not caught by this check.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines on CR, LF and CRLF only (the other
        characters that str.splitlines treats as line boundaries are left in
        place as data).  If the value is not to be refolded, the lines are
        rejoined using the linesep from the policy and returned.  The
        exception is lines containing non-ascii binary data.  In that case the
        value is refolded regardless of the refold_source setting, which
        causes the binary data to be CTE encoded using the unknown-8bit
        charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Shared implementation of fold and fold_binary.

        Header objects (anything with a 'name' attribute) fold themselves.
        A source string is either refolded through header_factory or emitted
        with its original folding, depending on refold_source, the line
        lengths, and (when refold_binary is true) whether the value contains
        surrogateescaped bytes.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (None or 0) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else sys.maxsize
        # Not str.splitlines: it would also break on VT, FF, NEL, LS, PS etc.,
        # and rejoining with linesep below would then turn those data
        # characters into real line breaks in the output.
        lines = _split_source_lines(value)
        # The first line shares its physical line with "name: ", hence the
        # extra len(name)+2 when checking it against the limit.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if refold or refold_binary and _has_surrogates(value):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep


default = EmailPolicy()
# Make the default policy use the class default header_factory
del default.header_factory
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)
SMTPUTF8 = SMTP.clone(utf8=True)