17db96d56Sopenharmony_ci"""This will be the home for the policy that hooks in the new
27db96d56Sopenharmony_cicode that adds all the email6 features.
37db96d56Sopenharmony_ci"""
47db96d56Sopenharmony_ci
57db96d56Sopenharmony_ciimport re
67db96d56Sopenharmony_ciimport sys
77db96d56Sopenharmony_cifrom email._policybase import Policy, Compat32, compat32, _extend_docstrings
87db96d56Sopenharmony_cifrom email.utils import _has_surrogates
97db96d56Sopenharmony_cifrom email.headerregistry import HeaderRegistry as HeaderRegistry
107db96d56Sopenharmony_cifrom email.contentmanager import raw_data_manager
117db96d56Sopenharmony_cifrom email.message import EmailMessage
127db96d56Sopenharmony_ci
# Names exported by ``from email.policy import *``.  Every stock policy
# instance defined at the bottom of this module is listed, including
# SMTPUTF8.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
237db96d56Sopenharmony_ci
# Matches one header line break: LF, CR, or CRLF.  CRLF is consumed as a
# single separator so that splitting on it never produces an empty segment
# between the CR and the LF.
linesep_splitter = re.compile(r'\n|\r\n?')
257db96d56Sopenharmony_ci
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    # Class-level defaults for the attributes described in the docstring.
    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).  This has to
        # bypass the policy's own attribute setting, hence the direct
        # object.__setattr__ call.
        if 'header_factory' not in kw:
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        # An existing header object for this same field is stored as is.
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines()) > 1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines on CR, LF and CRLF only (other
        characters that str.splitlines treats as line boundaries are left in
        place).  If the value is not to be refolded, the lines are rejoined
        using the linesep from the policy and returned.  The exception is lines
        containing non-ascii binary data.  In that case the value is refolded
        regardless of the refold_source setting, which causes the binary data
        to be CTE encoded using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        # Shared implementation of fold() and fold_binary(); returns a str.
        # Header objects (anything with a 'name' attribute) fold themselves.
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (None or 0) means "no limit".
        maxlen = self.max_line_length or sys.maxsize
        # Split on CR, LF and CRLF only.  str.splitlines() must not be used
        # here: it also breaks on \v, \f, \x1c-\x1e, \x85, \u2028 and \u2029,
        # and rejoining those pieces with linesep below would inject line
        # breaks into the serialized header.
        lines = value.replace('\r\n', '\n').replace('\r', '\n').split('\n')
        # Keep the splitlines() convention for terminators: a trailing line
        # break (or an empty value) does not produce a final empty line.
        if not lines[-1]:
            lines.pop()
        # The first line shares its row with "name: ", hence the +2.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if refold or refold_binary and _has_surrogates(value):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
2167db96d56Sopenharmony_ci
2177db96d56Sopenharmony_ci
# Stock policy instances.
default = EmailPolicy()
# EmailPolicy.__init__ gave ``default`` its own HeaderRegistry; remove that
# instance attribute so the default policy uses the class default
# header_factory instead.
del default.header_factory
strict = default.clone(raise_on_defect=True)
# SMTP uses CRLF line endings; HTTP and SMTPUTF8 are variations on it.
SMTP = default.clone(linesep='\r\n')
HTTP = SMTP.clone(max_line_length=None)
SMTPUTF8 = SMTP.clone(utf8=True)
225