# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org
47db96d56Sopenharmony_ci
57db96d56Sopenharmony_ci"""A parser of RFC 2822 and MIME email messages."""
67db96d56Sopenharmony_ci
77db96d56Sopenharmony_ci__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
87db96d56Sopenharmony_ci           'FeedParser', 'BytesFeedParser']
97db96d56Sopenharmony_ci
107db96d56Sopenharmony_cifrom io import StringIO, TextIOWrapper
117db96d56Sopenharmony_ci
127db96d56Sopenharmony_cifrom email.feedparser import FeedParser, BytesFeedParser
137db96d56Sopenharmony_cifrom email._policybase import compat32
147db96d56Sopenharmony_ci
157db96d56Sopenharmony_ci
class Parser:
    def __init__(self, _class=None, *, policy=compat32):
        """Set up a parser for RFC 2822 and MIME email messages held as text.

        Parsing produces an in-memory tree of message objects.  That tree can
        be modified and later handed to a Generator to obtain the textual
        form of the message again.

        Input is expected to be a block of RFC 2822 headers (including any
        continuation lines), optionally preceded by a `Unix-from' line.  The
        header block ends at the first blank line, or at the end of the input
        if there is none.

        _class is the factory used whenever a new message object has to be
        created; it must be callable with zero arguments.  It is handed
        unchanged to FeedParser, so when it is left as None the choice of
        message class is made there.

        policy is a keyword-only policy object that governs various aspects
        of the parser's behaviour.  The default, compat32, keeps backward
        compatible behaviour.
        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Build a message structure from the contents of a text file.

        fp is read to exhaustion in fixed-size chunks and the root of the
        resulting message structure is returned.  When headersonly is true,
        parsing stops once the headers have been read; the default (False)
        parses the complete contents of the file.
        """
        feeder = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feeder._set_headersonly()
        # Feed the incremental parser chunk by chunk; a falsy read marks
        # the end of the input.
        chunk = fp.read(8192)
        while chunk:
            feeder.feed(chunk)
            chunk = fp.read(8192)
        return feeder.close()

    def parsestr(self, text, headersonly=False):
        """Build a message structure from a string.

        The string is wrapped in a StringIO and passed to parse(); the root
        of the message structure is returned.  When headersonly is true,
        parsing stops once the headers have been read; the default (False)
        parses the whole string.
        """
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
687db96d56Sopenharmony_ci
697db96d56Sopenharmony_ci
707db96d56Sopenharmony_ci
class HeaderParser(Parser):
    """Parser variant that always stops after the header block."""

    def parse(self, fp, headersonly=True):
        """Parse only the headers of the message read from fp.

        The headersonly argument exists for signature compatibility with
        Parser.parse(); its value is ignored and True is always used.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers of the message contained in text.

        The headersonly argument exists for signature compatibility with
        Parser.parsestr(); its value is ignored and True is always used.
        """
        return Parser.parsestr(self, text, headersonly=True)
777db96d56Sopenharmony_ci
787db96d56Sopenharmony_ci
class BytesParser:

    def __init__(self, *args, **kw):
        """Set up a parser for binary RFC 2822 and MIME email messages.

        Parsing produces an in-memory tree of message objects.  That tree can
        be modified and later handed to a Generator to obtain the textual
        form of the message again.

        Input is expected to be a block of RFC 2822 headers (including any
        continuation lines), optionally preceded by a `Unix-from' line.  The
        header block ends at the first blank line, or at the end of the input
        if there is none.

        All positional and keyword arguments are forwarded unchanged to
        Parser, which does the actual parsing; in particular _class, the
        zero-argument factory for new message objects, is passed this way.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Build a message structure from the contents of a binary file.

        fp is read to exhaustion and the root of the resulting message
        structure is returned.  When headersonly is true, parsing stops once
        the headers have been read; the default (False) parses the complete
        contents of the file.
        """
        # Present the bytes as text for the str-based parser; bytes outside
        # ASCII are carried through using the surrogateescape error handler.
        text_view = TextIOWrapper(fp, encoding='ascii',
                                  errors='surrogateescape')
        try:
            return self.parser.parse(text_view, headersonly)
        finally:
            # Separate the wrapper from the caller's binary file again.
            text_view.detach()


    def parsebytes(self, text, headersonly=False):
        """Build a message structure from a byte string.

        The bytes are decoded as ASCII with the surrogateescape error handler
        and the result is passed to the wrapped Parser's parsestr(); the root
        of the message structure is returned.  When headersonly is true,
        parsing stops once the headers have been read; the default (False)
        parses all of the data.
        """
        return self.parser.parsestr(
            text.decode('ASCII', errors='surrogateescape'), headersonly)
1247db96d56Sopenharmony_ci
1257db96d56Sopenharmony_ci
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always stops after the header block."""

    def parse(self, fp, headersonly=True):
        """Parse only the headers of the message read from binary file fp.

        The headersonly argument exists for signature compatibility with
        BytesParser.parse(); its value is ignored and True is always used.
        """
        return BytesParser.parse(self, fp, True)

    def parsebytes(self, text, headersonly=True):
        """Parse only the headers of the message held in the bytes text.

        The headersonly argument exists for signature compatibility with
        BytesParser.parsebytes(); its value is ignored and True is always
        used.
        """
        return BytesParser.parsebytes(self, text, True)
132