# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org

"""A parser of RFC 2822 and MIME email messages."""

__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
           'FeedParser', 'BytesFeedParser']

from io import StringIO, TextIOWrapper

from email.feedparser import FeedParser, BytesFeedParser
from email._policybase import compat32


class Parser:
    def __init__(self, _class=None, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  The default is None, which is handed to FeedParser
        unchanged so that FeedParser selects the message class itself.

        The policy keyword specifies a policy object that controls a number of
        aspects of the parser's operation.  The default policy maintains
        backward compatibility.

        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        fp only needs a read(size) method; it is consumed until read()
        returns an empty (false) value.
        """
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        # Feed the input in bounded chunks so the whole file never has to be
        # read by a single call.
        while True:
            data = fp.read(8192)
            if not data:
                break
            feedparser.feed(data)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the string.
        """
        return self.parse(StringIO(text), headersonly=headersonly)


class HeaderParser(Parser):
    """A Parser that always stops after the header block."""

    def parse(self, fp, headersonly=True):
        """Parse only the headers of the message read from fp.

        The headersonly argument is accepted for signature compatibility with
        Parser.parse() but is ignored: header-only parsing is always used.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers of the message contained in text.

        The headersonly argument is accepted for signature compatibility with
        Parser.parsestr() but is ignored: header-only parsing is always used.
        """
        return Parser.parsestr(self, text, headersonly=True)


class BytesParser:

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are forwarded unchanged to
        Parser, so _class and policy have the same meaning as they do there.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.
        """
        # Decode as ASCII with surrogateescape so non-ASCII bytes are carried
        # through the text parser instead of raising a decode error.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Detach rather than close: the caller still owns the binary file,
            # and discarding the wrapper must not close it.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)


class BytesHeaderParser(BytesParser):
    """A BytesParser that always stops after the header block."""

    def parse(self, fp, headersonly=True):
        """Parse only the headers of the message read from binary file fp.

        The headersonly argument is accepted for signature compatibility with
        BytesParser.parse() but is ignored: header-only parsing is always
        used.
        """
        return BytesParser.parse(self, fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse only the headers of the message contained in bytes text.

        The headersonly argument is accepted for signature compatibility with
        BytesParser.parsebytes() but is ignored: header-only parsing is always
        used.
        """
        return BytesParser.parsebytes(self, text, headersonly=True)