xref: /third_party/python/Lib/xml/dom/pulldom.py (revision 7db96d56)
17db96d56Sopenharmony_ciimport xml.sax
27db96d56Sopenharmony_ciimport xml.sax.handler
37db96d56Sopenharmony_ci
# Event-type tokens.  Each event queued by PullDOM is a (token, node) tuple
# whose first item is one of these strings; DOMEventStream hands the tuples
# out unchanged.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
127db96d56Sopenharmony_ci
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that records callbacks as a linked queue of DOM events.

    Every queue cell is a two-item list ``[event, next_cell]`` in which
    ``event`` is a ``(token, node)`` tuple.  ``firstEvent`` is a sentinel
    cell whose second slot points at the oldest unread cell, and
    ``lastEvent`` is the cell that new events are linked behind.

    The DOM document cannot be created until the root element's name is
    known, so comments and processing instructions seen before that point
    are parked in ``pending_events`` and turned into nodes by
    ``buildDocument()``.
    """

    _locator = None
    document = None

    def __init__(self, documentFactory=None):
        """Set up an empty event queue, element stack and namespace context.

        documentFactory -- object providing ``createDocument(uri, name,
        doctype)``; when None, ``startDocument()`` installs minidom's
        implementation.
        """
        from xml.dom import XML_NAMESPACE
        self.documentFactory = documentFactory
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        self.elementStack = []
        self.push = self.elementStack.append
        try:
            # Bind the list's own pop for speed; the pop() method defined
            # on the class stays available as the fallback.
            self.pop = self.elementStack.pop
        except AttributeError:
            # use class' pop instead
            pass
        # Stack of saved uri -> prefix dicts, one per open prefix mapping.
        self._ns_contexts = [{XML_NAMESPACE: 'xml'}]
        self._current_context = self._ns_contexts[-1]
        self.pending_events = []

    def pop(self):
        """Remove and return the top of the element stack."""
        result = self.elementStack[-1]
        del self.elementStack[-1]
        return result

    def _queue_event(self, token, node):
        """Link a new ``(token, node)`` cell behind the current tail cell."""
        cell = [(token, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def _qualified_name(self, uri, localname):
        """Build ``prefix:localname`` (or ``localname``) from the current context.

        Raises KeyError when ``uri`` has no entry in the current namespace
        context.
        """
        prefix = self._current_context[uri]
        if prefix:
            return prefix + ":" + localname
        return localname

    def setDocumentLocator(self, locator):
        """Remember the locator supplied by the parser."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next start-element event."""
        if not hasattr(self, '_xmlns_attrs'):
            self._xmlns_attrs = []
        # A default-namespace declaration (empty/None prefix) is stored
        # under the attribute name 'xmlns'.
        self._xmlns_attrs.append((prefix or 'xmlns', uri))
        # Save a snapshot of the context as it was before this mapping;
        # endPrefixMapping() restores it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by the matching start call."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName, attrs):
        """Create an element node (namespace mode) and queue START_ELEMENT.

        name -- ``(uri, localname)`` pair; tagName -- qualified name or
        None; attrs -- mapping whose keys are ``(uri, localname)`` pairs.
        The first call also builds the document via ``buildDocument()``.
        """
        # Retrieve xml namespace declaration attributes.
        xmlns_uri = 'http://www.w3.org/2000/xmlns/'
        xmlns_attrs = getattr(self, '_xmlns_attrs', None)
        if xmlns_attrs is not None:
            # Inject the declarations collected by startPrefixMapping()
            # so they show up as ordinary attributes below.
            for aname, value in xmlns_attrs:
                attrs._attrs[(xmlns_uri, aname)] = value
            self._xmlns_attrs = []
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                tagName = self._qualified_name(uri, localname)
            if self.document:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.buildDocument(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname
            if self.document:
                node = self.document.createElement(localname)
            else:
                node = self.buildDocument(None, localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri == xmlns_uri:
                if a_localname == 'xmlns':
                    qname = a_localname
                else:
                    qname = 'xmlns:' + a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            elif a_uri:
                qname = self._qualified_name(a_uri, a_localname)
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            else:
                attr = self.document.createAttribute(a_localname)
                node.setAttributeNode(attr)
            attr.value = value

        self._queue_event(START_ELEMENT, node)
        self.push(node)

    def endElementNS(self, name, tagName):
        """Pop the open element and queue END_ELEMENT for it."""
        self._queue_event(END_ELEMENT, self.pop())

    def startElement(self, name, attrs):
        """Create an element node (non-namespace mode) and queue START_ELEMENT.

        The first call also builds the document via ``buildDocument()``.
        """
        if self.document:
            node = self.document.createElement(name)
        else:
            node = self.buildDocument(None, name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self._queue_event(START_ELEMENT, node)
        self.push(node)

    def endElement(self, name):
        """Pop the open element and queue END_ELEMENT for it."""
        self._queue_event(END_ELEMENT, self.pop())

    def comment(self, s):
        """Queue a COMMENT event, or park the text until the document exists."""
        if self.document:
            node = self.document.createComment(s)
            self._queue_event(COMMENT, node)
        else:
            event = [(COMMENT, s), None]
            self.pending_events.append(event)

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event, or park it until the document exists."""
        if self.document:
            node = self.document.createProcessingInstruction(target, data)
            self._queue_event(PROCESSING_INSTRUCTION, node)
        else:
            event = [(PROCESSING_INSTRUCTION, target, data), None]
            self.pending_events.append(event)

    def ignorableWhitespace(self, chars):
        """Queue an IGNORABLE_WHITESPACE event carrying a new text node."""
        node = self.document.createTextNode(chars)
        self._queue_event(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Queue a CHARACTERS event carrying a new text node."""
        node = self.document.createTextNode(chars)
        self._queue_event(CHARACTERS, node)

    def startDocument(self):
        """Install minidom's DOM implementation when no factory was given."""
        if self.documentFactory is None:
            import xml.dom.minidom
            self.documentFactory = xml.dom.minidom.Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document, queue START_DOCUMENT and flush parked events.

        Returns the new document's first child.
        """
        # Can't do that in startDocument, since we need the tagname
        # XXX: obtain DocumentType
        node = self.documentFactory.createDocument(uri, tagname, None)
        self.document = node
        self._queue_event(START_DOCUMENT, node)
        self.push(node)
        # Put everything we have seen so far into the document
        for e in self.pending_events:
            if e[0][0] == PROCESSING_INSTRUCTION:
                _, target, data = e[0]
                n = self.document.createProcessingInstruction(target, data)
                e[0] = (PROCESSING_INSTRUCTION, n)
            elif e[0][0] == COMMENT:
                n = self.document.createComment(e[0][1])
                e[0] = (COMMENT, n)
            else:
                raise AssertionError("Unknown pending event ",e[0][0])
            # The parked cell itself becomes the new tail of the queue.
            self.lastEvent[1] = e
            self.lastEvent = e
        self.pending_events = None
        return node.firstChild

    def endDocument(self):
        """Queue END_DOCUMENT and pop the document off the element stack."""
        # The tail pointer is deliberately left where it was: this cell is
        # linked in but lastEvent is not advanced onto it.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
        self.pop()

    def clear(self):
        "clear(): Explicitly release parsing structures"
        self.document = None
1957db96d56Sopenharmony_ci
class ErrorHandler:
    """Error handler that prints warnings and re-raises everything else."""

    def warning(self, exception):
        """Print the warning and carry on."""
        print(exception)

    def error(self, exception):
        """Raise the given exception."""
        raise exception

    def fatalError(self, exception):
        """Raise the given exception."""
        raise exception
2037db96d56Sopenharmony_ci
class DOMEventStream:
    """Iterator over the ``(token, node)`` events produced while parsing a stream.

    stream  -- object with ``read(size)``; handed to ``parser.parse()``
               instead when the parser has no ``feed`` method.
    parser  -- SAX parser; it is switched to namespace mode by ``reset()``.
    bufsize -- size passed to every ``stream.read()`` call.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        if not hasattr(self.parser, 'feed'):
            # Not an incremental parser: parse everything on first use.
            self.getEvent = self._slurp
        self.reset()

    def reset(self):
        """Install a fresh PullDOM handler on the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __next__(self):
        """Return the next event; raise StopIteration when none is left."""
        rc = self.getEvent()
        if rc:
            return rc
        raise StopIteration

    def __iter__(self):
        return self

    def expandNode(self, node):
        """Consume events up to ``node``'s end, attaching them beneath ``node``.

        Events are read until one carrying ``node`` itself is seen (or the
        stream runs out); every node met on the way is appended to the
        element that is open at that moment.
        """
        event = self.getEvent()
        parents = [node]
        while event:
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                parents[-1].appendChild(cur_node)
            if token == START_ELEMENT:
                parents.append(cur_node)
            elif token == END_ELEMENT:
                del parents[-1]
            event = self.getEvent()

    def getEvent(self):
        """Return the next queued event, feeding the parser as needed.

        Returns None once the stream is exhausted and the queue is empty.
        """
        # use IncrementalParser interface, so we get the desired
        # pull effect
        if not self.pulldom.firstEvent[1]:
            # Queue drained: make new events link directly behind the
            # sentinel cell again.
            self.pulldom.lastEvent = self.pulldom.firstEvent
        while not self.pulldom.firstEvent[1]:
            buf = self.stream.read(self.bufsize)
            if not buf:
                self.parser.close()
                return None
            self.parser.feed(buf)
        rc = self.pulldom.firstEvent[1][0]
        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
        return rc

    def _slurp(self):
        """ Fallback replacement for getEvent() using the
            standard SAX2 interface, which means we slurp the
            SAX events into memory (no performance gain, but
            we are compatible to all SAX parsers).
        """
        self.parser.parse(self.stream)
        self.getEvent = self._emit
        return self._emit()

    def _emit(self):
        """ Fallback replacement for getEvent() that emits
            the events that _slurp() read previously.

            Returns None once every queued event has been handed out,
            so that __next__() and expandNode() terminate cleanly.
        """
        cell = self.pulldom.firstEvent[1]
        if not cell:
            # Nothing left; subscripting None here used to raise TypeError.
            return None
        rc = cell[0]
        self.pulldom.firstEvent[1] = cell[1]
        return rc

    def clear(self):
        """clear(): Explicitly release parsing objects"""
        self.pulldom.clear()
        del self.pulldom
        # The stream is only dereferenced here; it is not closed.
        self.parser = None
        self.stream = None
2827db96d56Sopenharmony_ci
class SAX2DOM(PullDOM):
    """PullDOM variant that also links each new node into the DOM tree.

    After the base class has created a node and queued its event, the node
    is appended to whatever is on top of the element stack at that moment.
    """

    def _link_top_element(self):
        """Append the element just pushed to the entry below it on the stack."""
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def _link_last_event_node(self):
        """Append the node of the most recently queued event to the open parent."""
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def startElementNS(self, name, tagName, attrs):
        """Create the element as PullDOM does, then attach it to its parent."""
        super().startElementNS(name, tagName, attrs)
        self._link_top_element()

    def startElement(self, name, attrs):
        """Create the element as PullDOM does, then attach it to its parent."""
        super().startElement(name, attrs)
        self._link_top_element()

    def processingInstruction(self, target, data):
        """Queue the processing instruction and attach it to the open parent.

        Before the root element has been seen there is no document yet: the
        base class only parks the event, no node exists and the element
        stack is empty, so there is nothing to attach.  In that case the
        event is still delivered later, but the node is not linked into
        the tree.
        """
        super().processingInstruction(target, data)
        if self.document:
            self._link_last_event_node()

    def ignorableWhitespace(self, chars):
        """Queue the whitespace text node and attach it to the open parent."""
        super().ignorableWhitespace(chars)
        self._link_last_event_node()

    def characters(self, chars):
        """Queue the text node and attach it to the open parent."""
        super().characters(chars)
        self._link_last_event_node()
3147db96d56Sopenharmony_ci
3157db96d56Sopenharmony_ci
# Read size used by parse() when the caller does not pass a bufsize.
default_bufsize = (2 ** 14) - 20
3177db96d56Sopenharmony_ci
def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream over a file name or an already open stream.

    stream_or_string -- a ``str`` is treated as a path and opened in
        binary mode; anything else is used as the stream as-is.
    parser  -- SAX parser to drive; ``xml.sax.make_parser()`` when omitted.
    bufsize -- read size for the stream; ``default_bufsize`` when None.

    A file opened here is not closed by this function; it is handed to the
    returned DOMEventStream.
    """
    if bufsize is None:
        bufsize = default_bufsize
    if isinstance(stream_or_string, str):
        stream = open(stream_or_string, 'rb')
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
3287db96d56Sopenharmony_ci
def parseString(string, parser=None):
    """Return a DOMEventStream over XML held in a ``str``.

    string -- the document text; it is wrapped in a ``StringIO``.
    parser -- SAX parser to drive; ``xml.sax.make_parser()`` when omitted.

    The read size is the length of the string, so a single read fetches
    the whole document.
    """
    from io import StringIO

    bufsize = len(string)
    buf = StringIO(string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(buf, parser, bufsize)
337