# Pull-style DOM construction on top of SAX.
#
# A PullDOM content handler converts SAX callbacks into a singly linked list
# of (token, node) events.  DOMEventStream feeds a parser on demand and hands
# those events out one at a time; SAX2DOM additionally links every node into
# the document tree as it is created.

import xml.sax
import xml.sax.handler

# Event tokens: the first item of every (token, node) tuple that is emitted.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"


class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that queues DOM nodes as a linked list of events.

    Every queue entry is a two-item list ``[payload, next_entry]``.
    ``firstEvent`` is a sentinel head whose payload is ``None``; ``lastEvent``
    is the current tail.  Payloads are ``(token, node)`` tuples.

    Comments and processing instructions seen before the document exists are
    parked in ``pending_events`` (holding raw strings) and converted to nodes
    by ``buildDocument()``.
    """

    _locator = None
    document = None

    def __init__(self, documentFactory=None):
        """Create an empty event queue.

        documentFactory -- object providing ``createDocument(uri, name,
            doctype)``; when None, ``startDocument()`` falls back to the
            minidom implementation.
        """
        from xml.dom import XML_NAMESPACE
        self.documentFactory = documentFactory
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        self.elementStack = []
        # Bound list methods shadow the class-level pop() defined below.
        self.push = self.elementStack.append
        self.pop = self.elementStack.pop
        self._ns_contexts = [{XML_NAMESPACE: 'xml'}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        self.pending_events = []

    def pop(self):
        """Remove and return the top of ``elementStack``.

        Instances created through ``__init__`` use the bound ``list.pop``
        instead; this method remains as the class-level fallback.
        """
        result = self.elementStack[-1]
        del self.elementStack[-1]
        return result

    def _queue_event(self, entry):
        """Link *entry* (``[payload, None]``) after the tail and advance it."""
        self.lastEvent[1] = entry
        self.lastEvent = entry

    def setDocumentLocator(self, locator):
        """Remember the locator supplied by the parser."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration and open a new prefix context."""
        if not hasattr(self, '_xmlns_attrs'):
            self._xmlns_attrs = []
        # The default namespace (empty prefix) is stored under 'xmlns'.
        self._xmlns_attrs.append((prefix or 'xmlns', uri))
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        """Restore the prefix context saved by ``startPrefixMapping()``."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName, attrs):
        """Create a namespaced element node and queue a START_ELEMENT event.

        name -- ``(uri, localname)`` pair.
        tagName -- qualified name, or None if the reader did not supply one.
        attrs -- attribute mapping keyed by ``(uri, localname)``; its
            ``_attrs`` dict is extended with the namespace declarations
            collected since the previous element.
        """
        # Retrieve xml namespace declaration attributes.
        xmlns_uri = 'http://www.w3.org/2000/xmlns/'
        xmlns_attrs = getattr(self, '_xmlns_attrs', None)
        if xmlns_attrs is not None:
            for aname, value in xmlns_attrs:
                attrs._attrs[(xmlns_uri, aname)] = value
        self._xmlns_attrs = []
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                prefix = self._current_context[uri]
                if prefix:
                    tagName = prefix + ":" + localname
                else:
                    tagName = localname
            if self.document:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.buildDocument(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname
            if self.document:
                node = self.document.createElement(localname)
            else:
                node = self.buildDocument(None, localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri == xmlns_uri:
                if a_localname == 'xmlns':
                    qname = a_localname
                else:
                    qname = 'xmlns:' + a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            elif a_uri:
                prefix = self._current_context[a_uri]
                if prefix:
                    qname = prefix + ":" + a_localname
                else:
                    qname = a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            else:
                attr = self.document.createAttribute(a_localname)
                node.setAttributeNode(attr)
            attr.value = value

        self._queue_event([(START_ELEMENT, node), None])
        self.push(node)

    def endElementNS(self, name, tagName):
        """Pop the current element and queue an END_ELEMENT event for it."""
        self._queue_event([(END_ELEMENT, self.pop()), None])

    def startElement(self, name, attrs):
        """Create a non-namespaced element node and queue START_ELEMENT."""
        if self.document:
            node = self.document.createElement(name)
        else:
            node = self.buildDocument(None, name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self._queue_event([(START_ELEMENT, node), None])
        self.push(node)

    def endElement(self, name):
        """Pop the current element and queue an END_ELEMENT event for it."""
        self._queue_event([(END_ELEMENT, self.pop()), None])

    def comment(self, s):
        """Queue a COMMENT event, deferring it if no document exists yet."""
        if self.document:
            node = self.document.createComment(s)
            self._queue_event([(COMMENT, node), None])
        else:
            # No document to create the node with; keep the raw text.
            event = [(COMMENT, s), None]
            self.pending_events.append(event)

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event, deferring it if needed."""
        if self.document:
            node = self.document.createProcessingInstruction(target, data)
            self._queue_event([(PROCESSING_INSTRUCTION, node), None])
        else:
            # No document to create the node with; keep the raw strings.
            event = [(PROCESSING_INSTRUCTION, target, data), None]
            self.pending_events.append(event)

    def ignorableWhitespace(self, chars):
        """Queue an IGNORABLE_WHITESPACE event carrying a text node."""
        node = self.document.createTextNode(chars)
        self._queue_event([(IGNORABLE_WHITESPACE, node), None])

    def characters(self, chars):
        """Queue a CHARACTERS event carrying a text node."""
        node = self.document.createTextNode(chars)
        self._queue_event([(CHARACTERS, node), None])

    def startDocument(self):
        """Select the minidom implementation if no factory was supplied."""
        if self.documentFactory is None:
            import xml.dom.minidom
            self.documentFactory = xml.dom.minidom.Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document, flush deferred events, return its first child.

        Queues START_DOCUMENT, pushes the document on the element stack, then
        converts every entry of ``pending_events`` into a real node event.

        Raises AssertionError for a pending event of an unknown kind.
        """
        # Can't do that in startDocument, since we need the tagname
        # XXX: obtain DocumentType
        node = self.documentFactory.createDocument(uri, tagname, None)
        self.document = node
        self._queue_event([(START_DOCUMENT, node), None])
        self.push(node)
        # Put everything we have seen so far into the document
        for e in self.pending_events:
            if e[0][0] == PROCESSING_INSTRUCTION:
                _, target, data = e[0]
                n = self.document.createProcessingInstruction(target, data)
                e[0] = (PROCESSING_INSTRUCTION, n)
            elif e[0][0] == COMMENT:
                n = self.document.createComment(e[0][1])
                e[0] = (COMMENT, n)
            else:
                raise AssertionError("Unknown pending event ", e[0][0])
            self._queue_event(e)
        self.pending_events = None
        return node.firstChild

    def endDocument(self):
        """Link the END_DOCUMENT event and pop the document off the stack."""
        # The tail pointer is deliberately left where it is here.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
        self.pop()

    def clear(self):
        "clear(): Explicitly release parsing structures"
        self.document = None


class ErrorHandler:
    """Error handler that prints warnings and re-raises errors."""

    def warning(self, exception):
        print(exception)

    def error(self, exception):
        raise exception

    def fatalError(self, exception):
        raise exception


class DOMEventStream:
    """Iterator over the ``(token, node)`` events produced while parsing.

    stream -- object with ``read(size)`` supplying the document.
    parser -- SAX parser; if it has no ``feed`` method the whole document is
        parsed up front through ``parse()`` (see ``_slurp``).
    bufsize -- number of bytes/characters requested per ``read`` call.
    """

    # Set to True by parse() when it opened the stream itself, so that
    # clear() knows it is responsible for closing it.
    _owns_stream = False

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        if not hasattr(self.parser, 'feed'):
            self.getEvent = self._slurp
        self.reset()

    def reset(self):
        """Install a fresh PullDOM handler on the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __next__(self):
        rc = self.getEvent()
        if rc:
            return rc
        raise StopIteration

    def __iter__(self):
        return self

    def expandNode(self, node):
        """Consume events up to *node*'s own closing event, building its subtree.

        Every node delivered meanwhile (except on END_ELEMENT) is appended to
        the innermost open element, starting with *node* itself.
        """
        event = self.getEvent()
        parents = [node]
        while event:
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                parents[-1].appendChild(cur_node)
            if token == START_ELEMENT:
                parents.append(cur_node)
            elif token == END_ELEMENT:
                del parents[-1]
            event = self.getEvent()

    def getEvent(self):
        """Return the next ``(token, node)`` event, or None at end of input."""
        # use IncrementalParser interface, so we get the desired
        # pull effect
        if not self.pulldom.firstEvent[1]:
            self.pulldom.lastEvent = self.pulldom.firstEvent
        while not self.pulldom.firstEvent[1]:
            buf = self.stream.read(self.bufsize)
            if not buf:
                # NOTE(review): anything the parser queues from inside
                # close() is not returned by this call; kept as-is so the
                # event sequence seen by existing callers does not change.
                self.parser.close()
                return None
            self.parser.feed(buf)
        rc = self.pulldom.firstEvent[1][0]
        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
        return rc

    def _slurp(self):
        """ Fallback replacement for getEvent() using the
            standard SAX2 interface, which means we slurp the
            SAX events into memory (no performance gain, but
            we are compatible to all SAX parsers).
        """
        self.parser.parse(self.stream)
        self.getEvent = self._emit
        return self._emit()

    def _emit(self):
        """ Fallback replacement for getEvent() that emits
            the events that _slurp() read previously.

            Returns None once the queue is drained, so that iteration
            stops cleanly instead of failing on the empty queue.
        """
        entry = self.pulldom.firstEvent[1]
        if entry is None:
            return None
        self.pulldom.firstEvent[1] = entry[1]
        return entry[0]

    def clear(self):
        """clear(): Explicitly release parsing objects"""
        self.pulldom.clear()
        del self.pulldom
        self.parser = None
        stream, self.stream = self.stream, None
        # Only close streams that parse() opened on the caller's behalf.
        if self._owns_stream and stream is not None:
            stream.close()


class SAX2DOM(PullDOM):
    """PullDOM variant that also links each node into the document tree."""

    def buildDocument(self, uri, tagname):
        """Build the document and attach deferred processing instructions.

        Processing instructions seen before the root element only become
        nodes inside ``PullDOM.buildDocument``; they are inserted ahead of
        the root here so they are part of the tree like later ones.
        """
        deferred = self.pending_events  # base class resets this to None
        root = PullDOM.buildDocument(self, uri, tagname)
        for entry in deferred or ():
            token, node = entry[0]
            # Comments are not linked into the tree anywhere in this class,
            # so only processing instructions are attached.
            if token == PROCESSING_INSTRUCTION:
                self.document.insertBefore(node, root)
        return root

    def startElementNS(self, name, tagName, attrs):
        PullDOM.startElementNS(self, name, tagName, attrs)
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def startElement(self, name, attrs):
        PullDOM.startElement(self, name, attrs)
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def processingInstruction(self, target, data):
        # Without a document the base class only parks the raw strings:
        # there is no node yet and no parent on the stack.
        was_deferred = not self.document
        PullDOM.processingInstruction(self, target, data)
        if was_deferred:
            return  # attached later by buildDocument()
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def ignorableWhitespace(self, chars):
        PullDOM.ignorableWhitespace(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def characters(self, chars):
        PullDOM.characters(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)


default_bufsize = (2 ** 14) - 20


def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream for a file name or an open stream.

    stream_or_string -- a ``str`` is treated as a path and opened in binary
        mode; anything else is used as the stream directly.
    parser -- SAX parser to use; defaults to ``xml.sax.make_parser()``.
    bufsize -- read size per chunk; defaults to ``default_bufsize``.

    A file opened here is closed by the returned stream's ``clear()``;
    streams supplied by the caller are never closed.
    """
    if bufsize is None:
        bufsize = default_bufsize
    opened_here = isinstance(stream_or_string, str)
    if opened_here:
        stream = open(stream_or_string, 'rb')
    else:
        stream = stream_or_string
    try:
        if not parser:
            parser = xml.sax.make_parser()
        events = DOMEventStream(stream, parser, bufsize)
    except BaseException:
        # Do not leak the handle if the event stream could not be set up.
        if opened_here:
            stream.close()
        raise
    events._owns_stream = opened_here
    return events


def parseString(string, parser=None):
    """Return a DOMEventStream over *string*, read in a single chunk."""
    from io import StringIO

    bufsize = len(string)
    buf = StringIO(string)
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(buf, parser, bufsize)