"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""

import os, urllib.parse, urllib.request
import io
import codecs
from . import handler
from . import xmlreader

def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    The replacements are applied one after another in the dictionary's
    iteration order, so a later replacement sees the text produced by
    the earlier ones.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s

def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """

    # must do ampersand first, otherwise the ampersands introduced by
    # the other replacements would be escaped a second time
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data

def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, so that e.g. "&amp;lt;" becomes "&lt;"
    # rather than "<"
    return data.replace("&amp;", "&")

def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    """
    # Build a new dict (the caller's mapping is not modified) that also
    # turns newline, carriage return and tab into character references.
    entities = {**entities, '\n': '&#10;', '\r': '&#13;', '\t': '&#9;'}
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: use double quotes and escape
            # the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data


def _gettextwriter(out, encoding):
    """Return an object with a text-accepting write() method for *out*.

    - None selects sys.stdout.
    - io.TextIOBase instances and codecs stream writers are returned
      unchanged.
    - Anything else is treated as a binary writer and wrapped in an
      io.TextIOWrapper using *encoding*; characters that cannot be
      encoded are emitted as XML character references.
    """
    if out is None:
        import sys
        return sys.stdout

    if isinstance(out, io.TextIOBase):
        # use a text writer as is
        return out

    if isinstance(out, (codecs.StreamWriter, codecs.StreamReaderWriter)):
        # use a codecs stream writer as is
        return out

    # wrap a binary writer with TextIOWrapper
    if isinstance(out, io.RawIOBase):
        # Keep the original file open when the TextIOWrapper is
        # destroyed
        class _wrapper:
            __class__ = out.__class__
            def __getattr__(self, name):
                return getattr(out, name)
        buffer = _wrapper()
        buffer.close = lambda: None
    else:
        # This is to handle passed objects that aren't in the
        # IOBase hierarchy, but just have a write method
        buffer = io.BufferedIOBase()
        buffer.writable = lambda: True
        buffer.write = out.write
        try:
            # TextIOWrapper uses this methods to determine
            # if BOM (for UTF-16, etc) should be added
            buffer.seekable = out.seekable
            buffer.tell = out.tell
        except AttributeError:
            pass
    return io.TextIOWrapper(buffer, encoding=encoding,
                            errors='xmlcharrefreplace',
                            newline='\n',
                            write_through=True)

class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives as XML text.

    out is the destination (see _gettextwriter for the accepted kinds),
    encoding is used for the output stream and the XML declaration, and
    short_empty_elements selects "<a/>" instead of "<a></a>" for
    elements that receive no content.
    """

    def __init__(self, out=None, encoding="iso-8859-1", short_empty_elements=False):
        handler.ContentHandler.__init__(self)
        out = _gettextwriter(out, encoding)
        self._write = out.write
        self._flush = out.flush
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping that still
        # have to be written as xmlns attributes on the next element
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._short_empty_elements = short_empty_elements
        # True while a start tag has been written without its closing
        # '>' (only happens when short_empty_elements is enabled)
        self._pending_start_element = False

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    def _finish_pending_start_element(self, endElement=False):
        """Write the '>' of a start tag that was left open, if any."""
        if self._pending_start_element:
            self._write('>')
            self._pending_start_element = False

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def endDocument(self):
        """Flush the output stream."""
        self._flush()

    def startPrefixMapping(self, prefix, uri):
        """Record a prefix binding to be declared on the next element."""
        # Save a snapshot of the context as it was before this mapping;
        # endPrefixMapping restores it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the context saved by the latest startPrefixMapping."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (non-namespace mode)."""
        self._finish_pending_start_element()
        self._write('<' + name)
        for (name, value) in attrs.items():
            self._write(' %s=%s' % (name, quoteattr(value)))
        if self._short_empty_elements:
            # Defer the '>' so endElement can emit '/>' instead.
            self._pending_start_element = True
        else:
            self._write(">")

    def endElement(self, name):
        """Write an end tag, or '/>' if the start tag is still open."""
        if self._pending_start_element:
            self._write('/>')
            self._pending_start_element = False
        else:
            self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a start tag for a (uri, localname) element name.

        Namespace declarations recorded since the previous element are
        written as xmlns attributes before the regular attributes.
        """
        self._finish_pending_start_element()
        self._write('<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
        if self._short_empty_elements:
            # Defer the '>' so endElementNS can emit '/>' instead.
            self._pending_start_element = True
        else:
            self._write(">")

    def endElementNS(self, name, qname):
        """Write an end tag, or '/>' if the start tag is still open."""
        if self._pending_start_element:
            self._write('/>')
            self._pending_start_element = False
        else:
            self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        if content:
            self._finish_pending_start_element()
            if not isinstance(content, str):
                # Non-str content is decoded using the output encoding.
                content = str(content, self._encoding)
            self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write whitespace content as is, without escaping."""
        if content:
            self._finish_pending_start_element()
            if not isinstance(content, str):
                content = str(content, self._encoding)
            self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction of the form <?target data?>."""
        self._finish_pending_start_element()
        self._write('<?%s %s?>' % (target, data))


class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Register this filter as all four handlers of the parent
        reader, then let the parent parse *source*."""
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        return self._parent

    def setParent(self, parent):
        self._parent = parent

# --- Utility functions

def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    Besides an InputSource, *source* may be an os.PathLike object, a
    string (used as the system identifier), or an object with a read()
    method (used as the character or byte stream).
    """

    if isinstance(source, os.PathLike):
        source = os.fspath(source)
    if isinstance(source, str):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        # A zero-length read tells text streams from binary streams by
        # the type of the returned value.
        if isinstance(f.read(0), str):
            source.setCharacterStream(f)
        else:
            source.setByteStream(f)
        if hasattr(f, "name") and isinstance(f.name, str):
            source.setSystemId(f.name)

    if source.getCharacterStream() is None and source.getByteStream() is None:
        # No stream attached yet: open one from the system identifier.
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            # A local file relative to the directory of *base*.
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            # Otherwise treat the identifier as a URL relative to *base*.
            source.setSystemId(urllib.parse.urljoin(base, sysid))
            f = urllib.request.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source