""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> codecs.CodecInfo object
    The getregentry() API must return a CodecInfo object with encoder, decoder,
    incrementalencoder, incrementaldecoder, streamwriter and streamreader
    attributes which adhere to the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"

import codecs
import sys
from . import aliases

# Results of search_function(), keyed by the encoding name exactly as it was
# passed in.  Failed lookups are cached too, with None as the value.
_cache = {}
# Sentinel meaning "no cache entry", so that a cached None (a known miss)
# can be told apart from a name that has never been looked up.
_unknown = '--unknown--'
# A non-empty fromlist makes __import__() return the submodule itself
# rather than the top-level 'encodings' package.
_import_tail = ['*']
# Alias table mapping normalized encoding names to codec module names;
# search_function() adds the aliases reported by codec modules to it.
_aliases = aliases.aliases


class CodecRegistryError(LookupError, SystemError):
    """Raised when a codec module returns an unusable registry entry."""


def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.

        Note that encoding names should be ASCII only.  A bytes
        argument is decoded as ASCII first; non-ASCII alphanumeric
        characters in the name are dropped from the result.

    """
    if isinstance(encoding, bytes):
        encoding = str(encoding, "ascii")

    chars = []
    # True while the characters seen since the last kept character were
    # punctuation; the run is emitted as one '_' only once another
    # alphanumeric character (or dot) follows it.
    punct = False
    for c in encoding:
        if c.isalnum() or c == '.':
            # 'and chars' suppresses a leading underscore.
            if punct and chars:
                chars.append('_')
            if c.isascii():
                chars.append(c)
            punct = False
        else:
            punct = True
    return ''.join(chars)


def search_function(encoding):
    """ Codec search function for the modules of the encodings package.

        Looks up the codec module for the given encoding name, first
        via the alias table and then via the normalized name itself,
        and returns its codecs.CodecInfo registry entry.  Returns None
        if no codec module is found.  Both outcomes are cached under
        the name as passed in.

        Raises CodecRegistryError if the module's getregentry() returns
        a legacy sequence of the wrong length or with non-callable
        codec slots.

    """
    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        # Skip empty names and names containing a dot.
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import of a
            # module with side-effects that is not in the 'encodings' package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            # ImportError may occur because 'encodings.(modname)' does not exist,
            # or because it imports a name that does not exist (see mbcs and oem)
            pass
        else:
            break
    else:
        # No candidate name could be imported.
        mod = None

    try:
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module (this also covers mod being None)
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        # Legacy entry: a sequence of 4 to 7 items that is converted to a
        # CodecInfo.  Items 0 and 1 must be callable, items 2 to 5 must be
        # callable or None, and item 6 is the codec name.
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError('module "%s" (%s) failed to register'
                                     % (mod.__name__, mod.__file__))
        if not callable(entry[0]) or not callable(entry[1]) or \
           (entry[2] is not None and not callable(entry[2])) or \
           (entry[3] is not None and not callable(entry[3])) or \
           (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
           (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
            raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
                                     % (mod.__name__, mod.__file__))
        if len(entry) < 7 or entry[6] is None:
            # Pad the optional slots with None and use the module name
            # without its package prefix as the codec name.  Keeping only
            # the first six items means a 7-item entry whose name is None
            # has that slot replaced; appending instead would produce an
            # 8-item sequence that CodecInfo() rejects with TypeError.
            entry = (tuple(entry)[:6] + (None,) * (6 - len(entry))
                     + (mod.__name__.split(".", 1)[1],))
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                # modname is still the name whose import succeeded above.
                _aliases[alias] = modname

    # Return the registry entry
    return entry


# Register the search_function in the Python codec registry
codecs.register(search_function)

if sys.platform == 'win32':
    # bpo-671666, bpo-46668: If Python does not implement a codec for current
    # Windows ANSI code page, use the "mbcs" codec instead:
    # WideCharToMultiByte() and MultiByteToWideChar() functions with CP_ACP.
    # Python does not support custom code pages.
    def _alias_mbcs(encoding):
        """ Search function mapping the ANSI code page name to "mbcs".

            Returns the registry entry of the mbcs codec if encoding
            equals "cp<N>", where <N> is the value returned by
            _winapi.GetACP(); returns None otherwise.

        """
        try:
            import _winapi
            ansi_code_page = "cp%s" % _winapi.GetACP()
            if encoding == ansi_code_page:
                import encodings.mbcs
                return encodings.mbcs.getregentry()
        except ImportError:
            # Imports may fail while we are shutting down
            pass

    codecs.register(_alias_mbcs)