""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> codecs.CodecInfo object
    The getregentry() API must return a CodecInfo object with encoder, decoder,
    incrementalencoder, incrementaldecoder, streamwriter and streamreader
    attributes which adhere to the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
307db96d56Sopenharmony_ci
317db96d56Sopenharmony_ciimport codecs
327db96d56Sopenharmony_ciimport sys
337db96d56Sopenharmony_cifrom . import aliases
347db96d56Sopenharmony_ci
# Maps the encoding name exactly as passed to search_function() to the
# resulting CodecInfo entry, or to None when no codec module was found.
_cache = {}
# Sentinel used as the default for _cache lookups so that a cached None
# (a remembered miss) can be told apart from "never looked up".
_unknown = '--unknown--'
# Non-empty fromlist passed to __import__() so that it returns the
# 'encodings.<name>' submodule itself rather than the top-level package.
_import_tail = ['*']
# Alias table: normalized alias name -> codec module name.  search_function()
# adds entries to it for aliases announced by codec modules via getaliases().
_aliases = aliases.aliases
397db96d56Sopenharmony_ci
class CodecRegistryError(LookupError, SystemError):
    """Raised when a codec module returns an unusable registry entry.

    search_function() raises this when getregentry() yields a legacy
    tuple of the wrong length or one whose codec slots are not callable.
    """
427db96d56Sopenharmony_ci
def normalize_encoding(encoding):
    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.

        Note that encoding names should be ASCII only; alphanumeric
        characters outside the ASCII range are dropped from the result.

        *encoding* may be a str, or a bytes object that is decoded as
        ASCII first (raising UnicodeDecodeError if it is not ASCII).
        Case is preserved. Returns the normalized name as a str.

    """
    if isinstance(encoding, bytes):
        encoding = str(encoding, "ascii")

    chars = []
    # True while we are inside a run of separator characters; the run is
    # emitted as one '_' only once a following name character shows up,
    # which is what keeps leading and trailing separators out of the result.
    punct = False
    for c in encoding:
        if c.isalnum() or c == '.':
            if punct and chars:
                chars.append('_')
            # isalnum() is also true for non-ASCII letters/digits; those
            # are silently discarded.
            if c.isascii():
                chars.append(c)
            punct = False
        else:
            punct = True
    return ''.join(chars)
707db96d56Sopenharmony_ci
def search_function(encoding):
    """Codec search function for the modules in the 'encodings' package.

    Looks up the codec module for *encoding* and returns its registry
    entry as a codecs.CodecInfo object, or None when no suitable module
    exists.  Both hits and misses are remembered in _cache under the
    name exactly as passed in.

    As a side effect, aliases announced by the module's optional
    getaliases() function are added to the alias table, without
    overwriting aliases that are already present.

    Raises CodecRegistryError if the module's getregentry() returns a
    legacy tuple of the wrong length or with non-callable codec slots.
    """

    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding name and
    # look up the module using the aliased name, then fall back to the
    # normalized name itself.  Modules are only ever imported from
    # inside the 'encodings' package.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        # Skip empty names and dotted names (the latter would address a
        # nested package path rather than a codec module in this package).
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import of a
            # module with side-effects that is not in the 'encodings' package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            # ImportError may occur because 'encodings.(modname)' does not exist,
            # or because it imports a name that does not exist (see mbcs and oem)
            pass
        else:
            break
    else:
        mod = None

    try:
        # Also raises AttributeError when mod is None, which is handled
        # the same way as a module without getregentry().
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        # Legacy interface: a sequence of 4 to 7 items that is passed
        # positionally to codecs.CodecInfo().
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError('module "%s" (%s) failed to register'
                                     % (mod.__name__, mod.__file__))
        if not callable(entry[0]) or not callable(entry[1]) or \
           (entry[2] is not None and not callable(entry[2])) or \
           (entry[3] is not None and not callable(entry[3])) or \
           (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
           (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
            raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
                                     % (mod.__name__, mod.__file__))
        if len(entry) < 7 or entry[6] is None:
            # Fill in the codec name from the module name.  Rebuild from
            # the first six slots so that a 7-item entry whose name slot
            # is None has that slot replaced instead of growing to eight
            # items, which CodecInfo() would reject.
            padding = (None,) * (6 - len(entry))
            entry = (tuple(entry[:6]) + padding
                     + (mod.__name__.split(".", 1)[1],))
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                _aliases[alias] = modname

    # Return the registry entry
    return entry
1547db96d56Sopenharmony_ci
# Register the search_function in the Python codec registry
codecs.register(search_function)
1577db96d56Sopenharmony_ci
if sys.platform == 'win32':
    # bpo-671666, bpo-46668: If Python does not implement a codec for current
    # Windows ANSI code page, use the "mbcs" codec instead:
    # WideCharToMultiByte() and MultiByteToWideChar() functions with CP_ACP.
    # Python does not support custom code pages.
    def _alias_mbcs(encoding):
        """Codec search function mapping the ANSI code page to "mbcs".

        Returns the registry entry of the "mbcs" codec when *encoding*
        equals "cp<N>", where N is the value reported by
        _winapi.GetACP().  Returns None for any other name, or when one
        of the imports fails.
        """
        try:
            import _winapi
            ansi_code_page = "cp%s" % _winapi.GetACP()
            if encoding == ansi_code_page:
                import encodings.mbcs
                return encodings.mbcs.getregentry()
        except ImportError:
            # Imports may fail while we are shutting down
            pass

    codecs.register(_alias_mbcs)
175