17db96d56Sopenharmony_ci#
27db96d56Sopenharmony_ci# genmap_support.py: Multibyte Codec Map Generator
37db96d56Sopenharmony_ci#
47db96d56Sopenharmony_ci# Original Author:  Hye-Shik Chang <perky@FreeBSD.org>
57db96d56Sopenharmony_ci# Modified Author:  Dong-hee Na <donghee.na92@gmail.com>
67db96d56Sopenharmony_ci#
77db96d56Sopenharmony_ci
87db96d56Sopenharmony_ci
class BufferedFiller:
    """Pack short string tokens into lines of at most *column* characters.

    Tokens are appended to the current line without any separator.  When the
    next token would push the line past the column limit, the current line is
    closed and the token starts a new one.  Finished lines are held in memory
    until printout() writes them.  len() of the object is the number of
    tokens accepted so far; it is never reset.
    """

    def __init__(self, column=78):
        self.column = column    # maximum width of an output line
        self.buffered = []      # completed lines waiting for printout()
        self.cline = []         # tokens of the line currently being built
        self.clen = 0           # summed length of the tokens in cline
        self.count = 0          # total number of tokens ever written

    def write(self, *data):
        """Append each token in *data*, wrapping to a new line as needed.

        Raises ValueError for a token that is longer than a whole line.
        """
        for token in data:
            width = len(token)
            if width > self.column:
                raise ValueError("token is too long")
            # Close the current line first if this token would overflow it.
            if self.clen + width > self.column:
                self.flush()
            self.cline.append(token)
            self.clen += width
            self.count += 1

    def flush(self):
        """Move the line under construction (if any) to the finished lines."""
        if self.cline:
            self.buffered.append(''.join(self.cline))
            self.cline.clear()
            self.clen = 0

    def printout(self, fp):
        """Write every finished line to *fp*, one per line, then forget them."""
        self.flush()
        for text in self.buffered:
            fp.write(f'{text}\n')
        self.buffered.clear()

    def __len__(self):
        """Return the number of tokens written since construction."""
        return self.count
427db96d56Sopenharmony_ci
437db96d56Sopenharmony_ci
class DecodeMapWriter:
    """Write the C decode tables for one codec map to a file object.

    *decode_map* is a two-level mapping: first index -> {second index: value}.
    update_decode_map() queues the values of the selected rows and records,
    inside each row's own dict, where that row starts; generate() then emits
    the flat ``__<prefix>_decmap`` array followed by the 256-entry
    ``<prefix>_decmap`` index array.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue the values for rows c1range[0]..c1range[1] (inclusive).

        Only second-level keys within c2range[0]..c2range[1] (inclusive) are
        considered.  When *onlymask* is non-empty, rows whose first index is
        not in it are skipped.  *wide* is accepted but not used here.

        Each selected row dict is annotated in place with the keys
        ``self.prefix`` (True), ``'min'``, ``'max'`` and ``'midx'`` (the
        offset of the row's first value in the flat array).
        """
        c2_candidates = range(c2range[0], c2range[1] + 1)

        for lead in range(c1range[0], c1range[1] + 1):
            if lead not in self.decode_map:
                continue
            if onlymask and lead not in onlymask:
                continue

            row = self.decode_map[lead]
            present = [trail for trail in c2_candidates if trail in row]
            if not present:
                continue

            lowest, highest = present[0], present[-1]
            row[self.prefix] = True
            row['min'] = lowest
            row['max'] = highest
            row['midx'] = len(self.filler)

            # Emit the whole min..max span; holes are written as the token U.
            for trail in range(lowest, highest + 1):
                self.filler.write(('%d,' % row[trail]) if trail in row else 'U,')

    def generate(self, wide=False):
        """Write both C array definitions to ``self.fp``.

        With *wide* false the element type is ``ucs2_t`` and the index type
        ``struct dbcs_index``; with *wide* true they are ``Py_UCS4`` and
        ``struct widedbcs_index``.
        """
        if wide:
            self.fp.write(f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n")
        else:
            self.fp.write(f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n")

        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        if wide:
            self.fp.write(f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n")
        else:
            self.fp.write(f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n")

        for lead in range(256):
            # Rows that were never annotated for this prefix get an all-zero entry.
            if lead not in self.decode_map or self.prefix not in self.decode_map[lead]:
                self.filler.write("{", "0,", "0,", "0", "},")
                continue

            row = self.decode_map[lead]
            self.filler.write(
                "{", "__%s_decmap" % self.prefix, "+", "%d" % row['midx'],
                ",", "%d," % row['min'], "%d" % row['max'], "},",
            )
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
1017db96d56Sopenharmony_ci
1027db96d56Sopenharmony_ci
class EncodeMapWriter:
    """Write the C encode tables for one codec map to a file object.

    *encode_map* is a two-level mapping: first index -> {second index: value},
    where a value is either an int or a tuple.  generate() emits the flat
    ``__<prefix>_encmap`` array of ``elemtype`` followed by the 256-entry
    ``<prefix>_encmap`` array of ``indextype``.

    The write_nochar / write_multic / write_char hooks decide which token is
    queued for each slot.
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the flat table, then write both C arrays to ``self.fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue the values of every row and annotate each row in place.

        Each non-empty row dict receives the keys ``self.prefix`` (True),
        ``'min'``, ``'max'`` and ``'midx'`` (offset of the row's first value
        in the flat array).

        Raises ValueError when a mapped value is neither an int nor a tuple.
        """
        for c1 in range(256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # The bookkeeping keys stored below are strings living in the same
            # dict as the code points.  Skip them, otherwise a second pass over
            # the same map fails with TypeError while sorting str against int.
            rc2values = sorted(k for k in c2map if not isinstance(k, str))
            if not rc2values:
                continue

            c2map[self.prefix] = True
            c2map['min'] = rc2values[0]
            c2map['max'] = rc2values[-1]
            c2map['midx'] = len(self.filler)

            # Cover the whole min..max span so the row can be indexed directly.
            for v in range(rc2values[0], rc2values[-1] + 1):
                if v not in c2map:
                    self.write_nochar()
                elif isinstance(c2map[v], int):
                    self.write_char(c2map[v])
                elif isinstance(c2map[v], tuple):
                    self.write_multic(c2map[v])
                else:
                    raise ValueError(
                        f"unsupported mapping value {c2map[v]!r} "
                        f"at ({c1!r}, {v!r})")

    def write_nochar(self):
        """Queue the token for a slot that has no mapping."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Queue the token for a slot whose value is a tuple (*point* unused)."""
        self.filler.write('M,')

    def write_char(self, point):
        """Queue the decimal form of the int value *point*."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the flat array and the 256-entry index array to ``self.fp``."""
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                                  "%d" % self.encode_map[i]['midx'], ",",
                                  "%d," % self.encode_map[i]['min'],
                                  "%d" % self.encode_map[i]['max'], "},")
            else:
                # Rows not annotated for this prefix get an all-zero entry.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
1697db96d56Sopenharmony_ci
1707db96d56Sopenharmony_ci
def open_mapping_file(path, source):
    """Open the mapping file at *path* and return the open file object.

    The file is opened with open()'s defaults.  The returned object is not
    closed here; that is left to the caller.  If the file cannot be opened,
    the program exits through SystemExit with the message
    "<source> is needed".
    """
    try:
        mapping = open(path)
    except OSError:  # IOError is an alias of OSError
        raise SystemExit(f'{source} is needed')
    else:
        return mapping
1777db96d56Sopenharmony_ci
1787db96d56Sopenharmony_ci
def print_autogen(fo, source):
    """Write the one-line "auto-generated, do not edit" banner to *fo*.

    *source* is interpolated into the banner as the origin of the data.
    """
    banner = f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n'
    fo.write(banner)
1817db96d56Sopenharmony_ci
1827db96d56Sopenharmony_ci
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Parse a whitespace-separated mapping file into a two-level dict.

    *fo* is rewound to its start and read line by line.  Anything from the
    first ``#`` on a line is dropped, and lines left with fewer than two
    fields are ignored.  Column *natcol* is the native code and column
    *unicol* the mapped value.  Codes below 0x100 are kept only when *sbcs*
    is true.

    Returns ``{code >> 8: {code & 0xff: value}}``.  A "Loading from" notice
    is printed to standard output.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    table = {}
    for raw in fo:
        fields = raw.split('#', 1)[0].strip().split()
        if len(fields) < 2:
            continue

        # NOTE(review): eval() runs every field of the file as a Python
        # expression, so this must only be fed trusted mapping files.
        values = [eval(field) for field in fields]
        native, mapped = values[natcol], values[unicol]
        if native < 0x100 and not sbcs:
            continue
        table.setdefault(native >> 8, {})[native & 0xff] = mapped

    return table
199