#
# genmap_support.py: Multibyte Codec Map Generator
#
# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
#


class BufferedFiller:
    """Pack short string tokens into output lines of bounded width.

    Tokens are accumulated on a current line; when the next token would push
    the line past ``column`` characters, the current line is moved to the
    list of finished lines and a new one is started.  ``len(filler)`` is the
    total number of tokens ever written (it is never reset).
    """

    def __init__(self, column=78):
        self.column = column    # maximum number of characters per line
        self.buffered = []      # finished lines waiting for printout()
        self.cline = []         # tokens of the line being assembled
        self.clen = 0           # character length of the tokens in cline
        self.count = 0          # number of tokens written so far

    def write(self, *data):
        """Append each token in *data*, wrapping to a new line when needed.

        Raises:
            ValueError: if a single token is longer than ``column``.
        """
        for token in data:
            size = len(token)
            if size > self.column:
                raise ValueError("token is too long")
            if size + self.clen > self.column:
                self.flush()
            self.clen += size
            self.cline.append(token)
            self.count += 1

    def flush(self):
        """Move the line being assembled (if any) to the finished lines."""
        if not self.cline:
            return
        self.buffered.append(''.join(self.cline))
        self.clen = 0
        self.cline.clear()

    def printout(self, fp):
        """Write every finished line to *fp*, newline-terminated, then drop them."""
        self.flush()
        for line in self.buffered:
            fp.write(f'{line}\n')
        self.buffered.clear()

    def __len__(self):
        return self.count


class DecodeMapWriter:
    """Emit ``__<prefix>_decmap`` / ``<prefix>_decmap`` array definitions.

    ``decode_map`` maps a lead value to a dict keyed by trail value.  While
    the data array is being built, bookkeeping entries are stored in those
    inner dicts under the string keys ``prefix``, ``'min'``, ``'max'`` and
    ``'midx'``.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Append the data for every lead value in *c1range* to the filler.

        Args:
            c1range: inclusive ``(first, last)`` pair of lead values.
            c2range: inclusive ``(first, last)`` pair of trail values.
            onlymask: when non-empty, only lead values contained in it are
                processed.
            wide: accepted but not used by this method.
        """
        c2values = range(c2range[0], c2range[1] + 1)

        for c1 in range(c1range[0], c1range[1] + 1):
            if c1 not in self.decode_map or (onlymask and c1 not in onlymask):
                continue
            c2map = self.decode_map[c1]
            rc2values = [n for n in c2values if n in c2map]
            if not rc2values:
                continue

            lowest, highest = rc2values[0], rc2values[-1]
            c2map[self.prefix] = True
            c2map['min'] = lowest
            c2map['max'] = highest
            # Offset of this row inside the flat data array.
            c2map['midx'] = len(self.filler)

            for v in range(lowest, highest + 1):
                if v in c2map:
                    self.filler.write('%d,' % c2map[v])
                else:
                    # Placeholder token for a hole inside [min, max].
                    self.filler.write('U,')

    def generate(self, wide=False):
        """Write the data array followed by the 256-entry index array to ``fp``.

        Args:
            wide: selects ``Py_UCS4`` / ``struct widedbcs_index`` instead of
                ``ucs2_t`` / ``struct dbcs_index`` in the emitted declarations.
        """
        if not wide:
            elemtype, indextype = "ucs2_t", "struct dbcs_index"
        else:
            elemtype, indextype = "Py_UCS4", "struct widedbcs_index"

        self.fp.write(
            f"static const {elemtype} __{self.prefix}_decmap"
            f"[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(
            f"static const {indextype} {self.prefix}_decmap[256] = {{\n")
        for i in range(256):
            if i in self.decode_map and self.prefix in self.decode_map[i]:
                row = self.decode_map[i]
                self.filler.write(
                    "{", f"__{self.prefix}_decmap", "+", "%d" % row['midx'],
                    ",", "%d," % row['min'], "%d" % row['max'], "},")
            else:
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")


class EncodeMapWriter:
    """Emit ``__<prefix>_encmap`` / ``<prefix>_encmap`` array definitions.

    ``encode_map`` maps a value in ``range(256)`` to a dict keyed by integer.
    Bookkeeping entries are stored in those inner dicts under the string keys
    ``prefix``, ``'min'``, ``'max'`` and ``'midx'``.
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the data array and write both arrays to ``fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Fill the data array and record each row's min/max/offset.

        Raises:
            ValueError: if a mapped value is neither an ``int`` nor a ``tuple``.
        """
        for c1 in range(256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are code values.  The string keys are the
            # bookkeeping entries stored below; skipping them keeps a map that
            # was already processed (e.g. shared by two writers) from failing
            # on an int/str comparison while sorting.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            lowest, highest = rc2values[0], rc2values[-1]
            c2map[self.prefix] = True
            c2map['min'] = lowest
            c2map['max'] = highest
            # Offset of this row inside the flat data array.
            c2map['midx'] = len(self.filler)

            for v in range(lowest, highest + 1):
                if v not in c2map:
                    self.write_nochar()
                    continue
                value = c2map[v]
                if isinstance(value, int):
                    self.write_char(value)
                elif isinstance(value, tuple):
                    self.write_multic(value)
                else:
                    raise ValueError(
                        f"unsupported mapping value {value!r} "
                        f"at ({c1!r}, {v!r})")

    def write_nochar(self):
        """Emit the placeholder token for a hole inside [min, max]."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Emit the placeholder token for a tuple-valued entry."""
        self.filler.write('M,')

    def write_char(self, point):
        """Emit *point* followed by a comma."""
        self.filler.write(f'{point},')

    def printmap(self):
        """Write the data array followed by the 256-entry index array to ``fp``."""
        self.fp.write(
            f"static const {self.elemtype} __{self.prefix}_encmap"
            f"[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(
            f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")
        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                row = self.encode_map[i]
                self.filler.write(
                    "{", f"__{self.prefix}_encmap", "+", "%d" % row['midx'],
                    ",", "%d," % row['min'], "%d" % row['max'], "},")
            else:
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")


def open_mapping_file(path, source):
    """Open *path* for reading and return the file object.

    The caller owns the returned file and is responsible for closing it.

    Raises:
        SystemExit: with the message ``'<source> is needed'`` when the file
            cannot be opened.
    """
    try:
        f = open(path)
    except OSError:
        raise SystemExit(f'{source} is needed')
    return f


def print_autogen(fo, source):
    """Write the "auto-generated, do not edit" banner naming *source* to *fo*."""
    fo.write(f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n')


def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Parse a whitespace-separated mapping file into a two-level dict.

    The file is rewound first.  Text after ``#`` on a line is ignored, and
    lines with fewer than two fields are skipped.

    Args:
        fo: seekable text file object to read.
        natcol: index of the field holding the key value.
        unicol: index of the field holding the mapped value.
        sbcs: when true, keys below 0x100 are kept as well.

    Returns:
        ``{key >> 8: {key & 0xff: value}}``.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    decmap = {}
    for line in fo:
        line = line.split('#', 1)[0].strip()
        fields = line.split()
        if len(fields) < 2:
            continue

        # NOTE(review): eval() executes arbitrary expressions taken from the
        # mapping file.  Only feed this function trusted files; if every
        # field is a plain numeric literal, int(field, 0) would be a safe
        # replacement -- confirm against the mapping files actually used.
        row = [eval(e) for e in fields]
        loc, uni = row[natcol], row[unicol]
        if loc >= 0x100 or sbcs:
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap