#
# genmap_ja_codecs.py: Japanese Codecs Map Generator
#
# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
#
import os

from genmap_support import *

JISX0208_C1 = (0x21, 0x74)
JISX0208_C2 = (0x21, 0x7e)
JISX0212_C1 = (0x22, 0x6d)
JISX0212_C2 = (0x21, 0x7e)
JISX0213_C1 = (0x21, 0x7e)
JISX0213_C2 = (0x21, 0x7e)
CP932P0_C1 = (0x81, 0x81)  # patches between shift-jis and cp932
CP932P0_C2 = (0x5f, 0xca)
CP932P1_C1 = (0x87, 0x87)  # CP932 P1
CP932P1_C2 = (0x40, 0x9c)
CP932P2_C1 = (0xed, 0xfc)  # CP932 P2
CP932P2_C2 = (0x40, 0xfc)

MAPPINGS_JIS0208 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT'
MAPPINGS_JIS0212 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT'
MAPPINGS_CP932 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT'
MAPPINGS_JISX0213_2004 = 'http://wakaba-web.hp.infoseek.co.jp/table/jisx0213-2004-std.txt'


def loadmap_jisx0213(fo):
    """Parse a JIS X 0213 mapping table into five decode maps.

    Each non-comment row of *fo* (an iterable of text lines) must have at
    least two whitespace-separated columns:

    * column 0: ``<level>?<hex location>``; the first character is the
      level digit (3 or 4), the second character is skipped, and the rest
      is the hexadecimal location, split into ``loc >> 8`` / ``loc & 0xff``.
    * column 1: either a single code point (two ``+``-separated parts,
      hex digits starting at index 2) or a pair of 4-digit code points
      (hex digits at ``[2:6]`` and ``[7:11]``).

    Text after ``#`` is ignored, as are rows with fewer than two columns.

    Returns:
        ``(decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair)``.
        The first four are ``{c1: {c2: value}}``; for the ``*_2`` maps the
        value is the code point minus 0x20000.  ``decmap3_pair`` is
        ``{body: {c1: {c2: modifier}}}``.

    Raises:
        ValueError: if a field is not valid hexadecimal/decimal, if a
            single code point is outside the BMP and U+2xxxx ranges, if
            the level is not 3 or 4, or if a pair appears on a level
            other than 3.
    """
    decmap3, decmap4 = {}, {}  # maps to BMP for level 3 and 4
    decmap3_2, decmap4_2 = {}, {}  # maps to U+2xxxx for level 3 and 4
    decmap3_pair = {}  # maps to BMP-pair for level 3
    bmp_maps = {3: decmap3, 4: decmap4}
    plane2_maps = {3: decmap3_2, 4: decmap4_2}

    for line in fo:
        # Drop the trailing comment; blank or one-column rows carry no mapping.
        row = line.split('#', 1)[0].split()
        if len(row) < 2:
            continue

        # int() instead of eval(): the fields come straight from the data
        # file and must never be executed as Python source.
        loc = int(row[0][2:], 16)
        level = int(row[0][0])
        m = None
        if len(row[1].split('+')) == 2:  # single unicode
            uni = int(row[1][2:], 16)
            if 0 <= uni < 0x10000:
                m = bmp_maps.get(level)
            elif 0x20000 <= uni < 0x30000:
                uni -= 0x20000
                m = plane2_maps.get(level)
        else:  # pair
            uniprefix = int(row[1][2:6], 16)  # body
            uni = int(row[1][7:11], 16)  # modifier
            if level != 3:
                raise ValueError("invalid map")
            m = decmap3_pair.setdefault(uniprefix, {})

        # Reject unsupported level/range combinations *before* touching m,
        # so the caller sees ValueError rather than AttributeError on None.
        if m is None:
            raise ValueError("invalid map")
        m.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair
def _merge_into_encmap(encmap, decmap, flag=0):
    """Add the inverse of *decmap* (``{c1: {c2: code}}``) to *encmap* in place.

    Every entry becomes ``encmap[code >> 8][code & 0xff] = flag | c1 << 8 | c2``;
    an existing entry for the same code is overwritten.
    """
    for c1, m in decmap.items():
        for c2, code in m.items():
            encmap.setdefault(code >> 8, {})[code & 0xff] = flag | c1 << 8 | c2


def main():
    """Generate ``mappings_jp.h`` and ``mappings_jisx0213_pair.h``.

    Loads the four mapping tables from ``python-mappings/``, derives the
    encode maps from the decode maps, and writes both headers to the
    current working directory.

    Raises:
        SystemExit: if the JIS X 0213 table fails the 0x2124 sanity check.
        ValueError: if JIS X 0213 plane 1 disagrees with JIS X 0208 on a
            shared position (or the JIS X 0213 table is malformed).
    """
    jisx0208file = open_mapping_file('python-mappings/JIS0208.TXT', MAPPINGS_JIS0208)
    jisx0212file = open_mapping_file('python-mappings/JIS0212.TXT', MAPPINGS_JIS0212)
    cp932file = open_mapping_file('python-mappings/CP932.TXT', MAPPINGS_CP932)
    jisx0213file = open_mapping_file('python-mappings/jisx0213-2004-std.txt', MAPPINGS_JISX0213_2004)

    print("Loading Mapping File...")

    # NOTE(review): jisx0208file is handed to loadmap() twice; presumably
    # loadmap() rewinds the file itself -- confirm in genmap_support.
    sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2)
    jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2)
    jisx0212decmap = loadmap(jisx0212file)
    cp932decmap = loadmap(cp932file)
    jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap = loadmap_jisx0213(jisx0213file)

    if jis3decmap[0x21][0x24] != 0xff0c:
        raise SystemExit('Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff')

    sjisencmap = {}
    _merge_into_encmap(sjisencmap, sjisdecmap)

    # CP932: when several byte sequences decode to one code point, the
    # first one encountered wins.
    cp932encmap = {}
    for c1, m in cp932decmap.items():
        for c2, code in m.items():
            cp932encmap.setdefault(code >> 8, {}).setdefault(code & 0xff, c1 << 8 | c2)

    # Keep only the CP932 entries that differ from the Shift-JIS encode map.
    # Iterate over copies because entries are deleted along the way.
    for c1, m in cp932encmap.copy().items():
        for c2, code in m.copy().items():
            if c1 in sjisencmap and c2 in sjisencmap[c1] and sjisencmap[c1][c2] == code:
                del cp932encmap[c1][c2]
                if not cp932encmap[c1]:
                    del cp932encmap[c1]

    jisx0213pairdecmap = {}
    jisx0213pairencmap = []
    for unibody, m1 in jis3_pairdecmap.items():
        for c1, m2 in m1.items():
            for c2, modifier in m2.items():
                jisx0213pairencmap.append((unibody, modifier, c1 << 8 | c2))
                jisx0213pairdecmap.setdefault(c1, {})
                jisx0213pairdecmap[c1][c2] = unibody << 16 | modifier

    # Twinmap for both of JIS X 0208 (MSB unset) and JIS X 0212 (MSB set)
    jisx0208_0212encmap = {}
    _merge_into_encmap(jisx0208_0212encmap, jisx0208decmap)

    for c1, m in jisx0212decmap.items():
        for c2, code in m.items():
            jisx0208_0212encmap.setdefault(code >> 8, {})
            if (code & 0xff) in jisx0208_0212encmap[code >> 8]:
                # Collision with an existing entry: report it, then overwrite.
                print("OOPS!!!", (code))
            jisx0208_0212encmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    jisx0213bmpencmap = {}
    # Iterate over copies: positions identical to JIS X 0208 are removed
    # from jis3decmap inside the loop.
    for c1, m in jis3decmap.copy().items():
        for c2, code in m.copy().items():
            in_jisx0208 = c1 in jisx0208decmap and c2 in jisx0208decmap[c1]
            if code in jis3_pairdecmap:
                # Code point is also the body of a pair: store the (0,)
                # marker and add a modifier-less entry to the pair table.
                # setdefault() here fixes a KeyError the JIS X 0208-overlap
                # case hit when the row had not been created yet.
                jisx0213bmpencmap.setdefault(code >> 8, {})[code & 0xff] = (0,)  # pair
                jisx0213pairencmap.append((code, 0, c1 << 8 | c2))
            elif not in_jisx0208:
                jisx0213bmpencmap.setdefault(code >> 8, {})[code & 0xff] = c1 << 8 | c2
            elif jisx0208decmap[c1][c2] == code:
                del jis3decmap[c1][c2]
                if not jis3decmap[c1]:
                    del jis3decmap[c1]
            else:
                raise ValueError("Difference between JIS X 0208 and JIS X 0213 Plane 1 is found.")

    # Plane 2 entries are flagged with the MSB.
    _merge_into_encmap(jisx0213bmpencmap, jis4decmap, 0x8000)

    jisx0213empencmap = {}
    _merge_into_encmap(jisx0213empencmap, jis3_2_decmap)
    _merge_into_encmap(jisx0213empencmap, jis4_2_decmap, 0x8000)

    with open("mappings_jp.h", "w") as fp:
        print_autogen(fp, os.path.basename(__file__))
        print("Generating JIS X 0208 decode map...")
        writer = DecodeMapWriter(fp, "jisx0208", jisx0208decmap)
        writer.update_decode_map(JISX0208_C1, JISX0208_C2)
        writer.generate()

        print("Generating JIS X 0212 decode map...")
        writer = DecodeMapWriter(fp, "jisx0212", jisx0212decmap)
        writer.update_decode_map(JISX0212_C1, JISX0212_C2)
        writer.generate()

        print("Generating JIS X 0208 && JIS X 0212 encode map...")
        writer = EncodeMapWriter(fp, "jisxcommon", jisx0208_0212encmap)
        writer.generate()

        print("Generating CP932 Extension decode map...")
        writer = DecodeMapWriter(fp, "cp932ext", cp932decmap)
        writer.update_decode_map(CP932P0_C1, CP932P0_C2)
        writer.update_decode_map(CP932P1_C1, CP932P1_C2)
        writer.update_decode_map(CP932P2_C1, CP932P2_C2)
        writer.generate()

        print("Generating CP932 Extension encode map...")
        writer = EncodeMapWriter(fp, "cp932ext", cp932encmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_bmp", jis3decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_bmp", jis4decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 BMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_bmp", jisx0213bmpencmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_emp", jis3_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_emp", jis4_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 EMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_emp", jisx0213empencmap)
        writer.generate()

    with open('mappings_jisx0213_pair.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))
        fp.write(f"#define JISX0213_ENCPAIRS {len(jisx0213pairencmap)}\n")
        fp.write("""\
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
""")

        print("Generating JIS X 0213 unicode-pair decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_pair", jisx0213pairdecmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate(wide=True)

        print("Generating JIS X 0213 unicode-pair encode map...")
        # Emit the pair table sorted by (body, modifier, jis).
        jisx0213pairencmap.sort()
        fp.write("static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {\n")
        filler = BufferedFiller()
        for body, modifier, jis in jisx0213pairencmap:
            filler.write('{', '0x%04x%04x,' % (body, modifier), '0x%04x' % jis, '},')
        filler.printout(fp)
        fp.write("};\n")
        fp.write("#endif\n")

    print("Done!")


if __name__ == '__main__':
    main()