#! /usr/bin/env python3
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file).  This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.  It handles
plural forms and message contexts.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to.  If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array
from email.parser import HeaderParser

__version__ = "1.2"

# Catalog being built: maps the encoded message id (bytes, optionally
# prefixed with "<context>\x04") to the encoded translation (bytes).
MESSAGES = {}


def usage(code, msg=''):
    """Print the module docstring (and *msg*, if given) to stderr and exit.

    *code* is used as the process exit status.
    """
    print(__doc__, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)


def add(ctxt, id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    *ctxt* is the message context as bytes, or None when the entry has no
    context.  *id* and *str* are the message id and its translation as bytes.
    Entries that are marked fuzzy or have an empty translation are ignored.
    """
    if not fuzzy and str:
        if ctxt is None:
            MESSAGES[id] = str
        else:
            # Context and id are joined with an EOT byte in the .mo key.
            MESSAGES[b"%b\x04%b" % (ctxt, id)] = str


def generate():
    """Return the generated output.

    The result is the binary .mo image (bytes) of everything currently
    stored in MESSAGES, written with the platform's native byte order.
    """
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    offsets = []
    ids = strs = b''
    for key in keys:
        value = MESSAGES[key]
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        offsets.append((len(ids), len(key), len(strs), len(value)))
        ids += key + b'\0'
        strs += value + b'\0'
    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    keystart = 7*4 + 16*len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    offsets = koffsets + voffsets
    output = struct.pack("Iiiiiii",
                         0x950412de,         # Magic
                         0,                  # Version
                         len(keys),          # # of entries
                         7*4,                # start of key index
                         7*4 + len(keys)*8,  # start of value index
                         0, 0)               # size and offset of hash table
    output += array.array("i", offsets).tobytes()
    output += ids
    output += strs
    return output


def make(filename, outfile):
    """Compile the .po catalog *filename* and write the result to *outfile*.

    '.po' is appended to *filename* when it is missing.  When *outfile* is
    None the output name is the input name with the extension replaced by
    '.mo'.

    The parsed entries are added to the module-level MESSAGES dictionary,
    which is NOT cleared here: calling make() several times accumulates the
    messages of all inputs.  Callers that convert independent catalogs must
    clear MESSAGES between calls.

    The process is terminated with status 1 when the input cannot be read or
    a syntax error is found.  A failure to write the output is only reported
    on stderr.
    """
    ID = 1
    STR = 2
    CTXT = 3

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = msgctxt = None
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    lno = 0
    for line in lines:
        line = line.decode(encoding)
        lno += 1
        # If we get a comment line after a msgstr, this is a new entry
        if line[0] == '#' and section == STR:
            add(msgctxt, msgid, msgstr, fuzzy)
            section = msgctxt = None
            fuzzy = 0
        # Record a fuzzy mark
        if line[:2] == '#,' and 'fuzzy' in line:
            fuzzy = 1
        # Skip comments
        if line[0] == '#':
            continue
        # Now we are in a msgid or msgctxt section, output previous section
        if line.startswith('msgctxt'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # No comment separated the entries, so the fuzzy mark seen
                # earlier belonged to the entry just flushed, not to this one.
                fuzzy = 0
            section = CTXT
            line = line[7:]
            msgctxt = b''
        elif line.startswith('msgid') and not line.startswith('msgid_plural'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # The entry just flushed is complete; neither its fuzzy mark
                # nor its context may leak into the entry starting here (an
                # entry with its own context arrives with section == CTXT).
                fuzzy = 0
                msgctxt = None
                if not msgid:
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
            section = ID
            line = line[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif line.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            line = line[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            section = STR
            if line.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line[6:]
        # Skip empty lines
        line = line.strip()
        if not line:
            continue
        # PO strings use C-like quoting, which literal_eval can decode
        # without executing arbitrary code.
        line = ast.literal_eval(line)
        if section == CTXT:
            msgctxt += line.encode(encoding)
        elif section == ID:
            msgid += line.encode(encoding)
        elif section == STR:
            msgstr += line.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(line, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgctxt, msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)


def main():
    """Command-line entry point: parse sys.argv and compile each input file."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        return

    for filename in args:
        if outfile is None:
            # Every input is written to its own .mo file, so it must not
            # inherit the messages collected from previously processed
            # inputs.  With an explicit output file the inputs are merged.
            MESSAGES.clear()
        make(filename, outfile)


if __name__ == '__main__':
    main()