17db96d56Sopenharmony_ci#! /usr/bin/env python3
27db96d56Sopenharmony_ci# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ci"""Generate binary message catalog from textual translation description.
57db96d56Sopenharmony_ci
67db96d56Sopenharmony_ciThis program converts a textual Uniforum-style message catalog (.po file) into
77db96d56Sopenharmony_cia binary GNU catalog (.mo file).  This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.  It handles
plural forms (msgid_plural / msgstr[N]) and message contexts (msgctxt).
107db96d56Sopenharmony_ci
117db96d56Sopenharmony_ciUsage: msgfmt.py [OPTIONS] filename.po
127db96d56Sopenharmony_ci
137db96d56Sopenharmony_ciOptions:
147db96d56Sopenharmony_ci    -o file
157db96d56Sopenharmony_ci    --output-file=file
167db96d56Sopenharmony_ci        Specify the output file to write to.  If omitted, output will go to a
177db96d56Sopenharmony_ci        file named filename.mo (based off the input file name).
187db96d56Sopenharmony_ci
197db96d56Sopenharmony_ci    -h
207db96d56Sopenharmony_ci    --help
217db96d56Sopenharmony_ci        Print this message and exit.
227db96d56Sopenharmony_ci
237db96d56Sopenharmony_ci    -V
247db96d56Sopenharmony_ci    --version
257db96d56Sopenharmony_ci        Display version information and exit.
267db96d56Sopenharmony_ci"""
277db96d56Sopenharmony_ci
287db96d56Sopenharmony_ciimport os
297db96d56Sopenharmony_ciimport sys
307db96d56Sopenharmony_ciimport ast
317db96d56Sopenharmony_ciimport getopt
327db96d56Sopenharmony_ciimport struct
337db96d56Sopenharmony_ciimport array
347db96d56Sopenharmony_cifrom email.parser import HeaderParser
357db96d56Sopenharmony_ci
# Version string printed by the -V / --version option.
__version__ = "1.2"

# Catalog being built.  add() stores each accepted translation here, keyed by
# the encoded msgid (prefixed with the context and b"\x04" when the entry has
# a msgctxt); generate() reads it to produce the .mo contents.
MESSAGES = {}
397db96d56Sopenharmony_ci
407db96d56Sopenharmony_ci
def usage(code, msg=''):
    """Write the module help text to stderr and terminate the process.

    code -- exit status handed to sys.exit().
    msg  -- optional extra message, printed after the help text when truthy.

    This function never returns: it always raises SystemExit.
    """
    texts = [__doc__, msg] if msg else [__doc__]
    for text in texts:
        print(text, file=sys.stderr)
    sys.exit(code)
467db96d56Sopenharmony_ci
477db96d56Sopenharmony_ci
def add(ctxt, id, str, fuzzy, messages=None):
    """Add a non-fuzzy translation to the dictionary.

    ctxt     -- encoded message context (bytes), or None when the entry has
                no msgctxt.
    id       -- encoded msgid (bytes).
    str      -- encoded msgstr (bytes); an empty translation is not stored.
    fuzzy    -- truthy when the entry is marked fuzzy; such entries are
                skipped.
    messages -- dictionary to store into; defaults to the module-level
                MESSAGES catalog.

    Entries with a context are stored under ctxt + b"\\x04" + id, entries
    without one under id alone.
    """
    if messages is None:
        messages = MESSAGES
    # Fuzzy or untranslated entries never reach the catalog.
    if fuzzy or not str:
        return
    key = id if ctxt is None else b"%b\x04%b" % (ctxt, id)
    messages[key] = str
567db96d56Sopenharmony_ci
577db96d56Sopenharmony_ci
def generate(messages=None):
    """Return the binary .mo catalog contents as bytes.

    messages -- mapping of encoded msgid (bytes) to encoded msgstr (bytes);
                defaults to the module-level MESSAGES catalog.

    Layout of the result, in order: a header of seven 4-byte integers, the
    key index, the value index, all keys, all values.  Each index entry is a
    (length, file offset) pair; every string is NUL terminated and the NUL
    does not count into its length.  Integers are packed with struct/array
    formats that carry no byte-order prefix, i.e. in native byte order.
    """
    if messages is None:
        messages = MESSAGES
    # The keys are sorted in the .mo file.
    keys = sorted(messages)
    values = [messages[key] for key in keys]

    header_size = 7 * 4
    # One index table holds two integers (8 bytes) per entry.
    index_size = 8 * len(keys)
    # We don't use hash tables, so the keys start right after the two index
    # tables...
    keystart = header_size + 2 * index_size
    # ...and the values start after the NUL-terminated keys.
    valuestart = keystart + sum(len(key) + 1 for key in keys)

    # Each index entry has first the size of the string, then its file offset.
    koffsets = []
    voffsets = []
    kpos, vpos = keystart, valuestart
    for key, value in zip(keys, values):
        koffsets += [len(key), kpos]
        voffsets += [len(value), vpos]
        kpos += len(key) + 1
        vpos += len(value) + 1

    header = struct.pack("Iiiiiii",
                         0x950412de,                 # Magic
                         0,                          # Version
                         len(keys),                  # # of entries
                         header_size,                # start of key index
                         header_size + index_size,   # start of value index
                         0, 0)                       # size and offset of hash table
    index = array.array("i", koffsets + voffsets).tobytes()
    # Join once instead of growing bytes objects entry by entry.
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(value + b'\0' for value in values)
    return header + index + ids + strs
977db96d56Sopenharmony_ci
987db96d56Sopenharmony_ci
def make(filename, outfile):
    """Compile a textual .po catalog into a binary .mo file.

    filename -- path of the input catalog; '.po' is appended when the name
                does not already end with it.
    outfile  -- path of the file to write, or None to use the input path
                with its extension replaced by '.mo'.

    Parsed entries are stored through add() into the module-level MESSAGES
    dictionary, which this function never clears: entries collected by an
    earlier call in the same process are therefore written again.

    Terminates the process with exit status 1 (sys.exit) when the input
    cannot be read or is malformed.  A failure to write the output is only
    reported on stderr.
    """
    ID = 1
    STR = 2
    CTXT = 3

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except OSError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = msgctxt = None
    # None until the first msgid is seen, so a stray msgstr can be diagnosed
    # instead of failing on an unbound local.
    msgid = msgstr = None
    is_plural = False
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, raw in enumerate(lines, 1):
        line = raw.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if line[0] == '#' and section == STR:
            add(msgctxt, msgid, msgstr, fuzzy)
            section = msgctxt = None
            fuzzy = 0
        # Record a fuzzy mark
        if line[:2] == '#,' and 'fuzzy' in line:
            fuzzy = 1
        # Skip comments
        if line[0] == '#':
            continue
        # Now we are in a msgid or msgctxt section, output previous section
        if line.startswith('msgctxt'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # No comment separated the entries, so the fuzzy mark still
                # belongs to the entry just stored; do not carry it over.
                fuzzy = 0
            section = CTXT
            line = line[7:]
            msgctxt = b''
        elif line.startswith('msgid') and not line.startswith('msgid_plural'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                if not msgid:
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
                # The context and fuzzy mark belong to the entry just stored;
                # the entry starting here has neither.
                msgctxt = None
                fuzzy = 0
            section = ID
            line = line[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif line.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            line = line[12:]
            msgid += b'\0' # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            if msgid is None:
                print('msgstr not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            section = STR
            if line.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0' # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on  %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line[6:]
        # Skip empty lines
        line = line.strip()
        if not line:
            continue
        # What remains must be a quoted string; anything else is reported as
        # a syntax error rather than surfacing as a traceback.
        try:
            text = ast.literal_eval(line)
        except (SyntaxError, ValueError, TypeError):
            text = None
        if not isinstance(text, str):
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(line, file=sys.stderr)
            sys.exit(1)
        if section == CTXT:
            msgctxt += text.encode(encoding)
        elif section == ID:
            msgid += text.encode(encoding)
        elif section == STR:
            msgstr += text.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(text, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgctxt, msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except OSError as msg:
        print(msg, file=sys.stderr)
2177db96d56Sopenharmony_ci
def main():
    """Command-line entry point: parse options, then compile each input file.

    Options are read from sys.argv[1:].  -h/--help prints the usage text and
    exits with status 0, -V/--version prints the version and exits with
    status 0, and -o/--output-file sets the output path passed to make() for
    every input file.  An invalid option or a missing input file is reported
    on stderr and the process exits with status 1.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        # This is an error: report it through the exit status as well.
        sys.exit(1)

    for filename in args:
        make(filename, outfile)
2437db96d56Sopenharmony_ci
2447db96d56Sopenharmony_ci
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
247