17db96d56Sopenharmony_ci#! /usr/bin/env python3
27db96d56Sopenharmony_ci# -*- coding: iso-8859-1 -*-
37db96d56Sopenharmony_ci# Originally written by Barry Warsaw <barry@python.org>
47db96d56Sopenharmony_ci#
57db96d56Sopenharmony_ci# Minimally patched to make it even more xgettext compatible
67db96d56Sopenharmony_ci# by Peter Funk <pf@artcom-gmbh.de>
77db96d56Sopenharmony_ci#
# 2002-11-22 Jürgen Hermann <jh@web.de>
97db96d56Sopenharmony_ci# Added checks that _() only contains string literals, and
107db96d56Sopenharmony_ci# command line args are resolved to module lists, i.e. you
117db96d56Sopenharmony_ci# can now pass a filename, a module or package name, or a
127db96d56Sopenharmony_ci# directory (including globbing chars, important for Win32).
137db96d56Sopenharmony_ci# Made docstring fit in 80 chars wide displays using pydoc.
147db96d56Sopenharmony_ci#
157db96d56Sopenharmony_ci
# For self-testing: use fintl's gettext as the `_` marker when the fintl
# module can be imported; otherwise `_` returns its argument unchanged.
try:
    import fintl
except ImportError:
    _ = lambda s: s
else:
    _ = fintl.gettext
227db96d56Sopenharmony_ci
237db96d56Sopenharmony_ci__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
247db96d56Sopenharmony_ci
257db96d56Sopenharmony_ciMany systems (Solaris, Linux, Gnu) provide extensive tools that ease the
267db96d56Sopenharmony_ciinternationalization of C programs. Most of these tools are independent of
277db96d56Sopenharmony_cithe programming language and can be used from within Python programs.
287db96d56Sopenharmony_ciMartin von Loewis' work[1] helps considerably in this regard.
297db96d56Sopenharmony_ci
307db96d56Sopenharmony_ciThere's one problem though; xgettext is the program that scans source code
317db96d56Sopenharmony_cilooking for message strings, but it groks only C (or C++). Python
327db96d56Sopenharmony_ciintroduces a few wrinkles, such as dual quoting characters, triple quoted
337db96d56Sopenharmony_cistrings, and raw strings. xgettext understands none of this.
347db96d56Sopenharmony_ci
357db96d56Sopenharmony_ciEnter pygettext, which uses Python's standard tokenize module to scan
367db96d56Sopenharmony_ciPython source code, generating .pot files identical to what GNU xgettext[2]
377db96d56Sopenharmony_cigenerates for C and C++ code. From there, the standard GNU tools can be
387db96d56Sopenharmony_ciused.
397db96d56Sopenharmony_ci
407db96d56Sopenharmony_ciA word about marking Python strings as candidates for translation. GNU
417db96d56Sopenharmony_cixgettext recognizes the following keywords: gettext, dgettext, dcgettext,
427db96d56Sopenharmony_ciand gettext_noop. But those can be a lot of text to include all over your
437db96d56Sopenharmony_cicode. C and C++ have a trick: they use the C preprocessor. Most
447db96d56Sopenharmony_ciinternationalized C source includes a #define for gettext() to _() so that
457db96d56Sopenharmony_ciwhat has to be written in the source is much less. Thus these are both
467db96d56Sopenharmony_citranslatable strings:
477db96d56Sopenharmony_ci
487db96d56Sopenharmony_ci    gettext("Translatable String")
497db96d56Sopenharmony_ci    _("Translatable String")
507db96d56Sopenharmony_ci
517db96d56Sopenharmony_ciPython of course has no preprocessor so this doesn't work so well.  Thus,
527db96d56Sopenharmony_cipygettext searches only for _() by default, but see the -k/--keyword flag
537db96d56Sopenharmony_cibelow for how to augment this.
547db96d56Sopenharmony_ci
557db96d56Sopenharmony_ci [1] https://www.python.org/workshops/1997-10/proceedings/loewis.html
567db96d56Sopenharmony_ci [2] https://www.gnu.org/software/gettext/gettext.html
577db96d56Sopenharmony_ci
587db96d56Sopenharmony_ciNOTE: pygettext attempts to be option and feature compatible with GNU
597db96d56Sopenharmony_cixgettext where ever possible. However some options are still missing or are
607db96d56Sopenharmony_cinot fully implemented. Also, xgettext's use of command line switches with
617db96d56Sopenharmony_cioption arguments is broken, and in these cases, pygettext just defines
627db96d56Sopenharmony_ciadditional switches.
637db96d56Sopenharmony_ci
647db96d56Sopenharmony_ciUsage: pygettext [options] inputfile ...
657db96d56Sopenharmony_ci
667db96d56Sopenharmony_ciOptions:
677db96d56Sopenharmony_ci
687db96d56Sopenharmony_ci    -a
697db96d56Sopenharmony_ci    --extract-all
707db96d56Sopenharmony_ci        Extract all strings.
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci    -d name
737db96d56Sopenharmony_ci    --default-domain=name
747db96d56Sopenharmony_ci        Rename the default output file from messages.pot to name.pot.
757db96d56Sopenharmony_ci
767db96d56Sopenharmony_ci    -E
777db96d56Sopenharmony_ci    --escape
787db96d56Sopenharmony_ci        Replace non-ASCII characters with octal escape sequences.
797db96d56Sopenharmony_ci
807db96d56Sopenharmony_ci    -D
817db96d56Sopenharmony_ci    --docstrings
827db96d56Sopenharmony_ci        Extract module, class, method, and function docstrings.  These do
837db96d56Sopenharmony_ci        not need to be wrapped in _() markers, and in fact cannot be for
847db96d56Sopenharmony_ci        Python to consider them docstrings. (See also the -X option).
857db96d56Sopenharmony_ci
867db96d56Sopenharmony_ci    -h
877db96d56Sopenharmony_ci    --help
887db96d56Sopenharmony_ci        Print this help message and exit.
897db96d56Sopenharmony_ci
907db96d56Sopenharmony_ci    -k word
917db96d56Sopenharmony_ci    --keyword=word
927db96d56Sopenharmony_ci        Keywords to look for in addition to the default set, which are:
937db96d56Sopenharmony_ci        %(DEFAULTKEYWORDS)s
947db96d56Sopenharmony_ci
957db96d56Sopenharmony_ci        You can have multiple -k flags on the command line.
967db96d56Sopenharmony_ci
977db96d56Sopenharmony_ci    -K
987db96d56Sopenharmony_ci    --no-default-keywords
997db96d56Sopenharmony_ci        Disable the default set of keywords (see above).  Any keywords
1007db96d56Sopenharmony_ci        explicitly added with the -k/--keyword option are still recognized.
1017db96d56Sopenharmony_ci
1027db96d56Sopenharmony_ci    --no-location
1037db96d56Sopenharmony_ci        Do not write filename/lineno location comments.
1047db96d56Sopenharmony_ci
1057db96d56Sopenharmony_ci    -n
1067db96d56Sopenharmony_ci    --add-location
1077db96d56Sopenharmony_ci        Write filename/lineno location comments indicating where each
1087db96d56Sopenharmony_ci        extracted string is found in the source.  These lines appear before
1097db96d56Sopenharmony_ci        each msgid.  The style of comments is controlled by the -S/--style
1107db96d56Sopenharmony_ci        option.  This is the default.
1117db96d56Sopenharmony_ci
1127db96d56Sopenharmony_ci    -o filename
1137db96d56Sopenharmony_ci    --output=filename
1147db96d56Sopenharmony_ci        Rename the default output file from messages.pot to filename.  If
1157db96d56Sopenharmony_ci        filename is `-' then the output is sent to standard out.
1167db96d56Sopenharmony_ci
1177db96d56Sopenharmony_ci    -p dir
1187db96d56Sopenharmony_ci    --output-dir=dir
1197db96d56Sopenharmony_ci        Output files will be placed in directory dir.
1207db96d56Sopenharmony_ci
1217db96d56Sopenharmony_ci    -S stylename
1227db96d56Sopenharmony_ci    --style stylename
1237db96d56Sopenharmony_ci        Specify which style to use for location comments.  Two styles are
1247db96d56Sopenharmony_ci        supported:
1257db96d56Sopenharmony_ci
1267db96d56Sopenharmony_ci        Solaris  # File: filename, line: line-number
1277db96d56Sopenharmony_ci        GNU      #: filename:line
1287db96d56Sopenharmony_ci
1297db96d56Sopenharmony_ci        The style name is case insensitive.  GNU style is the default.
1307db96d56Sopenharmony_ci
1317db96d56Sopenharmony_ci    -v
1327db96d56Sopenharmony_ci    --verbose
1337db96d56Sopenharmony_ci        Print the names of the files being processed.
1347db96d56Sopenharmony_ci
1357db96d56Sopenharmony_ci    -V
1367db96d56Sopenharmony_ci    --version
1377db96d56Sopenharmony_ci        Print the version of pygettext and exit.
1387db96d56Sopenharmony_ci
1397db96d56Sopenharmony_ci    -w columns
1407db96d56Sopenharmony_ci    --width=columns
1417db96d56Sopenharmony_ci        Set width of output to columns.
1427db96d56Sopenharmony_ci
1437db96d56Sopenharmony_ci    -x filename
1447db96d56Sopenharmony_ci    --exclude-file=filename
1457db96d56Sopenharmony_ci        Specify a file that contains a list of strings that are not be
1467db96d56Sopenharmony_ci        extracted from the input files.  Each string to be excluded must
1477db96d56Sopenharmony_ci        appear on a line by itself in the file.
1487db96d56Sopenharmony_ci
1497db96d56Sopenharmony_ci    -X filename
1507db96d56Sopenharmony_ci    --no-docstrings=filename
1517db96d56Sopenharmony_ci        Specify a file that contains a list of files (one per line) that
1527db96d56Sopenharmony_ci        should not have their docstrings extracted.  This is only useful in
1537db96d56Sopenharmony_ci        conjunction with the -D option above.
1547db96d56Sopenharmony_ci
1557db96d56Sopenharmony_ciIf `inputfile' is -, standard input is read.
1567db96d56Sopenharmony_ci""")
1577db96d56Sopenharmony_ci
1587db96d56Sopenharmony_ciimport os
1597db96d56Sopenharmony_ciimport importlib.machinery
1607db96d56Sopenharmony_ciimport importlib.util
1617db96d56Sopenharmony_ciimport sys
1627db96d56Sopenharmony_ciimport glob
1637db96d56Sopenharmony_ciimport time
1647db96d56Sopenharmony_ciimport getopt
1657db96d56Sopenharmony_ciimport ast
1667db96d56Sopenharmony_ciimport token
1677db96d56Sopenharmony_ciimport tokenize
1687db96d56Sopenharmony_ci
1697db96d56Sopenharmony_ci__version__ = '1.5'
1707db96d56Sopenharmony_ci
1717db96d56Sopenharmony_cidefault_keywords = ['_']
1727db96d56Sopenharmony_ciDEFAULTKEYWORDS = ', '.join(default_keywords)
1737db96d56Sopenharmony_ci
1747db96d56Sopenharmony_ciEMPTYSTRING = ''
1757db96d56Sopenharmony_ci
1767db96d56Sopenharmony_ci
1777db96d56Sopenharmony_ci
1787db96d56Sopenharmony_ci# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
1797db96d56Sopenharmony_ci# there.
1807db96d56Sopenharmony_cipot_header = _('''\
1817db96d56Sopenharmony_ci# SOME DESCRIPTIVE TITLE.
1827db96d56Sopenharmony_ci# Copyright (C) YEAR ORGANIZATION
1837db96d56Sopenharmony_ci# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
1847db96d56Sopenharmony_ci#
1857db96d56Sopenharmony_cimsgid ""
1867db96d56Sopenharmony_cimsgstr ""
1877db96d56Sopenharmony_ci"Project-Id-Version: PACKAGE VERSION\\n"
1887db96d56Sopenharmony_ci"POT-Creation-Date: %(time)s\\n"
1897db96d56Sopenharmony_ci"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
1907db96d56Sopenharmony_ci"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
1917db96d56Sopenharmony_ci"Language-Team: LANGUAGE <LL@li.org>\\n"
1927db96d56Sopenharmony_ci"MIME-Version: 1.0\\n"
1937db96d56Sopenharmony_ci"Content-Type: text/plain; charset=%(charset)s\\n"
1947db96d56Sopenharmony_ci"Content-Transfer-Encoding: %(encoding)s\\n"
1957db96d56Sopenharmony_ci"Generated-By: pygettext.py %(version)s\\n"
1967db96d56Sopenharmony_ci
1977db96d56Sopenharmony_ci''')
1987db96d56Sopenharmony_ci
1997db96d56Sopenharmony_ci
def usage(code, msg=''):
    """Print the module help text to stderr and exit.

    The module docstring is interpolated with the module globals before
    being printed.  If msg is truthy it is printed to stderr as well, after
    the help text.  The process then exits via sys.exit(code).
    """
    stream = sys.stderr
    help_text = __doc__ % globals()
    print(help_text, file=stream)
    if msg:
        print(msg, file=stream)
    sys.exit(code)
2057db96d56Sopenharmony_ci
2067db96d56Sopenharmony_ci
2077db96d56Sopenharmony_ci
def make_escapes(pass_nonascii):
    """Build the global escape table and pick the global escape function.

    Sets two module globals:
      escapes -- list mapping a code point / byte value to its replacement
                 text in the output
      escape  -- escape_ascii when pass_nonascii is true, otherwise
                 escape_nonascii
    """
    global escapes, escape
    if pass_nonascii:
        # Allow non-ASCII characters to pass through unchanged, so that a
        # msgid containing e.g. an o-umlaut is written as-is instead of as
        # the octal escape \366.  Only the 128 ASCII codes need a table
        # entry in this mode.
        size, escape = 128, escape_ascii
    else:
        # Every possible byte value gets a table entry.
        size, escape = 256, escape_nonascii
    # Start with an octal escape for everything ...
    table = [r"\%03o" % code for code in range(size)]
    # ... let the printable ASCII range 32..126 stand for itself ...
    table[32:127] = [chr(code) for code in range(32, 127)]
    # ... and give these characters their C-style backslash forms.
    for char, replacement in (
            ('\\', r'\\'),
            ('\t', r'\t'),
            ('\r', r'\r'),
            ('\n', r'\n'),
            ('\"', r'\"'),
            ):
        table[ord(char)] = replacement
    escapes = table
2277db96d56Sopenharmony_ci
2287db96d56Sopenharmony_ci
def escape_ascii(s, encoding):
    """Escape only the ASCII characters of s using the global escapes table.

    Characters with a code point of 128 or above are copied through
    unchanged.  The encoding argument is not used by this function.
    """
    pieces = []
    for char in s:
        code = ord(char)
        pieces.append(escapes[code] if code < 128 else char)
    return ''.join(pieces)
2317db96d56Sopenharmony_ci
def escape_nonascii(s, encoding):
    """Encode s with the given encoding and escape the result byte by byte.

    Each byte of the encoded string is replaced by its entry in the global
    escapes table.
    """
    encoded = s.encode(encoding)
    pieces = [escapes[byte] for byte in encoded]
    return ''.join(pieces)
2347db96d56Sopenharmony_ci
2357db96d56Sopenharmony_ci
def is_literal_string(s):
    """Return true if the token text s starts like a plain string literal.

    Accepted forms are an opening quote character, or one of the prefix
    letters r, R, u, U immediately followed by a quote character.
    """
    quotes = '\'"'
    first = s[0]
    if first in quotes:
        return True
    return first in 'rRuU' and s[1] in quotes
2387db96d56Sopenharmony_ci
2397db96d56Sopenharmony_ci
def safe_eval(s):
    """Return the value of the Python literal whose source text is s.

    Used to unwrap the quotes of a string token.  ast.literal_eval only
    accepts literal syntax, so, unlike eval() with an emptied __builtins__,
    it cannot be used to run attribute lookups or calls smuggled into s.

    Raises ValueError or SyntaxError if s is not a valid literal.
    """
    return ast.literal_eval(s)
2437db96d56Sopenharmony_ci
2447db96d56Sopenharmony_ci
def normalize(s, encoding):
    """Convert the string s into the quoted form used in .po files.

    A string without newlines becomes one double-quoted, escaped string.
    A string with newlines becomes an empty "" line followed by one quoted
    line per source line, each but the last ending in an escaped newline.
    Escaping is done by the global escape function with the given encoding.
    """
    parts = s.split('\n')
    if len(parts) == 1:
        return '"' + escape(s, encoding) + '"'
    if not parts[-1]:
        # The string ended with a newline: drop the empty tail and put the
        # newline back on the last real line so that it gets escaped there.
        parts.pop()
        parts[-1] += '\n'
    escaped = [escape(part, encoding) for part in parts]
    separator = '\\n"\n"'
    return '""\n"' + separator.join(escaped) + '"'
2607db96d56Sopenharmony_ci
2617db96d56Sopenharmony_ci
def containsAny(str, set):
    """Return True if at least one item of 'set' is contained in 'str'."""
    return any(item in str for item in set)
2657db96d56Sopenharmony_ci
2667db96d56Sopenharmony_ci
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    name is resolved as follows:
      * an existing directory is walked recursively (skipping 'CVS'
        directories) and every file with the Python source suffix is
        returned;
      * any other existing path is returned as a one-element list;
      * a non-existing name containing glob characters is expanded with
        glob.glob() and each match is resolved recursively;
      * any other non-existing name is looked up as a module name with
        importlib.util.find_spec() and its origin is resolved as a path.

    Returns a list of paths; the list is empty when nothing matches.
    """
    if not os.path.exists(name):
        # check for glob chars
        if any(char in name for char in "*?[]"):
            matches = []
            for path in glob.glob(name):
                matches.extend(getFilesForName(path))
            return matches

        # try to find module or package
        try:
            spec = importlib.util.find_spec(name)
        except (ImportError, ValueError):
            # ImportError: a parent package could not be imported;
            # ValueError: find_spec() rejected the name.
            spec = None
        # find_spec() returns None (it does not raise) for an unknown
        # module, and origin may itself be None.
        name = spec.origin if spec is not None else None
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
        sources = []
        for root, dirs, files in os.walk(name):
            # don't recurse into CVS directories
            if 'CVS' in dirs:
                dirs.remove('CVS')
            # add all *.py files to the result
            sources.extend(
                os.path.join(root, filename) for filename in files
                if os.path.splitext(filename)[1] == py_ext
                )
        return sources
    if os.path.exists(name):
        # a single file
        return [name]

    return []
3097db96d56Sopenharmony_ci
3107db96d56Sopenharmony_ci
class TokenEater:
    """State machine that collects translatable strings from a token stream.

    An instance is called once per token with the fields of a tokenize
    token (type, string, start, end, line).  The current state is a bound
    method stored in self.__state; each state method receives the token
    type, the token string and the starting line number.

    The options object passed to the constructor is read for these
    attributes: docstrings, nodocstrings, keywords, toexclude,
    writelocations, locationstyle, SOLARIS, GNU and width.
    """

    def __init__(self, options):
        self.__options = options
        # Maps message -> {(filename, lineno): isdocstring flag}.
        self.__messages = {}
        self.__state = self.__waiting
        # String pieces collected inside the current keyword call.
        self.__data = []
        # Line of the opening parenthesis of the current keyword call.
        self.__lineno = -1
        # True until the first significant token of a file has been seen.
        self.__freshmodule = 1
        self.__curfile = None
        # Nesting depth of brackets while scanning a class/def header.
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        # Dispatch to the current state; only the start row is needed.
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        """Idle state: look for docstrings, keywords and f-strings."""
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING and is_literal_string(tstring):
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
                    return
                self.__freshmodule = 0
            # class or func/method docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen
            return
        if ttype == tokenize.STRING:
            self.__scan_fstring(tstring, lineno)

    def __scan_fstring(self, tstring, lineno):
        """Record keyword calls found in the replacement fields of an f-string.

        Does nothing if tstring does not parse to an f-string.  A call is
        recorded only when it has exactly one positional argument, no
        keyword arguments, and that argument is a string constant; other
        shapes of keyword calls are reported on stderr and skipped.
        """
        parsed = ast.parse(tstring, mode='eval').body
        if not isinstance(parsed, ast.JoinedStr):
            return
        keywords = self.__options.keywords
        for value in parsed.values:
            if not isinstance(value, ast.FormattedValue):
                continue
            for call in ast.walk(value):
                if not isinstance(call, ast.Call):
                    continue
                func = call.func
                if isinstance(func, ast.Name):
                    func_name = func.id
                elif isinstance(func, ast.Attribute):
                    func_name = func.attr
                else:
                    continue

                if func_name not in keywords:
                    continue
                if len(call.args) != 1:
                    self.__warn_call(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected amount of'
                        ' positional arguments in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                if call.keywords:
                    self.__warn_call(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected keyword arguments'
                        ' in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                arg = call.args[0]
                if not isinstance(arg, ast.Constant):
                    self.__warn_call(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected argument type'
                        ' in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                if isinstance(arg.value, str):
                    self.__addentry(arg.value, lineno)

    def __warn_call(self, template, tstring, call, lineno):
        """Print a warning about a keyword call inside an f-string to stderr."""
        print(template % {
            'source_segment': ast.get_source_segment(tstring, call) or tstring,
            'file': self.__curfile,
            'lineno': lineno
            }, file=sys.stderr)

    def __suiteseen(self, ttype, tstring, lineno):
        """After 'class' or 'def': skip the header up to its closing colon."""
        # skip over any enclosure pairs until we see the colon
        if ttype == tokenize.OP:
            if tstring == ':' and self.__enclosurecount == 0:
                # we see a colon and we're not in an enclosure: end of def
                self.__state = self.__suitedocstring
            elif tstring in '([{':
                self.__enclosurecount += 1
            elif tstring in ')]}':
                self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        """After a suite header: record a docstring if one comes first."""
        if ttype == tokenize.STRING and is_literal_string(tstring):
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.NL, tokenize.INDENT,
                           tokenize.COMMENT):
            # There was no docstring.  NL is skipped above because blank
            # and comment-only lines emit it, and a docstring may still
            # follow them.  The token that ended the search is handed to
            # the waiting state instead of being dropped, so that a nested
            # 'def'/'class' or a keyword call that opens the suite is
            # still recognized.
            self.__state = self.__waiting
            self.__waiting(ttype, tstring, lineno)

    def __keywordseen(self, ttype, tstring, lineno):
        """After a keyword name: expect the opening parenthesis of a call."""
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        """Inside a keyword call: gather string literals until ')'."""
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING and is_literal_string(tstring):
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print(_(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }, file=sys.stderr)
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        """Record msg at (current file, lineno) unless it is excluded.

        lineno defaults to the line of the current keyword call.
        """
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Set the file name used for new entries and re-arm the module
        docstring check."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the pot header and all collected messages to fp.

        fp is a text stream; its encoding attribute, if set, is written
        into the header as the charset and used for escaping, otherwise
        'UTF-8' is used.
        """
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
        encoding = fp.encoding if fp.encoding else 'UTF-8'
        print(pot_header % {'time': timestamp, 'version': __version__,
                            'charset': encoding,
                            'encoding': '8bit'}, file=fp)
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = sorted(v.keys())
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = sorted(reverse.keys())
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print(_(
                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print(locline, file=fp)
                            locline = "#:" + s
                    if len(locline) > 2:
                        print(locline, file=fp)
                if isdocstring:
                    print('#, docstring', file=fp)
                print('msgid', normalize(k, encoding), file=fp)
                print('msgstr ""\n', file=fp)
5177db96d56Sopenharmony_ci
5187db96d56Sopenharmony_ci
def main():
    """Command-line entry point: extract messages and write a .pot file.

    Parses ``sys.argv[1:]`` with getopt, collects the settings in a local
    ``Options`` holder, feeds every input file (or standard input for
    ``-``) through a ``TokenEater`` and finally lets the eater write the
    catalog to the selected output file (standard output for ``-o -``).

    The module-level ``default_keywords`` list is rebound to an empty
    list when ``-K`` / ``--no-default-keywords`` is given.

    Exits through ``sys.exit`` for ``--version`` and for an unreadable
    exclude file.  Option errors are handed to ``usage()``, which is
    defined elsewhere in this file and is presumably expected not to
    return.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # '=' is required: like its short form -X, this option takes
             # a file name argument.
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    # maps the (lower-cased) --style argument to a location style constant
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            # Every line of the file becomes a key of options.nodocstrings.
            # Strip only the line terminator, so that a final line without
            # a trailing newline keeps its last character.
            with open(arg) as fp:
                for line in fp:
                    options.nodocstrings[line.rstrip('\n')] = 1

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            with open(options.excludefilename) as fp:
                options.toexclude = fp.readlines()
        except IOError:
            print(_(
                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            # '-' means standard input; it is not a name to resolve
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            # tokenize.tokenize() needs a readline that returns bytes
            fp = sys.stdin.buffer
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename, 'rb')
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokens = tokenize.tokenize(fp.readline)
                # named _token so the module-level _() is not shadowed
                for _token in tokens:
                    eater(*_token)
            except tokenize.TokenError as e:
                # report the problem and carry on with the next file
                print('%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]),
                    file=sys.stderr)
        finally:
            # never close standard input
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        # never close standard output
        if closep:
            fp.close()
6777db96d56Sopenharmony_ci
6787db96d56Sopenharmony_ci
# Script entry point: run the extractor when the file is executed directly.
if __name__ == '__main__':
    main()
    # some more test strings
    # The two expression statements below have no effect at run time (their
    # results are discarded); they only add extra _() calls to this file,
    # next to the module docstring that is also wrapped in _().
    # this one creates a warning
    # NOTE(review): the warning mentioned above is not visible from this
    # block -- confirm against the token scanner before relying on it.
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    # adjacent string literals inside a single _() call
    _('more' 'than' 'one' 'string')
685