#! /usr/bin/env python3
# -*- coding: iso-8859-1 -*-
# Originally written by Barry Warsaw <barry@python.org>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
#
# 2002-11-22 Juergen Hermann <jh@web.de>
# Added checks that _() only contains string literals, and
# command line args are resolved to module lists, i.e. you
# can now pass a filename, a module or package name, or a
# directory (including globbing chars, important for Win32).
# Made docstring fit in 80 chars wide displays using pydoc.
#

# for selftesting
try:
    import fintl
    _ = fintl.gettext
except ImportError:
    _ = lambda s: s

__doc__ = _("""pygettext -- Python equivalent of xgettext(1)

Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
the programming language and can be used from within Python programs.
Martin von Loewis' work[1] helps considerably in this regard.

There's one problem though; xgettext is the program that scans source code
looking for message strings, but it groks only C (or C++). Python
introduces a few wrinkles, such as dual quoting characters, triple quoted
strings, and raw strings. xgettext understands none of this.

Enter pygettext, which uses Python's standard tokenize module to scan
Python source code, generating .pot files identical to what GNU xgettext[2]
generates for C and C++ code. From there, the standard GNU tools can be
used.

A word about marking Python strings as candidates for translation. GNU
xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
and gettext_noop. But those can be a lot of text to include all over your
code. C and C++ have a trick: they use the C preprocessor. Most
internationalized C source includes a #define for gettext() to _() so that
what has to be written in the source is much less. Thus these are both
translatable strings:

    gettext("Translatable String")
    _("Translatable String")

Python of course has no preprocessor so this doesn't work so well. Thus,
pygettext searches only for _() by default, but see the -k/--keyword flag
below for how to augment this.

 [1] https://www.python.org/workshops/1997-10/proceedings/loewis.html
 [2] https://www.gnu.org/software/gettext/gettext.html

NOTE: pygettext attempts to be option and feature compatible with GNU
xgettext where ever possible. However some options are still missing or are
not fully implemented. Also, xgettext's use of command line switches with
option arguments is broken, and in these cases, pygettext just defines
additional switches.

Usage: pygettext [options] inputfile ...

Options:

    -a
    --extract-all
        Extract all strings.

    -d name
    --default-domain=name
        Rename the default output file from messages.pot to name.pot.

    -E
    --escape
        Replace non-ASCII characters with octal escape sequences.

    -D
    --docstrings
        Extract module, class, method, and function docstrings. These do
        not need to be wrapped in _() markers, and in fact cannot be for
        Python to consider them docstrings. (See also the -X option).

    -h
    --help
        Print this help message and exit.

    -k word
    --keyword=word
        Keywords to look for in addition to the default set, which are:
        %(DEFAULTKEYWORDS)s

        You can have multiple -k flags on the command line.

    -K
    --no-default-keywords
        Disable the default set of keywords (see above). Any keywords
        explicitly added with the -k/--keyword option are still recognized.

    --no-location
        Do not write filename/lineno location comments.

    -n
    --add-location
        Write filename/lineno location comments indicating where each
        extracted string is found in the source. These lines appear before
        each msgid. The style of comments is controlled by the -S/--style
        option. This is the default.

    -o filename
    --output=filename
        Rename the default output file from messages.pot to filename. If
        filename is `-' then the output is sent to standard out.

    -p dir
    --output-dir=dir
        Output files will be placed in directory dir.

    -S stylename
    --style stylename
        Specify which style to use for location comments. Two styles are
        supported:

        Solaris  # File: filename, line: line-number
        GNU      #: filename:line

        The style name is case insensitive. GNU style is the default.

    -v
    --verbose
        Print the names of the files being processed.

    -V
    --version
        Print the version of pygettext and exit.

    -w columns
    --width=columns
        Set width of output to columns.

    -x filename
    --exclude-file=filename
        Specify a file that contains a list of strings that are not be
        extracted from the input files. Each string to be excluded must
        appear on a line by itself in the file.

    -X filename
    --no-docstrings=filename
        Specify a file that contains a list of files (one per line) that
        should not have their docstrings extracted. This is only useful in
        conjunction with the -D option above.

If `inputfile' is -, standard input is read.
""")

import os
import importlib.machinery
import importlib.util
import sys
import glob
import time
import getopt
import ast
import token
import tokenize

__version__ = '1.5'

default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)

EMPTYSTRING = ''



# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=%(charset)s\\n"
"Content-Transfer-Encoding: %(encoding)s\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')


def usage(code, msg=''):
    """Print the help text (and optional `msg') to stderr, then exit.

    The module docstring is interpolated with the module globals so that
    %(DEFAULTKEYWORDS)s is filled in.  Always raises SystemExit(code).
    """
    print(__doc__ % globals(), file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)



def make_escapes(pass_nonascii):
    """Build the module-level escape table and select the escape function.

    Sets the globals `escapes' (list indexed by code point / byte value)
    and `escape' (callable taking (s, encoding)).  Must be called before
    normalize() is used.
    """
    global escapes, escape
    if pass_nonascii:
        # Allow non-ascii characters to pass through so that e.g. 'msgid
        # "H<o-umlaut>he"' would not result in 'msgid "H\366he"'.
        # Otherwise we escape any character outside the 32..126 range.
        mod = 128
        escape = escape_ascii
    else:
        mod = 256
        escape = escape_nonascii
    # Start with everything as an octal escape, then punch holes for the
    # printable ASCII range and the characters that have C-style escapes.
    escapes = [r"\%03o" % i for i in range(mod)]
    for i in range(32, 127):
        escapes[i] = chr(i)
    escapes[ord('\\')] = r'\\'
    escapes[ord('\t')] = r'\t'
    escapes[ord('\r')] = r'\r'
    escapes[ord('\n')] = r'\n'
    escapes[ord('\"')] = r'\"'


def escape_ascii(s, encoding):
    """Escape only ASCII characters of `s'; non-ASCII passes through.

    `encoding' is accepted for signature parity with escape_nonascii()
    and is not used.
    """
    return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s)


def escape_nonascii(s, encoding):
    """Encode `s' with `encoding' and escape every resulting byte."""
    return ''.join(escapes[b] for b in s.encode(encoding))


def is_literal_string(s):
    """Return true if token text `s' is a plain, r- or u-prefixed string."""
    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')


def safe_eval(s):
    """Evaluate string-literal token text `s' and return its value."""
    # unwrap quotes, safely
    # NOTE(review): this is eval() with empty builtins.  Every caller in
    # this file guards it with is_literal_string(), so only string-literal
    # tokens reach it; do not call it on arbitrary text.
    return eval(s, {'__builtins__':{}}, {})


def normalize(s, encoding):
    """Return `s' formatted as a (possibly multi-line) .po file string."""
    # This converts the various Python string types into a format that is
    # appropriate for .po files, namely much closer to C style.
    lines = s.split('\n')
    if len(lines) == 1:
        s = '"' + escape(s, encoding) + '"'
    else:
        if not lines[-1]:
            # The text ended with a newline: fold it back onto the last
            # real line so that it is emitted as an escaped "\n".
            del lines[-1]
            lines[-1] = lines[-1] + '\n'
        lines = [escape(line, encoding) for line in lines]
        lineterm = '\\n"\n"'
        s = '""\n"' + lineterm.join(lines) + '"'
    return s


def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'"""
    return any(c in str for c in set)


def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.

    Returns an empty list when `name' cannot be resolved to anything.
    """
    if not os.path.exists(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            found = []
            for match in glob.glob(name):
                found.extend(getFilesForName(match))
            return found

        # try to find module or package
        try:
            spec = importlib.util.find_spec(name)
        except (ImportError, ValueError):
            # ImportError: a parent package is missing; ValueError: the
            # name is not a valid module name (e.g. empty or relative).
            spec = None
        # find_spec() returns None (it does not raise) when a top-level
        # module simply does not exist.
        name = spec.origin if spec is not None else None
        if not name:
            return []

    if os.path.isdir(name):
        # find all python files in directory
        found = []
        # get extension for python source files
        _py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
        for root, dirs, files in os.walk(name):
            # don't recurse into CVS directories
            if 'CVS' in dirs:
                dirs.remove('CVS')
            # add all *.py files to list
            found.extend(
                [os.path.join(root, file) for file in files
                 if os.path.splitext(file)[1] == _py_ext]
                )
        return found
    elif os.path.exists(name):
        # a single file
        return [name]

    return []


class TokenEater:
    """State machine fed with tokenize tokens; collects translatable text.

    Call the instance once per token, call set_filename() before each new
    input, and finally write() the collected messages as a .pot file.
    """

    def __init__(self, options):
        self.__options = options
        # msg -> {(filename, lineno): isdocstring}
        self.__messages = {}
        # bound method handling the next token
        self.__state = self.__waiting
        # string pieces collected inside the current keyword(...) call
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        # dispatch
##        import token
##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
##              file=sys.stderr)
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING and is_literal_string(tstring):
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                if ttype in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
                    return
                self.__freshmodule = 0
            # class or func/method docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen
            return
        if ttype == tokenize.STRING:
            self.__scanfstring(tstring, lineno)

    def __scanfstring(self, tstring, lineno):
        # Look for keyword calls embedded in the replacement fields of an
        # f-string token; any other kind of string token is ignored here.
        opts = self.__options
        maybe_fstring = ast.parse(tstring, mode='eval').body
        if not isinstance(maybe_fstring, ast.JoinedStr):
            return
        for value in maybe_fstring.values:
            if not isinstance(value, ast.FormattedValue):
                continue
            for call in ast.walk(value):
                if not isinstance(call, ast.Call):
                    continue
                func = call.func
                if isinstance(func, ast.Name):
                    func_name = func.id
                elif isinstance(func, ast.Attribute):
                    func_name = func.attr
                else:
                    continue

                if func_name not in opts.keywords:
                    continue
                if len(call.args) != 1:
                    self.__warncall(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected amount of'
                        ' positional arguments in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                if call.keywords:
                    self.__warncall(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected keyword arguments'
                        ' in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                arg = call.args[0]
                if not isinstance(arg, ast.Constant):
                    self.__warncall(_(
                        '*** %(file)s:%(lineno)s: Seen unexpected argument type'
                        ' in gettext call: %(source_segment)s'
                        ), tstring, call, lineno)
                    continue
                if isinstance(arg.value, str):
                    self.__addentry(arg.value, lineno)

    def __warncall(self, template, tstring, call, lineno):
        # Report a malformed gettext call found inside an f-string.
        print(template % {
            'source_segment': ast.get_source_segment(tstring, call) or tstring,
            'file': self.__curfile,
            'lineno': lineno
            }, file=sys.stderr)

    def __suiteseen(self, ttype, tstring, lineno):
        # skip over any enclosure pairs until we see the colon
        if ttype == tokenize.OP:
            if tstring == ':' and self.__enclosurecount == 0:
                # we see a colon and we're not in an enclosure: end of def
                self.__state = self.__suitedocstring
            elif tstring in '([{':
                self.__enclosurecount += 1
            elif tstring in ')]}':
                self.__enclosurecount -= 1

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING and is_literal_string(tstring):
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings. Record the
            # line number of the first line of the strings and update the list
            # of messages seen. Reset state for the next batch. If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING and is_literal_string(tstring):
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # warn if we see anything else than STRING or whitespace
            print(_(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }, file=sys.stderr)
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0):
        # Record one occurrence of `msg' unless it is on the exclude list.
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Start a new input file; its first string may be a docstring."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the pot header and every collected message to `fp'."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
        encoding = fp.encoding if fp.encoding else 'UTF-8'
        print(pot_header % {'time': timestamp, 'version': __version__,
                            'charset': encoding,
                            'encoding': '8bit'}, file=fp)
        # Sort the entries. First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = sorted(v.keys())
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = sorted(reverse.keys())
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so. This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                isdocstring = any(v.values())
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples. We want to sort the entries in v first by
                # file name and then by line number.
                v = sorted(v.keys())
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print(_(
                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print(locline, file=fp)
                            locline = "#:" + s
                    if len(locline) > 2:
                        print(locline, file=fp)
                if isdocstring:
                    print('#, docstring', file=fp)
                print('msgid', normalize(k, encoding), file=fp)
                print('msgstr ""\n', file=fp)



def main():
    """Command line entry point: parse options, scan inputs, write output."""
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'add-location', 'no-location', 'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             # --no-docstrings takes a filename, exactly like -X.
             'docstrings', 'no-docstrings=',
             ])
    except getopt.error as msg:
        usage(1, msg)

    # for holding option values
    class Options:
        # constants
        GNU = 1
        SOLARIS = 2
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        writelocations = 1
        locationstyle = GNU
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()
    locations = {'gnu' : options.GNU,
                 'solaris' : options.SOLARIS,
                 }

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-D', '--docstrings'):
            options.docstrings = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-n', '--add-location'):
            options.writelocations = 1
        elif opt in ('--no-location',):
            options.writelocations = 0
        elif opt in ('-S', '--style'):
            options.locationstyle = locations.get(arg.lower())
            if options.locationstyle is None:
                usage(1, _('Invalid value for --style: %s') % arg)
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            print(_('pygettext.py (xgettext for Python) %s') % __version__)
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg
        elif opt in ('-X', '--no-docstrings'):
            with open(arg) as fp:
                for line in fp:
                    # Strip only the line terminator; slicing off the last
                    # character would truncate a final line that has no
                    # trailing newline.
                    options.nodocstrings[line.rstrip('\n')] = 1

    # calculate escapes
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            with open(options.excludefilename) as fp:
                # One string per line; drop the line terminator so that an
                # entry can actually compare equal to an extracted msgid.
                options.toexclude = [line.rstrip('\n') for line in fp]
        except IOError:
            print(_(
                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
            sys.exit(1)
    else:
        options.toexclude = []

    # resolve args to module lists
    expanded = []
    for arg in args:
        if arg == '-':
            expanded.append(arg)
        else:
            expanded.extend(getFilesForName(arg))
    args = expanded

    # slurp through all the files
    eater = TokenEater(options)
    for filename in args:
        if filename == '-':
            if options.verbose:
                print(_('Reading standard input'))
            fp = sys.stdin.buffer
            closep = 0
        else:
            if options.verbose:
                print(_('Working on %s') % filename)
            fp = open(filename, 'rb')
            closep = 1
        try:
            eater.set_filename(filename)
            try:
                tokens = tokenize.tokenize(fp.readline)
                for _token in tokens:
                    eater(*_token)
            except tokenize.TokenError as e:
                print('%s: %s, line %d, column %d' % (
                    e.args[0], filename, e.args[1][0], e.args[1][1]),
                    file=sys.stderr)
        finally:
            if closep:
                fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()


if __name__ == '__main__':
    main()
    # some more test strings
    # this one creates a warning
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    _('more' 'than' 'one' 'string')