1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6*   Copyright (C) 1999-2012, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*   file name:  genprops.cpp
11*   encoding:   US-ASCII
12*   tab size:   8 (not used)
13*   indentation:4
14*
15*   created on: 1999dec08
16*   created by: Markus W. Scherer
17*
18*   This program parses the ppucd.txt preparsed Unicode Character Database file
19*   and writes several source and binary files into the ICU source tree.
20*/
21
22#include <stdio.h>
23#include <stdlib.h>
24#include "unicode/utypes.h"
25#include "unicode/localpointer.h"
26#include "unicode/uniset.h"
27#include "unicode/unistr.h"
28#include "charstr.h"
29#include "genprops.h"
30#include "ppucd.h"
31#include "toolutil.h"
32#include "uoptions.h"
33
34U_NAMESPACE_USE
35
36UBool beVerbose=false;
37UBool beQuiet=false;
38
39PropsBuilder::PropsBuilder() {}
40PropsBuilder::~PropsBuilder() {}
41void PropsBuilder::setUnicodeVersion(const UVersionInfo) {}
42void PropsBuilder::setAlgNamesRange(UChar32, UChar32,
43                                    const char *, const char *, UErrorCode &) {}
44void PropsBuilder::setProps(const UniProps &, const UnicodeSet &, UErrorCode &) {}
45void PropsBuilder::parseUnidataFiles(const char *, UErrorCode &) {}
46void PropsBuilder::build(UErrorCode &) {}
47void PropsBuilder::writeCSourceFile(const char *, UErrorCode &) {}
48void PropsBuilder::writeJavaSourceFile(const char *, UErrorCode &) {}
49void PropsBuilder::writeBinaryData(const char *, UBool, UErrorCode &) {}
50
// Indexes into the options[] table that follows. Each constant is the
// position of the matching UOPTION_* entry, so the two lists must be kept in
// the same order.
enum {
    HELP_H = 0,
    HELP_QUESTION_MARK = 1,
    VERBOSE = 2,
    QUIET = 3,
    COPYRIGHT = 4
};
58
59/* Keep these values in sync with the above enums */
60static UOption options[]={
61    UOPTION_HELP_H,
62    UOPTION_HELP_QUESTION_MARK,
63    UOPTION_VERBOSE,
64    UOPTION_QUIET,
65    UOPTION_COPYRIGHT
66};
67
68extern int
69main(int argc, char* argv[]) {
70    U_MAIN_INIT_ARGS(argc, argv);
71    argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
72
73    /* error handling, printing usage message */
74    if(argc<0) {
75        fprintf(stderr,
76            "error in command line argument \"%s\"\n",
77            argv[-argc]);
78    }
79    if(argc<2 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
80        /*
81         * Broken into chunks because the C89 standard says the minimum
82         * required supported string length is 509 bytes.
83         */
84        fprintf(stderr,
85            "Usage: %s [-options] path/to/ICU/src/root\n"
86            "\n"
87            "Reads the preparsed UCD file path/to/ICU/src/root/source/data/unidata/ppucd.txt and\n"
88            "writes source and binary data files with the character properties.\n"
89            "(UCD=Unicode Character Database)\n"
90            "\n",
91            argv[0]);
92        fprintf(stderr,
93            "Options:\n"
94            "\t-h or -? or --help  this usage text\n"
95            "\t-v or --verbose     verbose output\n"
96            "\t-q or --quiet       no output\n"
97            "\t-c or --copyright   include a copyright notice\n");
98        return argc<2 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
99    }
100
101    /* get the options values */
102    beVerbose=options[VERBOSE].doesOccur;
103    beQuiet=options[QUIET].doesOccur;
104
105    /* initialize */
106    IcuToolErrorCode errorCode("genprops");
107    LocalPointer<PNamesBuilder> pnamesBuilder(createPNamesBuilder(errorCode));
108    LocalPointer<PropsBuilder> corePropsBuilder(createCorePropsBuilder(errorCode));
109    LocalPointer<PropsBuilder> bidiPropsBuilder(createBiDiPropsBuilder(errorCode));
110    LocalPointer<PropsBuilder> casePropsBuilder(createCasePropsBuilder(errorCode));
111    LocalPointer<PropsBuilder> layoutPropsBuilder(createLayoutPropsBuilder(errorCode));
112    LocalPointer<PropsBuilder> emojiPropsBuilder(createEmojiPropsBuilder(errorCode));
113    LocalPointer<PropsBuilder> namesPropsBuilder(createNamesPropsBuilder(errorCode));
114    if(errorCode.isFailure()) {
115        fprintf(stderr, "genprops: unable to create PropsBuilders - %s\n", errorCode.errorName());
116        return errorCode.reset();
117    }
118
119    CharString icuSrcRoot(argv[1], errorCode);
120
121    CharString icuSource(icuSrcRoot, errorCode);
122    icuSource.appendPathPart("source", errorCode);
123
124    CharString icuSourceData(icuSource, errorCode);
125    icuSourceData.appendPathPart("data", errorCode);
126
127    CharString unidataPath(icuSourceData, errorCode);
128    unidataPath.appendPathPart("unidata", errorCode);
129
130    CharString ppucdPath(unidataPath, errorCode);
131    ppucdPath.appendPathPart("ppucd.txt", errorCode);
132
133    PreparsedUCD ppucd(ppucdPath.data(), errorCode);
134    if(errorCode.isFailure()) {
135        fprintf(stderr, "genprops: unable to open %s - %s\n",
136                ppucdPath.data(), errorCode.errorName());
137        return errorCode.reset();
138    }
139
140    // The PNamesBuilder uses preparsed pnames_data.h.
141    pnamesBuilder->build(errorCode);
142    if(U_FAILURE(errorCode)) {
143        fprintf(stderr, "genprops: PNamesBuilder::build() failed - %s\n",
144                errorCode.errorName());
145        return errorCode.reset();
146    }
147    ppucd.setPropertyNames(pnamesBuilder->getPropertyNames());
148
149    PreparsedUCD::LineType lineType;
150    UnicodeSet newValues;
151    while((lineType=ppucd.readLine(errorCode))!=PreparsedUCD::NO_LINE) {
152        if(ppucd.lineHasPropertyValues()) {
153            const UniProps *props=ppucd.getProps(newValues, errorCode);
154            corePropsBuilder->setProps(*props, newValues, errorCode);
155            bidiPropsBuilder->setProps(*props, newValues, errorCode);
156            casePropsBuilder->setProps(*props, newValues, errorCode);
157            layoutPropsBuilder->setProps(*props, newValues, errorCode);
158            emojiPropsBuilder->setProps(*props, newValues, errorCode);
159            namesPropsBuilder->setProps(*props, newValues, errorCode);
160        } else if(lineType==PreparsedUCD::UNICODE_VERSION_LINE) {
161            const UVersionInfo &version=ppucd.getUnicodeVersion();
162            corePropsBuilder->setUnicodeVersion(version);
163            bidiPropsBuilder->setUnicodeVersion(version);
164            casePropsBuilder->setUnicodeVersion(version);
165            layoutPropsBuilder->setUnicodeVersion(version);
166            emojiPropsBuilder->setUnicodeVersion(version);
167            namesPropsBuilder->setUnicodeVersion(version);
168        } else if(lineType==PreparsedUCD::ALG_NAMES_RANGE_LINE) {
169            UChar32 start, end;
170            if(ppucd.getRangeForAlgNames(start, end, errorCode)) {
171                const char *type=ppucd.nextField();
172                const char *prefix=ppucd.nextField();  // NULL if type==hangul
173                namesPropsBuilder->setAlgNamesRange(start, end, type, prefix, errorCode);
174            }
175        }
176        if(errorCode.isFailure()) {
177            fprintf(stderr,
178                    "genprops: error parsing or setting values from ppucd.txt line %ld - %s\n",
179                    (long)ppucd.getLineNumber(), errorCode.errorName());
180            return errorCode.reset();
181        }
182    }
183
184    emojiPropsBuilder->parseUnidataFiles(unidataPath.data(), errorCode);
185
186    if (!beQuiet) { puts(""); }
187    corePropsBuilder->build(errorCode);
188    if (!beQuiet) { puts(""); }
189    bidiPropsBuilder->build(errorCode);
190    if (!beQuiet) { puts(""); }
191    casePropsBuilder->build(errorCode);
192    if (!beQuiet) { puts(""); }
193    layoutPropsBuilder->build(errorCode);
194    if (!beQuiet) { puts(""); }
195    emojiPropsBuilder->build(errorCode);
196    if (!beQuiet) { puts(""); }
197    namesPropsBuilder->build(errorCode);
198    if(errorCode.isFailure()) {
199        fprintf(stderr, "genprops error: failure finalizing the data - %s\n",
200                errorCode.errorName());
201        return errorCode.reset();
202    }
203
204    // Write the files with the generated data.
205    CharString sourceCommon(icuSource, errorCode);
206    sourceCommon.appendPathPart("common", errorCode);
207
208    CharString sourceDataIn(icuSourceData, errorCode);
209    sourceDataIn.appendPathPart("in", errorCode);
210
211    UBool withCopyright=options[COPYRIGHT].doesOccur;
212
213    pnamesBuilder->writeCSourceFile(sourceCommon.data(), errorCode);
214    pnamesBuilder->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
215    corePropsBuilder->writeCSourceFile(sourceCommon.data(), errorCode);
216    corePropsBuilder->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
217    bidiPropsBuilder->writeCSourceFile(sourceCommon.data(), errorCode);
218    bidiPropsBuilder->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
219    casePropsBuilder->writeCSourceFile(sourceCommon.data(), errorCode);
220    casePropsBuilder->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
221    namesPropsBuilder->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
222    layoutPropsBuilder->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
223    emojiPropsBuilder->writeBinaryData(sourceDataIn.data(), withCopyright, errorCode);
224
225    return errorCode;
226}
227
228/*
229 * Hey, Emacs, please set the following:
230 *
231 * Local Variables:
232 * indent-tabs-mode: nil
233 * End:
234 *
235 */
236