1// © 2018 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4// layoutpropsbuilder.cpp
5// created: 2018aug30 Markus W. Scherer
6
7#include <stdio.h>
8#include <string.h>
9#include "unicode/utypes.h"
10#include "unicode/uchar.h"
11#include "unicode/ucptrie.h"
12#include "unicode/udata.h"
13#include "unicode/umutablecptrie.h"
14#include "unicode/uniset.h"
15#include "cmemory.h"
16#include "genprops.h"
17#include "ppucd.h"
18#include "uassert.h"
19#include "ulayout_props.h"
20#include "unewdata.h"
21
22/* Unicode layout properties file format ---------------------------------------
23
24The file format prepared and written here contains several data
25structures that store indexes or data.
26
27Before the data contents described below, there are the headers required by
28the udata API for loading ICU data. Especially, a UDataInfo structure
precedes the actual data. It contains platform property values and the
30file format version.
31
32The following is a description of format version 1.0 .
33
34The file contains the following structures:
35
36    const int32_t indexes[i0] with values i0, i1, ...:
37    (see ULAYOUT_IX_... constants for names of indexes)
38
39    i0 indexesLength; -- length of indexes[] (ULAYOUT_IX_COUNT)
40    i1 inpcTop; -- limit byte offset of the InPC trie
41    i2 inscTop; -- limit byte offset of the InSC trie
42    i3 voTop; -- limit byte offset of the vo trie
43    i4..i7 -- reserved, same as the last limit byte offset
44    i8 -- reserved, 0
45
46    i9 maxValues; -- max values of the InPC, InSC, vo properties
47        (8 bits each; lowest 8 bits reserved, 0)
48    i10..i11 -- reserved, 0
49
50    After the indexes array follow consecutive, serialized,
51    single-property code point tries for the following properties,
52    each built "small" or "fast",
53    each padded to a multiple of 16 bytes:
54    - InPC
55    - InSC
56    - vo
57
58----------------------------------------------------------------------------- */
59
60U_NAMESPACE_USE
61
62// UDataInfo cf. udata.h
// Data header descriptor handed to udata_create() in writeBinaryData().
// Field roles noted below follow the UDataInfo declaration in udata.h.
static UDataInfo dataInfo = {
    sizeof(UDataInfo),  // size of this structure
    0,                  // reserved (per udata.h)

    U_IS_BIG_ENDIAN,    // platform properties of the machine running the tool
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,                  // reserved (per udata.h)

    // dataFormat="Layo"
    { ULAYOUT_FMT_0, ULAYOUT_FMT_1, ULAYOUT_FMT_2, ULAYOUT_FMT_3 },
    { 1, 0, 0, 0 },  // formatVersion, see the file format description above
    { 12, 0, 0, 0 }  // dataVersion; overwritten by setUnicodeVersion()
};
77
78class LayoutPropsBuilder : public PropsBuilder {
79public:
80    LayoutPropsBuilder(UErrorCode &errorCode);
81    virtual ~LayoutPropsBuilder() U_OVERRIDE;
82
83    virtual void setUnicodeVersion(const UVersionInfo version) U_OVERRIDE;
84    virtual void setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) U_OVERRIDE;
85    virtual void build(UErrorCode &errorCode) U_OVERRIDE;
86    virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) U_OVERRIDE;
87
88private:
89    void setIntProp(const UniProps &, const UnicodeSet &newValues,
90                    UProperty prop, UMutableCPTrie *trie,
91                    UErrorCode &errorCode);
92    int32_t getMaxIntValue(UProperty prop) const {
93        return maxIntValues[prop - UCHAR_INT_START];
94    }
95    void checkMaxIntValue(UProperty prop, int32_t maxMax, UErrorCode &errorCode) const;
96
97    int32_t maxIntValues[UCHAR_INT_LIMIT - UCHAR_INT_START];
98    UMutableCPTrie *inpcMutableTrie;
99    UMutableCPTrie *inscMutableTrie;
100    UMutableCPTrie *voMutableTrie;
101
102    UCPTrie *inpcTrie;
103    UCPTrie *inscTrie;
104    UCPTrie *voTrie;
105};
106
107LayoutPropsBuilder::LayoutPropsBuilder(UErrorCode &errorCode) :
108        inpcTrie(nullptr), inscTrie(nullptr), voTrie(nullptr) {
109    memset(maxIntValues, 0, sizeof(maxIntValues));
110    inpcMutableTrie = umutablecptrie_open(0, 0, &errorCode);
111    inscMutableTrie = umutablecptrie_open(0, 0, &errorCode);
112    voMutableTrie = umutablecptrie_open(0, 0, &errorCode);
113    if (U_FAILURE(errorCode)) {
114        fprintf(stderr, "genprops error: layoutpropsbuilder umutablecptrie_open() failed - %s\n",
115                u_errorName(errorCode));
116    }
117}
118
119LayoutPropsBuilder::~LayoutPropsBuilder() {
120    umutablecptrie_close(inpcMutableTrie);
121    umutablecptrie_close(inscMutableTrie);
122    umutablecptrie_close(voMutableTrie);
123    ucptrie_close(inpcTrie);
124    ucptrie_close(inscTrie);
125    ucptrie_close(voTrie);
126}
127
128void
129LayoutPropsBuilder::setUnicodeVersion(const UVersionInfo version) {
130    uprv_memcpy(dataInfo.dataVersion, version, 4);
131}
132
133void
134LayoutPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
135                             UErrorCode &errorCode) {
136    setIntProp(props, newValues, UCHAR_INDIC_POSITIONAL_CATEGORY, inpcMutableTrie, errorCode);
137    setIntProp(props, newValues, UCHAR_INDIC_SYLLABIC_CATEGORY, inscMutableTrie, errorCode);
138    setIntProp(props, newValues, UCHAR_VERTICAL_ORIENTATION, voMutableTrie, errorCode);
139}
140
141void LayoutPropsBuilder::setIntProp(const UniProps &props, const UnicodeSet &newValues,
142                                    UProperty prop, UMutableCPTrie *trie,
143                                    UErrorCode &errorCode) {
144    if (U_SUCCESS(errorCode) && newValues.contains(prop)) {
145        UChar32 start=props.start;
146        UChar32 end=props.end;
147        int32_t value = props.getIntProp(prop);
148        if (value < 0) {
149            fprintf(stderr, "error: unencodable negative value for property 0x%x %04lX..%04lX=%ld\n",
150                    (int)prop, (long)start, (long)end, (long)value);
151            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
152            return;
153        }
154        if (value > maxIntValues[prop - UCHAR_INT_START]) {
155            maxIntValues[prop - UCHAR_INT_START] = value;
156        }
157        if (start == end) {
158            umutablecptrie_set(trie, start, value, &errorCode);
159        } else {
160            umutablecptrie_setRange(trie, start, end, value, &errorCode);
161        }
162        if (U_FAILURE(errorCode)) {
163            fprintf(stderr, "error: umutablecptrie_set(prop 0x%x trie %04lX..%04lX) failed - %s\n",
164                    (int)prop, (long)start, (long)end, u_errorName(errorCode));
165        }
166    }
167}
168
169namespace {
170
171UCPTrie *buildUCPTrie(const char *name, UMutableCPTrie *mutableTrie,
172                      UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
173    UCPTrie *trie = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode);
174    if(U_FAILURE(errorCode)) {
175        fprintf(stderr, "genprops error: %s trie buildImmutable() failed: %s\n",
176                name, u_errorName(errorCode));
177        return trie;
178    }
179    if (!beQuiet) {
180        UErrorCode overflow = U_ZERO_ERROR;
181        int32_t length = ucptrie_toBinary(trie, nullptr, 0, &overflow);
182        printf("%11s trie size in bytes:        %5u\n", name, (int)length);
183    }
184    return trie;
185}
186
187constexpr int32_t TRIE_BLOCK_CAPACITY = 100000;
188
189uint8_t inpcBytes[TRIE_BLOCK_CAPACITY];
190uint8_t inscBytes[TRIE_BLOCK_CAPACITY];
191uint8_t voBytes[TRIE_BLOCK_CAPACITY];
192
193int32_t inpcLength = 0;
194int32_t inscLength = 0;
195int32_t voLength = 0;
196
197int32_t writeTrieBytes(const UCPTrie *trie, uint8_t block[], UErrorCode &errorCode) {
198    int32_t length = ucptrie_toBinary(trie, block, TRIE_BLOCK_CAPACITY, &errorCode);
199    while ((length & 0xf) != 0) {
200        block[length++] = 0xaa;
201    }
202    return length;
203}
204
205}  // namespace
206
207void
208LayoutPropsBuilder::build(UErrorCode &errorCode) {
209    if (U_FAILURE(errorCode)) { return; }
210    if (!beQuiet) {
211        puts("* text layout properties stats *");
212    }
213
214    checkMaxIntValue(UCHAR_INDIC_POSITIONAL_CATEGORY, 0xff, errorCode);
215    checkMaxIntValue(UCHAR_INDIC_SYLLABIC_CATEGORY, 0xff, errorCode);
216    checkMaxIntValue(UCHAR_VERTICAL_ORIENTATION, 0xff, errorCode);
217    inpcTrie = buildUCPTrie("inpc", inpcMutableTrie,
218                            UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode);
219    inscTrie = buildUCPTrie("insc", inscMutableTrie,
220                            UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode);
221    voTrie = buildUCPTrie("vo", voMutableTrie,
222                          UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode);
223
224    inpcLength = writeTrieBytes(inpcTrie, inpcBytes, errorCode);
225    inscLength = writeTrieBytes(inscTrie, inscBytes, errorCode);
226    voLength = writeTrieBytes(voTrie, voBytes, errorCode);
227
228    if (!beQuiet) {
229        int32_t size = ULAYOUT_IX_COUNT * 4 + inpcLength + inscLength + voLength;
230        printf("data size:                             %5d\n", (int)size);
231    }
232}
233
234void LayoutPropsBuilder::checkMaxIntValue(UProperty prop, int32_t maxMax,
235                                          UErrorCode &errorCode) const {
236    int32_t max = getMaxIntValue(prop);
237    if (max > maxMax) {
238        fprintf(stderr, "genprops error: 0x%x max value = %d overflow\n", (int)prop, (int)max);
239        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
240    }
241}
242
243// In ICU 63, we had functions writeCSourceFile() and writeJavaSourceFile().
244// For Java, each serialized trie was written as a String constant with
245// one byte per char and an optimization for byte 0,
246// to optimize for Java .class file size.
247// (See ICU 63 if we need to resurrect some of that code.)
248// Since ICU 64, we write a binary ulayout.icu file for use in both C++ & Java.
249
250void
251LayoutPropsBuilder::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
252    if (U_FAILURE(errorCode)) { return; }
253
254    UNewDataMemory *pData = udata_create(
255        path, ULAYOUT_DATA_TYPE, ULAYOUT_DATA_NAME, &dataInfo,
256        withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
257    if (U_FAILURE(errorCode)) {
258        fprintf(stderr, "genprops: udata_create(%s, ulayout.icu) failed - %s\n",
259                path, u_errorName(errorCode));
260        return;
261    }
262
263    int32_t indexes[ULAYOUT_IX_COUNT] = { ULAYOUT_IX_COUNT };
264    int32_t top = ULAYOUT_IX_COUNT * 4;
265
266    indexes[ULAYOUT_IX_INPC_TRIE_TOP] = (top += inpcLength);
267    indexes[ULAYOUT_IX_INSC_TRIE_TOP] = (top += inscLength);
268    indexes[ULAYOUT_IX_VO_TRIE_TOP] = (top += voLength);
269
270    // Set reserved trie-top values to the top of the last trie
271    // so that they look empty until a later file format version
272    // uses one or more of these slots.
273    for (int32_t i = ULAYOUT_IX_RESERVED_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
274        indexes[i] = top;
275    }
276
277    indexes[ULAYOUT_IX_MAX_VALUES] =
278        ((getMaxIntValue(UCHAR_INDIC_POSITIONAL_CATEGORY)) << ULAYOUT_MAX_INPC_SHIFT) |
279        ((getMaxIntValue(UCHAR_INDIC_SYLLABIC_CATEGORY)) << ULAYOUT_MAX_INSC_SHIFT) |
280        ((getMaxIntValue(UCHAR_VERTICAL_ORIENTATION)) << ULAYOUT_MAX_VO_SHIFT);
281
282    udata_writeBlock(pData, indexes, sizeof(indexes));
283    udata_writeBlock(pData, inpcBytes, inpcLength);
284    udata_writeBlock(pData, inscBytes, inscLength);
285    udata_writeBlock(pData, voBytes, voLength);
286
287    long dataLength = udata_finish(pData, &errorCode);
288    if (U_FAILURE(errorCode)) {
289        fprintf(stderr, "genprops: error %s writing the output file\n", u_errorName(errorCode));
290        return;
291    }
292
293    if (dataLength != (long)top) {
294        fprintf(stderr,
295                "udata_finish(ulayout.icu) reports %ld bytes written but should be %ld\n",
296                dataLength, (long)top);
297        errorCode = U_INTERNAL_PROGRAM_ERROR;
298    }
299}
300
301PropsBuilder *
302createLayoutPropsBuilder(UErrorCode &errorCode) {
303    if(U_FAILURE(errorCode)) { return nullptr; }
304    PropsBuilder *pb=new LayoutPropsBuilder(errorCode);
305    if(pb==nullptr) {
306        errorCode=U_MEMORY_ALLOCATION_ERROR;
307    }
308    return pb;
309}
310