1// © 2018 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3 4// layoutpropsbuilder.cpp 5// created: 2018aug30 Markus W. Scherer 6 7#include <stdio.h> 8#include <string.h> 9#include "unicode/utypes.h" 10#include "unicode/uchar.h" 11#include "unicode/ucptrie.h" 12#include "unicode/udata.h" 13#include "unicode/umutablecptrie.h" 14#include "unicode/uniset.h" 15#include "cmemory.h" 16#include "genprops.h" 17#include "ppucd.h" 18#include "uassert.h" 19#include "ulayout_props.h" 20#include "unewdata.h" 21 22/* Unicode layout properties file format --------------------------------------- 23 24The file format prepared and written here contains several data 25structures that store indexes or data. 26 27Before the data contents described below, there are the headers required by 28the udata API for loading ICU data. Especially, a UDataInfo structure 29precedes the actual data. It contains platform properties values and the 30file format version. 31 32The following is a description of format version 1.0 . 33 34The file contains the following structures: 35 36 const int32_t indexes[i0] with values i0, i1, ...: 37 (see ULAYOUT_IX_... constants for names of indexes) 38 39 i0 indexesLength; -- length of indexes[] (ULAYOUT_IX_COUNT) 40 i1 inpcTop; -- limit byte offset of the InPC trie 41 i2 inscTop; -- limit byte offset of the InSC trie 42 i3 voTop; -- limit byte offset of the vo trie 43 i4..i7 -- reserved, same as the last limit byte offset 44 i8 -- reserved, 0 45 46 i9 maxValues; -- max values of the InPC, InSC, vo properties 47 (8 bits each; lowest 8 bits reserved, 0) 48 i10..i11 -- reserved, 0 49 50 After the indexes array follow consecutive, serialized, 51 single-property code point tries for the following properties, 52 each built "small" or "fast", 53 each padded to a multiple of 16 bytes: 54 - InPC 55 - InSC 56 - vo 57 58----------------------------------------------------------------------------- */ 59 60U_NAMESPACE_USE 61 62// UDataInfo cf. udata.h 63static UDataInfo dataInfo = { 64 sizeof(UDataInfo), 65 0, 66 67 U_IS_BIG_ENDIAN, 68 U_CHARSET_FAMILY, 69 U_SIZEOF_UCHAR, 70 0, 71 72 // dataFormat="Layo" 73 { ULAYOUT_FMT_0, ULAYOUT_FMT_1, ULAYOUT_FMT_2, ULAYOUT_FMT_3 }, 74 { 1, 0, 0, 0 }, // formatVersion 75 { 12, 0, 0, 0 } // dataVersion 76}; 77 78class LayoutPropsBuilder : public PropsBuilder { 79public: 80 LayoutPropsBuilder(UErrorCode &errorCode); 81 virtual ~LayoutPropsBuilder() U_OVERRIDE; 82 83 virtual void setUnicodeVersion(const UVersionInfo version) U_OVERRIDE; 84 virtual void setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) U_OVERRIDE; 85 virtual void build(UErrorCode &errorCode) U_OVERRIDE; 86 virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) U_OVERRIDE; 87 88private: 89 void setIntProp(const UniProps &, const UnicodeSet &newValues, 90 UProperty prop, UMutableCPTrie *trie, 91 UErrorCode &errorCode); 92 int32_t getMaxIntValue(UProperty prop) const { 93 return maxIntValues[prop - UCHAR_INT_START]; 94 } 95 void checkMaxIntValue(UProperty prop, int32_t maxMax, UErrorCode &errorCode) const; 96 97 int32_t maxIntValues[UCHAR_INT_LIMIT - UCHAR_INT_START]; 98 UMutableCPTrie *inpcMutableTrie; 99 UMutableCPTrie *inscMutableTrie; 100 UMutableCPTrie *voMutableTrie; 101 102 UCPTrie *inpcTrie; 103 UCPTrie *inscTrie; 104 UCPTrie *voTrie; 105}; 106 107LayoutPropsBuilder::LayoutPropsBuilder(UErrorCode &errorCode) : 108 inpcTrie(nullptr), inscTrie(nullptr), voTrie(nullptr) { 109 memset(maxIntValues, 0, sizeof(maxIntValues)); 110 inpcMutableTrie = umutablecptrie_open(0, 0, &errorCode); 111 inscMutableTrie = umutablecptrie_open(0, 0, &errorCode); 112 voMutableTrie = umutablecptrie_open(0, 0, &errorCode); 113 if (U_FAILURE(errorCode)) { 114 fprintf(stderr, "genprops error: layoutpropsbuilder umutablecptrie_open() failed - %s\n", 115 u_errorName(errorCode)); 116 } 117} 118 119LayoutPropsBuilder::~LayoutPropsBuilder() { 120 umutablecptrie_close(inpcMutableTrie); 121 umutablecptrie_close(inscMutableTrie); 122 umutablecptrie_close(voMutableTrie); 123 ucptrie_close(inpcTrie); 124 ucptrie_close(inscTrie); 125 ucptrie_close(voTrie); 126} 127 128void 129LayoutPropsBuilder::setUnicodeVersion(const UVersionInfo version) { 130 uprv_memcpy(dataInfo.dataVersion, version, 4); 131} 132 133void 134LayoutPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues, 135 UErrorCode &errorCode) { 136 setIntProp(props, newValues, UCHAR_INDIC_POSITIONAL_CATEGORY, inpcMutableTrie, errorCode); 137 setIntProp(props, newValues, UCHAR_INDIC_SYLLABIC_CATEGORY, inscMutableTrie, errorCode); 138 setIntProp(props, newValues, UCHAR_VERTICAL_ORIENTATION, voMutableTrie, errorCode); 139} 140 141void LayoutPropsBuilder::setIntProp(const UniProps &props, const UnicodeSet &newValues, 142 UProperty prop, UMutableCPTrie *trie, 143 UErrorCode &errorCode) { 144 if (U_SUCCESS(errorCode) && newValues.contains(prop)) { 145 UChar32 start=props.start; 146 UChar32 end=props.end; 147 int32_t value = props.getIntProp(prop); 148 if (value < 0) { 149 fprintf(stderr, "error: unencodable negative value for property 0x%x %04lX..%04lX=%ld\n", 150 (int)prop, (long)start, (long)end, (long)value); 151 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 152 return; 153 } 154 if (value > maxIntValues[prop - UCHAR_INT_START]) { 155 maxIntValues[prop - UCHAR_INT_START] = value; 156 } 157 if (start == end) { 158 umutablecptrie_set(trie, start, value, &errorCode); 159 } else { 160 umutablecptrie_setRange(trie, start, end, value, &errorCode); 161 } 162 if (U_FAILURE(errorCode)) { 163 fprintf(stderr, "error: umutablecptrie_set(prop 0x%x trie %04lX..%04lX) failed - %s\n", 164 (int)prop, (long)start, (long)end, u_errorName(errorCode)); 165 } 166 } 167} 168 169namespace { 170 171UCPTrie *buildUCPTrie(const char *name, UMutableCPTrie *mutableTrie, 172 UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) { 173 UCPTrie *trie = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode); 174 if(U_FAILURE(errorCode)) { 175 fprintf(stderr, "genprops error: %s trie buildImmutable() failed: %s\n", 176 name, u_errorName(errorCode)); 177 return trie; 178 } 179 if (!beQuiet) { 180 UErrorCode overflow = U_ZERO_ERROR; 181 int32_t length = ucptrie_toBinary(trie, nullptr, 0, &overflow); 182 printf("%11s trie size in bytes: %5u\n", name, (int)length); 183 } 184 return trie; 185} 186 187constexpr int32_t TRIE_BLOCK_CAPACITY = 100000; 188 189uint8_t inpcBytes[TRIE_BLOCK_CAPACITY]; 190uint8_t inscBytes[TRIE_BLOCK_CAPACITY]; 191uint8_t voBytes[TRIE_BLOCK_CAPACITY]; 192 193int32_t inpcLength = 0; 194int32_t inscLength = 0; 195int32_t voLength = 0; 196 197int32_t writeTrieBytes(const UCPTrie *trie, uint8_t block[], UErrorCode &errorCode) { 198 int32_t length = ucptrie_toBinary(trie, block, TRIE_BLOCK_CAPACITY, &errorCode); 199 while ((length & 0xf) != 0) { 200 block[length++] = 0xaa; 201 } 202 return length; 203} 204 205} // namespace 206 207void 208LayoutPropsBuilder::build(UErrorCode &errorCode) { 209 if (U_FAILURE(errorCode)) { return; } 210 if (!beQuiet) { 211 puts("* text layout properties stats *"); 212 } 213 214 checkMaxIntValue(UCHAR_INDIC_POSITIONAL_CATEGORY, 0xff, errorCode); 215 checkMaxIntValue(UCHAR_INDIC_SYLLABIC_CATEGORY, 0xff, errorCode); 216 checkMaxIntValue(UCHAR_VERTICAL_ORIENTATION, 0xff, errorCode); 217 inpcTrie = buildUCPTrie("inpc", inpcMutableTrie, 218 UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode); 219 inscTrie = buildUCPTrie("insc", inscMutableTrie, 220 UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode); 221 voTrie = buildUCPTrie("vo", voMutableTrie, 222 UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode); 223 224 inpcLength = writeTrieBytes(inpcTrie, inpcBytes, errorCode); 225 inscLength = writeTrieBytes(inscTrie, inscBytes, errorCode); 226 voLength = writeTrieBytes(voTrie, voBytes, errorCode); 227 228 if (!beQuiet) { 229 int32_t size = ULAYOUT_IX_COUNT * 4 + inpcLength + inscLength + voLength; 230 printf("data size: %5d\n", (int)size); 231 } 232} 233 234void LayoutPropsBuilder::checkMaxIntValue(UProperty prop, int32_t maxMax, 235 UErrorCode &errorCode) const { 236 int32_t max = getMaxIntValue(prop); 237 if (max > maxMax) { 238 fprintf(stderr, "genprops error: 0x%x max value = %d overflow\n", (int)prop, (int)max); 239 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 240 } 241} 242 243// In ICU 63, we had functions writeCSourceFile() and writeJavaSourceFile(). 244// For Java, each serialized trie was written as a String constant with 245// one byte per char and an optimization for byte 0, 246// to optimize for Java .class file size. 247// (See ICU 63 if we need to resurrect some of that code.) 248// Since ICU 64, we write a binary ulayout.icu file for use in both C++ & Java. 249 250void 251LayoutPropsBuilder::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) { 252 if (U_FAILURE(errorCode)) { return; } 253 254 UNewDataMemory *pData = udata_create( 255 path, ULAYOUT_DATA_TYPE, ULAYOUT_DATA_NAME, &dataInfo, 256 withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode); 257 if (U_FAILURE(errorCode)) { 258 fprintf(stderr, "genprops: udata_create(%s, ulayout.icu) failed - %s\n", 259 path, u_errorName(errorCode)); 260 return; 261 } 262 263 int32_t indexes[ULAYOUT_IX_COUNT] = { ULAYOUT_IX_COUNT }; 264 int32_t top = ULAYOUT_IX_COUNT * 4; 265 266 indexes[ULAYOUT_IX_INPC_TRIE_TOP] = (top += inpcLength); 267 indexes[ULAYOUT_IX_INSC_TRIE_TOP] = (top += inscLength); 268 indexes[ULAYOUT_IX_VO_TRIE_TOP] = (top += voLength); 269 270 // Set reserved trie-top values to the top of the last trie 271 // so that they look empty until a later file format version 272 // uses one or more of these slots. 273 for (int32_t i = ULAYOUT_IX_RESERVED_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) { 274 indexes[i] = top; 275 } 276 277 indexes[ULAYOUT_IX_MAX_VALUES] = 278 ((getMaxIntValue(UCHAR_INDIC_POSITIONAL_CATEGORY)) << ULAYOUT_MAX_INPC_SHIFT) | 279 ((getMaxIntValue(UCHAR_INDIC_SYLLABIC_CATEGORY)) << ULAYOUT_MAX_INSC_SHIFT) | 280 ((getMaxIntValue(UCHAR_VERTICAL_ORIENTATION)) << ULAYOUT_MAX_VO_SHIFT); 281 282 udata_writeBlock(pData, indexes, sizeof(indexes)); 283 udata_writeBlock(pData, inpcBytes, inpcLength); 284 udata_writeBlock(pData, inscBytes, inscLength); 285 udata_writeBlock(pData, voBytes, voLength); 286 287 long dataLength = udata_finish(pData, &errorCode); 288 if (U_FAILURE(errorCode)) { 289 fprintf(stderr, "genprops: error %s writing the output file\n", u_errorName(errorCode)); 290 return; 291 } 292 293 if (dataLength != (long)top) { 294 fprintf(stderr, 295 "udata_finish(ulayout.icu) reports %ld bytes written but should be %ld\n", 296 dataLength, (long)top); 297 errorCode = U_INTERNAL_PROGRAM_ERROR; 298 } 299} 300 301PropsBuilder * 302createLayoutPropsBuilder(UErrorCode &errorCode) { 303 if(U_FAILURE(errorCode)) { return nullptr; } 304 PropsBuilder *pb=new LayoutPropsBuilder(errorCode); 305 if(pb==nullptr) { 306 errorCode=U_MEMORY_ALLOCATION_ERROR; 307 } 308 return pb; 309} 310