1// © 2017 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3 4// umutablecptrie.cpp (inspired by utrie2_builder.cpp) 5// created: 2017dec29 Markus W. Scherer 6 7// #define UCPTRIE_DEBUG 8#ifdef UCPTRIE_DEBUG 9# include <stdio.h> 10#endif 11 12#include "unicode/utypes.h" 13#include "unicode/ucptrie.h" 14#include "unicode/umutablecptrie.h" 15#include "unicode/uobject.h" 16#include "unicode/utf16.h" 17#include "cmemory.h" 18#include "uassert.h" 19#include "ucptrie_impl.h" 20 21// ICU-20235 In case Microsoft math.h has defined this, undefine it. 22#ifdef OVERFLOW 23#undef OVERFLOW 24#endif 25 26U_NAMESPACE_BEGIN 27 28namespace { 29 30constexpr int32_t MAX_UNICODE = 0x10ffff; 31 32constexpr int32_t UNICODE_LIMIT = 0x110000; 33constexpr int32_t BMP_LIMIT = 0x10000; 34constexpr int32_t ASCII_LIMIT = 0x80; 35 36constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3; 37constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3; 38constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3; 39 40constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3)); 41 42// Flag values for data blocks. 43constexpr uint8_t ALL_SAME = 0; 44constexpr uint8_t MIXED = 1; 45constexpr uint8_t SAME_AS = 2; 46 47/** Start with allocation of 16k data entries. */ 48constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14); 49 50/** Grow about 8x each time. */ 51constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17); 52 53/** 54 * Maximum length of the build-time data array. 55 * One entry per 0x110000 code points. 56 */ 57constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT; 58 59// Flag values for index-3 blocks while compacting/building. 60constexpr uint8_t I3_NULL = 0; 61constexpr uint8_t I3_BMP = 1; 62constexpr uint8_t I3_16 = 2; 63constexpr uint8_t I3_18 = 3; 64 65constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8; 66 67class AllSameBlocks; 68class MixedBlocks; 69 70class MutableCodePointTrie : public UMemory { 71public: 72 MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode); 73 MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode); 74 MutableCodePointTrie(const MutableCodePointTrie &other) = delete; 75 ~MutableCodePointTrie(); 76 77 MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete; 78 79 static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode); 80 static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode); 81 82 uint32_t get(UChar32 c) const; 83 int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context, 84 uint32_t *pValue) const; 85 86 void set(UChar32 c, uint32_t value, UErrorCode &errorCode); 87 void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode); 88 89 UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode); 90 91private: 92 void clear(); 93 94 bool ensureHighStart(UChar32 c); 95 int32_t allocDataBlock(int32_t blockLength); 96 int32_t getDataBlock(int32_t i); 97 98 void maskValues(uint32_t mask); 99 UChar32 findHighStart() const; 100 int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks); 101 int32_t compactData( 102 int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity, 103 int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode); 104 int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode); 105 int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode); 106 107 uint32_t *index = nullptr; 108 int32_t indexCapacity = 0; 109 int32_t index3NullOffset = -1; 110 uint32_t *data = nullptr; 111 int32_t dataCapacity = 0; 112 int32_t dataLength = 0; 113 int32_t dataNullOffset = -1; 114 115 uint32_t origInitialValue; 116 uint32_t initialValue; 117 uint32_t errorValue; 118 UChar32 highStart; 119 uint32_t highValue; 120#ifdef UCPTRIE_DEBUG 121public: 122 const char *name; 123#endif 124private: 125 /** Temporary array while building the final data. */ 126 uint16_t *index16 = nullptr; 127 uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3]; 128}; 129 130MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) : 131 origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue), 132 highStart(0), highValue(initialValue) 133#ifdef UCPTRIE_DEBUG 134 , name("open") 135#endif 136 { 137 if (U_FAILURE(errorCode)) { return; } 138 index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4); 139 data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4); 140 if (index == nullptr || data == nullptr) { 141 errorCode = U_MEMORY_ALLOCATION_ERROR; 142 return; 143 } 144 indexCapacity = BMP_I_LIMIT; 145 dataCapacity = INITIAL_DATA_LENGTH; 146} 147 148MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) : 149 index3NullOffset(other.index3NullOffset), 150 dataNullOffset(other.dataNullOffset), 151 origInitialValue(other.origInitialValue), initialValue(other.initialValue), 152 errorValue(other.errorValue), 153 highStart(other.highStart), highValue(other.highValue) 154#ifdef UCPTRIE_DEBUG 155 , name("mutable clone") 156#endif 157 { 158 if (U_FAILURE(errorCode)) { return; } 159 int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT; 160 index = (uint32_t *)uprv_malloc(iCapacity * 4); 161 data = (uint32_t *)uprv_malloc(other.dataCapacity * 4); 162 if (index == nullptr || data == nullptr) { 163 errorCode = U_MEMORY_ALLOCATION_ERROR; 164 return; 165 } 166 indexCapacity = iCapacity; 167 dataCapacity = other.dataCapacity; 168 169 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; 170 uprv_memcpy(flags, other.flags, iLimit); 171 uprv_memcpy(index, other.index, iLimit * 4); 172 uprv_memcpy(data, other.data, (size_t)other.dataLength * 4); 173 dataLength = other.dataLength; 174 U_ASSERT(other.index16 == nullptr); 175} 176 177MutableCodePointTrie::~MutableCodePointTrie() { 178 uprv_free(index); 179 uprv_free(data); 180 uprv_free(index16); 181} 182 183MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) { 184 // Use the highValue as the initialValue to reduce the highStart. 185 uint32_t errorValue = ucpmap_get(map, -1); 186 uint32_t initialValue = ucpmap_get(map, 0x10ffff); 187 LocalPointer<MutableCodePointTrie> mutableTrie( 188 new MutableCodePointTrie(initialValue, errorValue, errorCode), 189 errorCode); 190 if (U_FAILURE(errorCode)) { 191 return nullptr; 192 } 193 UChar32 start = 0, end; 194 uint32_t value; 195 while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, 196 nullptr, nullptr, &value)) >= 0) { 197 if (value != initialValue) { 198 if (start == end) { 199 mutableTrie->set(start, value, errorCode); 200 } else { 201 mutableTrie->setRange(start, end, value, errorCode); 202 } 203 } 204 start = end + 1; 205 } 206 if (U_SUCCESS(errorCode)) { 207 return mutableTrie.orphan(); 208 } else { 209 return nullptr; 210 } 211} 212 213MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) { 214 // Use the highValue as the initialValue to reduce the highStart. 215 uint32_t errorValue; 216 uint32_t initialValue; 217 switch (trie->valueWidth) { 218 case UCPTRIE_VALUE_BITS_16: 219 errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; 220 initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; 221 break; 222 case UCPTRIE_VALUE_BITS_32: 223 errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; 224 initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; 225 break; 226 case UCPTRIE_VALUE_BITS_8: 227 errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; 228 initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; 229 break; 230 default: 231 // Unreachable if the trie is properly initialized. 232 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 233 return nullptr; 234 } 235 LocalPointer<MutableCodePointTrie> mutableTrie( 236 new MutableCodePointTrie(initialValue, errorValue, errorCode), 237 errorCode); 238 if (U_FAILURE(errorCode)) { 239 return nullptr; 240 } 241 UChar32 start = 0, end; 242 uint32_t value; 243 while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, 244 nullptr, nullptr, &value)) >= 0) { 245 if (value != initialValue) { 246 if (start == end) { 247 mutableTrie->set(start, value, errorCode); 248 } else { 249 mutableTrie->setRange(start, end, value, errorCode); 250 } 251 } 252 start = end + 1; 253 } 254 if (U_SUCCESS(errorCode)) { 255 return mutableTrie.orphan(); 256 } else { 257 return nullptr; 258 } 259} 260 261void MutableCodePointTrie::clear() { 262 index3NullOffset = dataNullOffset = -1; 263 dataLength = 0; 264 highValue = initialValue = origInitialValue; 265 highStart = 0; 266 uprv_free(index16); 267 index16 = nullptr; 268} 269 270uint32_t MutableCodePointTrie::get(UChar32 c) const { 271 if ((uint32_t)c > MAX_UNICODE) { 272 return errorValue; 273 } 274 if (c >= highStart) { 275 return highValue; 276 } 277 int32_t i = c >> UCPTRIE_SHIFT_3; 278 if (flags[i] == ALL_SAME) { 279 return index[i]; 280 } else { 281 return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)]; 282 } 283} 284 285inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue, 286 UCPMapValueFilter *filter, const void *context) { 287 if (value == initialValue) { 288 value = nullValue; 289 } else if (filter != nullptr) { 290 value = filter(context, value); 291 } 292 return value; 293} 294 295UChar32 MutableCodePointTrie::getRange( 296 UChar32 start, UCPMapValueFilter *filter, const void *context, 297 uint32_t *pValue) const { 298 if ((uint32_t)start > MAX_UNICODE) { 299 return U_SENTINEL; 300 } 301 if (start >= highStart) { 302 if (pValue != nullptr) { 303 uint32_t value = highValue; 304 if (filter != nullptr) { value = filter(context, value); } 305 *pValue = value; 306 } 307 return MAX_UNICODE; 308 } 309 uint32_t nullValue = initialValue; 310 if (filter != nullptr) { nullValue = filter(context, nullValue); } 311 UChar32 c = start; 312 uint32_t trieValue, value; 313 bool haveValue = false; 314 int32_t i = c >> UCPTRIE_SHIFT_3; 315 do { 316 if (flags[i] == ALL_SAME) { 317 uint32_t trieValue2 = index[i]; 318 if (haveValue) { 319 if (trieValue2 != trieValue) { 320 if (filter == nullptr || 321 maybeFilterValue(trieValue2, initialValue, nullValue, 322 filter, context) != value) { 323 return c - 1; 324 } 325 trieValue = trieValue2; // may or may not help 326 } 327 } else { 328 trieValue = trieValue2; 329 value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context); 330 if (pValue != nullptr) { *pValue = value; } 331 haveValue = true; 332 } 333 c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK; 334 } else /* MIXED */ { 335 int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK); 336 uint32_t trieValue2 = data[di]; 337 if (haveValue) { 338 if (trieValue2 != trieValue) { 339 if (filter == nullptr || 340 maybeFilterValue(trieValue2, initialValue, nullValue, 341 filter, context) != value) { 342 return c - 1; 343 } 344 trieValue = trieValue2; // may or may not help 345 } 346 } else { 347 trieValue = trieValue2; 348 value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context); 349 if (pValue != nullptr) { *pValue = value; } 350 haveValue = true; 351 } 352 while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) { 353 trieValue2 = data[++di]; 354 if (trieValue2 != trieValue) { 355 if (filter == nullptr || 356 maybeFilterValue(trieValue2, initialValue, nullValue, 357 filter, context) != value) { 358 return c - 1; 359 } 360 } 361 trieValue = trieValue2; // may or may not help 362 } 363 } 364 ++i; 365 } while (c < highStart); 366 U_ASSERT(haveValue); 367 if (maybeFilterValue(highValue, initialValue, nullValue, 368 filter, context) != value) { 369 return c - 1; 370 } else { 371 return MAX_UNICODE; 372 } 373} 374 375void 376writeBlock(uint32_t *block, uint32_t value) { 377 uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH; 378 while (block < limit) { 379 *block++ = value; 380 } 381} 382 383bool MutableCodePointTrie::ensureHighStart(UChar32 c) { 384 if (c >= highStart) { 385 // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction. 386 c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); 387 int32_t i = highStart >> UCPTRIE_SHIFT_3; 388 int32_t iLimit = c >> UCPTRIE_SHIFT_3; 389 if (iLimit > indexCapacity) { 390 uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4); 391 if (newIndex == nullptr) { return false; } 392 uprv_memcpy(newIndex, index, i * 4); 393 uprv_free(index); 394 index = newIndex; 395 indexCapacity = I_LIMIT; 396 } 397 do { 398 flags[i] = ALL_SAME; 399 index[i] = initialValue; 400 } while(++i < iLimit); 401 highStart = c; 402 } 403 return true; 404} 405 406int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) { 407 int32_t newBlock = dataLength; 408 int32_t newTop = newBlock + blockLength; 409 if (newTop > dataCapacity) { 410 int32_t capacity; 411 if (dataCapacity < MEDIUM_DATA_LENGTH) { 412 capacity = MEDIUM_DATA_LENGTH; 413 } else if (dataCapacity < MAX_DATA_LENGTH) { 414 capacity = MAX_DATA_LENGTH; 415 } else { 416 // Should never occur. 417 // Either MAX_DATA_LENGTH is incorrect, 418 // or the code writes more values than should be possible. 419 return -1; 420 } 421 uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4); 422 if (newData == nullptr) { 423 return -1; 424 } 425 uprv_memcpy(newData, data, (size_t)dataLength * 4); 426 uprv_free(data); 427 data = newData; 428 dataCapacity = capacity; 429 } 430 dataLength = newTop; 431 return newBlock; 432} 433 434/** 435 * No error checking for illegal arguments. 436 * 437 * @return -1 if no new data block available (out of memory in data array) 438 * @internal 439 */ 440int32_t MutableCodePointTrie::getDataBlock(int32_t i) { 441 if (flags[i] == MIXED) { 442 return index[i]; 443 } 444 if (i < BMP_I_LIMIT) { 445 int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH); 446 if (newBlock < 0) { return newBlock; } 447 int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1); 448 int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; 449 do { 450 U_ASSERT(flags[iStart] == ALL_SAME); 451 writeBlock(data + newBlock, index[iStart]); 452 flags[iStart] = MIXED; 453 index[iStart++] = newBlock; 454 newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; 455 } while (iStart < iLimit); 456 return index[i]; 457 } else { 458 int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH); 459 if (newBlock < 0) { return newBlock; } 460 writeBlock(data + newBlock, index[i]); 461 flags[i] = MIXED; 462 index[i] = newBlock; 463 return newBlock; 464 } 465} 466 467void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) { 468 if (U_FAILURE(errorCode)) { 469 return; 470 } 471 if ((uint32_t)c > MAX_UNICODE) { 472 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 473 return; 474 } 475 476 int32_t block; 477 if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) { 478 errorCode = U_MEMORY_ALLOCATION_ERROR; 479 return; 480 } 481 482 data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value; 483} 484 485void 486fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) { 487 uint32_t *pLimit = block + limit; 488 block += start; 489 while (block < pLimit) { 490 *block++ = value; 491 } 492} 493 494void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) { 495 if (U_FAILURE(errorCode)) { 496 return; 497 } 498 if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) { 499 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 500 return; 501 } 502 if (!ensureHighStart(end)) { 503 errorCode = U_MEMORY_ALLOCATION_ERROR; 504 return; 505 } 506 507 UChar32 limit = end + 1; 508 if (start & UCPTRIE_SMALL_DATA_MASK) { 509 // Set partial block at [start..following block boundary[. 510 int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); 511 if (block < 0) { 512 errorCode = U_MEMORY_ALLOCATION_ERROR; 513 return; 514 } 515 516 UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK; 517 if (nextStart <= limit) { 518 fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, 519 value); 520 start = nextStart; 521 } else { 522 fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK, 523 value); 524 return; 525 } 526 } 527 528 // Number of positions in the last, partial block. 529 int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK; 530 531 // Round down limit to a block boundary. 532 limit &= ~UCPTRIE_SMALL_DATA_MASK; 533 534 // Iterate over all-value blocks. 535 while (start < limit) { 536 int32_t i = start >> UCPTRIE_SHIFT_3; 537 if (flags[i] == ALL_SAME) { 538 index[i] = value; 539 } else /* MIXED */ { 540 fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value); 541 } 542 start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; 543 } 544 545 if (rest > 0) { 546 // Set partial block at [last block boundary..limit[. 547 int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); 548 if (block < 0) { 549 errorCode = U_MEMORY_ALLOCATION_ERROR; 550 return; 551 } 552 553 fillBlock(data + block, 0, rest, value); 554 } 555} 556 557/* compaction --------------------------------------------------------------- */ 558 559void MutableCodePointTrie::maskValues(uint32_t mask) { 560 initialValue &= mask; 561 errorValue &= mask; 562 highValue &= mask; 563 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; 564 for (int32_t i = 0; i < iLimit; ++i) { 565 if (flags[i] == ALL_SAME) { 566 index[i] &= mask; 567 } 568 } 569 for (int32_t i = 0; i < dataLength; ++i) { 570 data[i] &= mask; 571 } 572} 573 574template<typename UIntA, typename UIntB> 575bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) { 576 while (length > 0 && *s == *t) { 577 ++s; 578 ++t; 579 --length; 580 } 581 return length == 0; 582} 583 584bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) { 585 const uint32_t *pLimit = p + length; 586 while (p < pLimit && *p == value) { ++p; } 587 return p == pLimit; 588} 589 590/** Search for an identical block. */ 591int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length, 592 const uint16_t *q, int32_t qStart, int32_t blockLength) { 593 // Ensure that we do not even partially get past length. 594 length -= blockLength; 595 596 q += qStart; 597 while (pStart <= length) { 598 if (equalBlocks(p + pStart, q, blockLength)) { 599 return pStart; 600 } 601 ++pStart; 602 } 603 return -1; 604} 605 606int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit, 607 uint32_t value, int32_t blockLength) { 608 // Ensure that we do not even partially get past limit. 609 limit -= blockLength; 610 611 for (int32_t block = start; block <= limit; ++block) { 612 if (p[block] == value) { 613 for (int32_t i = 1;; ++i) { 614 if (i == blockLength) { 615 return block; 616 } 617 if (p[block + i] != value) { 618 block += i; 619 break; 620 } 621 } 622 } 623 } 624 return -1; 625} 626 627/** 628 * Look for maximum overlap of the beginning of the other block 629 * with the previous, adjacent block. 630 */ 631template<typename UIntA, typename UIntB> 632int32_t getOverlap(const UIntA *p, int32_t length, 633 const UIntB *q, int32_t qStart, int32_t blockLength) { 634 int32_t overlap = blockLength - 1; 635 U_ASSERT(overlap <= length); 636 q += qStart; 637 while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { 638 --overlap; 639 } 640 return overlap; 641} 642 643int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value, 644 int32_t blockLength) { 645 int32_t min = length - (blockLength - 1); 646 int32_t i = length; 647 while (min < i && p[i - 1] == value) { --i; } 648 return length - i; 649} 650 651bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) { 652 for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { 653 if (index[i] == dataOffset) { 654 return true; 655 } 656 } 657 return false; 658} 659 660/** 661 * Finds the start of the last range in the trie by enumerating backward. 662 * Indexes for code points higher than this will be omitted. 663 */ 664UChar32 MutableCodePointTrie::findHighStart() const { 665 int32_t i = highStart >> UCPTRIE_SHIFT_3; 666 while (i > 0) { 667 bool match; 668 if (flags[--i] == ALL_SAME) { 669 match = index[i] == highValue; 670 } else /* MIXED */ { 671 const uint32_t *p = data + index[i]; 672 for (int32_t j = 0;; ++j) { 673 if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) { 674 match = true; 675 break; 676 } 677 if (p[j] != highValue) { 678 match = false; 679 break; 680 } 681 } 682 } 683 if (!match) { 684 return (i + 1) << UCPTRIE_SHIFT_3; 685 } 686 } 687 return 0; 688} 689 690class AllSameBlocks { 691public: 692 static constexpr int32_t NEW_UNIQUE = -1; 693 static constexpr int32_t OVERFLOW = -2; 694 695 AllSameBlocks() : length(0), mostRecent(-1) {} 696 697 int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) { 698 if (mostRecent >= 0 && values[mostRecent] == value) { 699 refCounts[mostRecent] += count; 700 return indexes[mostRecent]; 701 } 702 for (int32_t i = 0; i < length; ++i) { 703 if (values[i] == value) { 704 mostRecent = i; 705 refCounts[i] += count; 706 return indexes[i]; 707 } 708 } 709 if (length == CAPACITY) { 710 return OVERFLOW; 711 } 712 mostRecent = length; 713 indexes[length] = index; 714 values[length] = value; 715 refCounts[length++] = count; 716 return NEW_UNIQUE; 717 } 718 719 /** Replaces the block which has the lowest reference count. */ 720 void add(int32_t index, int32_t count, uint32_t value) { 721 U_ASSERT(length == CAPACITY); 722 int32_t least = -1; 723 int32_t leastCount = I_LIMIT; 724 for (int32_t i = 0; i < length; ++i) { 725 U_ASSERT(values[i] != value); 726 if (refCounts[i] < leastCount) { 727 least = i; 728 leastCount = refCounts[i]; 729 } 730 } 731 U_ASSERT(least >= 0); 732 mostRecent = least; 733 indexes[least] = index; 734 values[least] = value; 735 refCounts[least] = count; 736 } 737 738 int32_t findMostUsed() const { 739 if (length == 0) { return -1; } 740 int32_t max = -1; 741 int32_t maxCount = 0; 742 for (int32_t i = 0; i < length; ++i) { 743 if (refCounts[i] > maxCount) { 744 max = i; 745 maxCount = refCounts[i]; 746 } 747 } 748 return indexes[max]; 749 } 750 751private: 752 static constexpr int32_t CAPACITY = 32; 753 754 int32_t length; 755 int32_t mostRecent; 756 757 int32_t indexes[CAPACITY]; 758 uint32_t values[CAPACITY]; 759 int32_t refCounts[CAPACITY]; 760}; 761 762// Custom hash table for mixed-value blocks to be found anywhere in the 763// compacted data or index so far. 764class MixedBlocks { 765public: 766 MixedBlocks() {} 767 ~MixedBlocks() { 768 uprv_free(table); 769 } 770 771 bool init(int32_t maxLength, int32_t newBlockLength) { 772 // We store actual data indexes + 1 to reserve 0 for empty entries. 773 int32_t maxDataIndex = maxLength - newBlockLength + 1; 774 int32_t newLength; 775 if (maxDataIndex <= 0xfff) { // 4k 776 newLength = 6007; 777 shift = 12; 778 mask = 0xfff; 779 } else if (maxDataIndex <= 0x7fff) { // 32k 780 newLength = 50021; 781 shift = 15; 782 mask = 0x7fff; 783 } else if (maxDataIndex <= 0x1ffff) { // 128k 784 newLength = 200003; 785 shift = 17; 786 mask = 0x1ffff; 787 } else { 788 // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M 789 newLength = 1500007; 790 shift = 21; 791 mask = 0x1fffff; 792 } 793 if (newLength > capacity) { 794 uprv_free(table); 795 table = (uint32_t *)uprv_malloc(newLength * 4); 796 if (table == nullptr) { 797 return false; 798 } 799 capacity = newLength; 800 } 801 length = newLength; 802 uprv_memset(table, 0, length * 4); 803 804 blockLength = newBlockLength; 805 return true; 806 } 807 808 template<typename UInt> 809 void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) { 810 int32_t start = prevDataLength - blockLength; 811 if (start >= minStart) { 812 ++start; // Skip the last block that we added last time. 813 } else { 814 start = minStart; // Begin with the first full block. 815 } 816 for (int32_t end = newDataLength - blockLength; start <= end; ++start) { 817 uint32_t hashCode = makeHashCode(data, start); 818 addEntry(data, start, hashCode, start); 819 } 820 } 821 822 template<typename UIntA, typename UIntB> 823 int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const { 824 uint32_t hashCode = makeHashCode(blockData, blockStart); 825 int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode); 826 if (entryIndex >= 0) { 827 return (table[entryIndex] & mask) - 1; 828 } else { 829 return -1; 830 } 831 } 832 833 int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const { 834 uint32_t hashCode = makeHashCode(blockValue); 835 int32_t entryIndex = findEntry(data, blockValue, hashCode); 836 if (entryIndex >= 0) { 837 return (table[entryIndex] & mask) - 1; 838 } else { 839 return -1; 840 } 841 } 842 843private: 844 template<typename UInt> 845 uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const { 846 int32_t blockLimit = blockStart + blockLength; 847 uint32_t hashCode = blockData[blockStart++]; 848 do { 849 hashCode = 37 * hashCode + blockData[blockStart++]; 850 } while (blockStart < blockLimit); 851 return hashCode; 852 } 853 854 uint32_t makeHashCode(uint32_t blockValue) const { 855 uint32_t hashCode = blockValue; 856 for (int32_t i = 1; i < blockLength; ++i) { 857 hashCode = 37 * hashCode + blockValue; 858 } 859 return hashCode; 860 } 861 862 template<typename UInt> 863 void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) { 864 U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask); 865 int32_t entryIndex = findEntry(data, data, blockStart, hashCode); 866 if (entryIndex < 0) { 867 table[~entryIndex] = (hashCode << shift) | (dataIndex + 1); 868 } 869 } 870 871 template<typename UIntA, typename UIntB> 872 int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart, 873 uint32_t hashCode) const { 874 uint32_t shiftedHashCode = hashCode << shift; 875 int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1 876 for (int32_t entryIndex = initialEntryIndex;;) { 877 uint32_t entry = table[entryIndex]; 878 if (entry == 0) { 879 return ~entryIndex; 880 } 881 if ((entry & ~mask) == shiftedHashCode) { 882 int32_t dataIndex = (entry & mask) - 1; 883 if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) { 884 return entryIndex; 885 } 886 } 887 entryIndex = nextIndex(initialEntryIndex, entryIndex); 888 } 889 } 890 891 int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const { 892 uint32_t shiftedHashCode = hashCode << shift; 893 int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1 894 for (int32_t entryIndex = initialEntryIndex;;) { 895 uint32_t entry = table[entryIndex]; 896 if (entry == 0) { 897 return ~entryIndex; 898 } 899 if ((entry & ~mask) == shiftedHashCode) { 900 int32_t dataIndex = (entry & mask) - 1; 901 if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) { 902 return entryIndex; 903 } 904 } 905 entryIndex = nextIndex(initialEntryIndex, entryIndex); 906 } 907 } 908 909 inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const { 910 // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length); 911 return (entryIndex + initialEntryIndex) % length; 912 } 913 914 // Hash table. 915 // The length is a prime number, larger than the maximum data length. 916 // The "shift" lower bits store a data index + 1. 917 // The remaining upper bits store a partial hashCode of the block data values. 918 uint32_t *table = nullptr; 919 int32_t capacity = 0; 920 int32_t length = 0; 921 int32_t shift = 0; 922 uint32_t mask = 0; 923 924 int32_t blockLength = 0; 925}; 926 927int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) { 928#ifdef UCPTRIE_DEBUG 929 bool overflow = false; 930#endif 931 932 // ASCII data will be stored as a linear table, even if the following code 933 // does not yet count it that way. 934 int32_t newDataCapacity = ASCII_LIMIT; 935 // Add room for a small data null block in case it would match the start of 936 // a fast data block where dataNullOffset must not be set in that case. 937 newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; 938 // Add room for special values (errorValue, highValue) and padding. 939 newDataCapacity += 4; 940 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; 941 int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; 942 int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; 943 for (int32_t i = 0; i < iLimit; i += inc) { 944 if (i == fastILimit) { 945 blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; 946 inc = 1; 947 } 948 uint32_t value = index[i]; 949 if (flags[i] == MIXED) { 950 // Really mixed? 951 const uint32_t *p = data + value; 952 value = *p; 953 if (allValuesSameAs(p + 1, blockLength - 1, value)) { 954 flags[i] = ALL_SAME; 955 index[i] = value; 956 // Fall through to ALL_SAME handling. 957 } else { 958 newDataCapacity += blockLength; 959 continue; 960 } 961 } else { 962 U_ASSERT(flags[i] == ALL_SAME); 963 if (inc > 1) { 964 // Do all of the fast-range data block's ALL_SAME parts have the same value? 965 bool allSame = true; 966 int32_t next_i = i + inc; 967 for (int32_t j = i + 1; j < next_i; ++j) { 968 U_ASSERT(flags[j] == ALL_SAME); 969 if (index[j] != value) { 970 allSame = false; 971 break; 972 } 973 } 974 if (!allSame) { 975 // Turn it into a MIXED block. 976 if (getDataBlock(i) < 0) { 977 return -1; 978 } 979 newDataCapacity += blockLength; 980 continue; 981 } 982 } 983 } 984 // Is there another ALL_SAME block with the same value? 985 int32_t other = allSameBlocks.findOrAdd(i, inc, value); 986 if (other == AllSameBlocks::OVERFLOW) { 987 // The fixed-size array overflowed. Slow check for a duplicate block. 988#ifdef UCPTRIE_DEBUG 989 if (!overflow) { 990 puts("UCPTrie AllSameBlocks overflow"); 991 overflow = true; 992 } 993#endif 994 int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; 995 for (int32_t j = 0;; j += jInc) { 996 if (j == i) { 997 allSameBlocks.add(i, inc, value); 998 break; 999 } 1000 if (j == fastILimit) { 1001 jInc = 1; 1002 } 1003 if (flags[j] == ALL_SAME && index[j] == value) { 1004 allSameBlocks.add(j, jInc + inc, value); 1005 other = j; 1006 break; 1007 // We could keep counting blocks with the same value 1008 // before we add the first one, which may improve compaction in rare cases, 1009 // but it would make it slower. 1010 } 1011 } 1012 } 1013 if (other >= 0) { 1014 flags[i] = SAME_AS; 1015 index[i] = other; 1016 } else { 1017 // New unique same-value block. 1018 newDataCapacity += blockLength; 1019 } 1020 } 1021 return newDataCapacity; 1022} 1023 1024#ifdef UCPTRIE_DEBUG 1025# define DEBUG_DO(expr) expr 1026#else 1027# define DEBUG_DO(expr) 1028#endif 1029 1030#ifdef UCPTRIE_DEBUG 1031// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF 1032int32_t appendValue(char s[], int32_t length, uint32_t value) { 1033 value ^= value >> 16; 1034 value ^= value >> 8; 1035 s[length] = 0xE2; 1036 s[length + 1] = (char)(0xA0 + ((value >> 6) & 3)); 1037 s[length + 2] = (char)(0x80 + (value & 0x3F)); 1038 return length + 3; 1039} 1040 1041void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value, 1042 UChar32 start, int32_t overlap, uint32_t initialValue) { 1043 char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3]; 1044 int32_t length = 0; 1045 int32_t i; 1046 for (i = 0; i < overlap; ++i) { 1047 length = appendValue(s, length, 0); // Braille blank 1048 } 1049 s[length++] = '|'; 1050 for (; i < blockLength; ++i) { 1051 if (block != nullptr) { 1052 value = block[i]; 1053 } 1054 if (value == initialValue) { 1055 value = 0x40; // Braille lower left dot 1056 } 1057 length = appendValue(s, length, value); 1058 } 1059 s[length] = 0; 1060 start += overlap; 1061 if (start <= 0xffff) { 1062 printf(" %04lX %s|\n", (long)start, s); 1063 } else if (start <= 0xfffff) { 1064 printf(" %5lX %s|\n", (long)start, s); 1065 } else { 1066 printf(" %6lX %s|\n", (long)start, s); 1067 } 1068} 1069#endif 1070 1071/** 1072 * Compacts a build-time trie. 1073 * 1074 * The compaction 1075 * - removes blocks that are identical with earlier ones 1076 * - overlaps each new non-duplicate block as much as possible with the previously-written one 1077 * - works with fast-range data blocks whose length is a multiple of that of 1078 * higher-code-point data blocks 1079 * 1080 * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks. 1081 */ 1082int32_t MutableCodePointTrie::compactData( 1083 int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity, 1084 int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) { 1085#ifdef UCPTRIE_DEBUG 1086 int32_t countSame=0, sumOverlaps=0; 1087 bool printData = dataLength == 29088 /* line.brk */ || 1088 // dataLength == 30048 /* CanonIterData */ || 1089 dataLength == 50400 /* zh.txt~stroke */; 1090#endif 1091 1092 // The linear ASCII data has been copied into newData already. 1093 int32_t newDataLength = 0; 1094 for (int32_t i = 0; newDataLength < ASCII_LIMIT; 1095 newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { 1096 index[i] = newDataLength; 1097#ifdef UCPTRIE_DEBUG 1098 if (printData) { 1099 printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue); 1100 } 1101#endif 1102 } 1103 1104 int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; 1105 if (!mixedBlocks.init(newDataCapacity, blockLength)) { 1106 errorCode = U_MEMORY_ALLOCATION_ERROR; 1107 return 0; 1108 } 1109 mixedBlocks.extend(newData, 0, 0, newDataLength); 1110 1111 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; 1112 int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; 1113 int32_t fastLength = 0; 1114 for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) { 1115 if (i == fastILimit) { 1116 blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; 1117 inc = 1; 1118 fastLength = newDataLength; 1119 if (!mixedBlocks.init(newDataCapacity, blockLength)) { 1120 errorCode = U_MEMORY_ALLOCATION_ERROR; 1121 return 0; 1122 } 1123 mixedBlocks.extend(newData, 0, 0, newDataLength); 1124 } 1125 if (flags[i] == ALL_SAME) { 1126 uint32_t value = index[i]; 1127 // Find an earlier part of the data array of length blockLength 1128 // that is filled with this value. 1129 int32_t n = mixedBlocks.findAllSameBlock(newData, value); 1130 // If we find a match, and the current block is the data null block, 1131 // and it is not a fast block but matches the start of a fast block, 1132 // then we need to continue looking. 1133 // This is because this small block is shorter than the fast block, 1134 // and not all of the rest of the fast block is filled with this value. 1135 // Otherwise trie.getRange() would detect that the fast block starts at 1136 // dataNullOffset and assume incorrectly that it is filled with the null value. 1137 while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength && 1138 isStartOfSomeFastBlock(n, index, fastILimit)) { 1139 n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength); 1140 } 1141 if (n >= 0) { 1142 DEBUG_DO(++countSame); 1143 index[i] = n; 1144 } else { 1145 n = getAllSameOverlap(newData, newDataLength, value, blockLength); 1146 DEBUG_DO(sumOverlaps += n); 1147#ifdef UCPTRIE_DEBUG 1148 if (printData) { 1149 printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue); 1150 } 1151#endif 1152 index[i] = newDataLength - n; 1153 int32_t prevDataLength = newDataLength; 1154 while (n < blockLength) { 1155 newData[newDataLength++] = value; 1156 ++n; 1157 } 1158 mixedBlocks.extend(newData, 0, prevDataLength, newDataLength); 1159 } 1160 } else if (flags[i] == MIXED) { 1161 const uint32_t *block = data + index[i]; 1162 int32_t n = mixedBlocks.findBlock(newData, block, 0); 1163 if (n >= 0) { 1164 DEBUG_DO(++countSame); 1165 index[i] = n; 1166 } else { 1167 n = getOverlap(newData, newDataLength, block, 0, blockLength); 1168 DEBUG_DO(sumOverlaps += n); 1169#ifdef UCPTRIE_DEBUG 1170 if (printData) { 1171 printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue); 1172 } 1173#endif 1174 index[i] = newDataLength - n; 1175 int32_t prevDataLength = newDataLength; 1176 while (n < blockLength) { 1177 newData[newDataLength++] = block[n++]; 1178 } 1179 mixedBlocks.extend(newData, 0, prevDataLength, newDataLength); 1180 } 1181 } else /* SAME_AS */ { 1182 uint32_t j = index[i]; 1183 index[i] = index[j]; 1184 } 1185 } 1186 1187#ifdef UCPTRIE_DEBUG 1188 /* we saved some space */ 1189 printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", 1190 (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps); 1191#endif 1192 return newDataLength; 1193} 1194 1195int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, 1196 UErrorCode &errorCode) { 1197 int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3); 1198 if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) { 1199 // Only the linear fast index, no multi-stage index tables. 1200 index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; 1201 return fastIndexLength; 1202 } 1203 1204 // Condense the fast index table. 1205 // Also, does it contain an index-3 block with all dataNullOffset? 1206 uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength 1207 int32_t i3FirstNull = -1; 1208 for (int32_t i = 0, j = 0; i < fastILimit; ++j) { 1209 uint32_t i3 = index[i]; 1210 fastIndex[j] = (uint16_t)i3; 1211 if (i3 == (uint32_t)dataNullOffset) { 1212 if (i3FirstNull < 0) { 1213 i3FirstNull = j; 1214 } else if (index3NullOffset < 0 && 1215 (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) { 1216 index3NullOffset = i3FirstNull; 1217 } 1218 } else { 1219 i3FirstNull = -1; 1220 } 1221 // Set the index entries that compactData() skipped. 1222 // Needed when the multi-stage index covers the fast index range as well. 1223 int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; 1224 while (++i < iNext) { 1225 i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; 1226 index[i] = i3; 1227 } 1228 } 1229 1230 if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) { 1231 errorCode = U_MEMORY_ALLOCATION_ERROR; 1232 return 0; 1233 } 1234 mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength); 1235 1236 // Examine index-3 blocks. For each determine one of: 1237 // - same as the index-3 null block 1238 // - same as a fast-index block 1239 // - 16-bit indexes 1240 // - 18-bit indexes 1241 // We store this in the first flags entry for the index-3 block. 1242 // 1243 // Also determine an upper limit for the index-3 table length. 1244 int32_t index3Capacity = 0; 1245 i3FirstNull = index3NullOffset; 1246 bool hasLongI3Blocks = false; 1247 // If the fast index covers the whole BMP, then 1248 // the multi-stage index is only for supplementary code points. 1249 // Otherwise, the multi-stage index covers all of Unicode. 1250 int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT; 1251 int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; 1252 for (int32_t i = iStart; i < iLimit;) { 1253 int32_t j = i; 1254 int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; 1255 uint32_t oredI3 = 0; 1256 bool isNull = true; 1257 do { 1258 uint32_t i3 = index[j]; 1259 oredI3 |= i3; 1260 if (i3 != (uint32_t)dataNullOffset) { 1261 isNull = false; 1262 } 1263 } while (++j < jLimit); 1264 if (isNull) { 1265 flags[i] = I3_NULL; 1266 if (i3FirstNull < 0) { 1267 if (oredI3 <= 0xffff) { 1268 index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; 1269 } else { 1270 index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; 1271 hasLongI3Blocks = true; 1272 } 1273 i3FirstNull = 0; 1274 } 1275 } else { 1276 if (oredI3 <= 0xffff) { 1277 int32_t n = mixedBlocks.findBlock(fastIndex, index, i); 1278 if (n >= 0) { 1279 flags[i] = I3_BMP; 1280 index[i] = n; 1281 } else { 1282 flags[i] = I3_16; 1283 index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; 1284 } 1285 } else { 1286 flags[i] = I3_18; 1287 index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; 1288 hasLongI3Blocks = true; 1289 } 1290 } 1291 i = j; 1292 } 1293 1294 int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3; 1295 1296 // Length of the index-1 table, rounded up. 1297 int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2; 1298 1299 // Index table: Fast index, index-1, index-3, index-2. 1300 // +1 for possible index table padding. 1301 int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1; 1302 index16 = (uint16_t *)uprv_malloc(index16Capacity * 2); 1303 if (index16 == nullptr) { 1304 errorCode = U_MEMORY_ALLOCATION_ERROR; 1305 return 0; 1306 } 1307 uprv_memcpy(index16, fastIndex, fastIndexLength * 2); 1308 1309 if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) { 1310 errorCode = U_MEMORY_ALLOCATION_ERROR; 1311 return 0; 1312 } 1313 MixedBlocks longI3Blocks; 1314 if (hasLongI3Blocks) { 1315 if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) { 1316 errorCode = U_MEMORY_ALLOCATION_ERROR; 1317 return 0; 1318 } 1319 } 1320 1321 // Compact the index-3 table and write an uncompacted version of the index-2 table. 1322 uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity 1323 int32_t i2Length = 0; 1324 i3FirstNull = index3NullOffset; 1325 int32_t index3Start = fastIndexLength + index1Length; 1326 int32_t indexLength = index3Start; 1327 for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) { 1328 int32_t i3; 1329 uint8_t f = flags[i]; 1330 if (f == I3_NULL && i3FirstNull < 0) { 1331 // First index-3 null block. Write & overlap it like a normal block, then remember it. 1332 f = dataNullOffset <= 0xffff ? I3_16 : I3_18; 1333 i3FirstNull = 0; 1334 } 1335 if (f == I3_NULL) { 1336 i3 = index3NullOffset; 1337 } else if (f == I3_BMP) { 1338 i3 = index[i]; 1339 } else if (f == I3_16) { 1340 int32_t n = mixedBlocks.findBlock(index16, index, i); 1341 if (n >= 0) { 1342 i3 = n; 1343 } else { 1344 if (indexLength == index3Start) { 1345 // No overlap at the boundary between the index-1 and index-3 tables. 1346 n = 0; 1347 } else { 1348 n = getOverlap(index16, indexLength, 1349 index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); 1350 } 1351 i3 = indexLength - n; 1352 int32_t prevIndexLength = indexLength; 1353 while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) { 1354 index16[indexLength++] = index[i + n++]; 1355 } 1356 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); 1357 if (hasLongI3Blocks) { 1358 longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength); 1359 } 1360 } 1361 } else { 1362 U_ASSERT(f == I3_18); 1363 U_ASSERT(hasLongI3Blocks); 1364 // Encode an index-3 block that contains one or more data indexes exceeding 16 bits. 1365 int32_t j = i; 1366 int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; 1367 int32_t k = indexLength; 1368 do { 1369 ++k; 1370 uint32_t v = index[j++]; 1371 uint32_t upperBits = (v & 0x30000) >> 2; 1372 index16[k++] = v; 1373 v = index[j++]; 1374 upperBits |= (v & 0x30000) >> 4; 1375 index16[k++] = v; 1376 v = index[j++]; 1377 upperBits |= (v & 0x30000) >> 6; 1378 index16[k++] = v; 1379 v = index[j++]; 1380 upperBits |= (v & 0x30000) >> 8; 1381 index16[k++] = v; 1382 v = index[j++]; 1383 upperBits |= (v & 0x30000) >> 10; 1384 index16[k++] = v; 1385 v = index[j++]; 1386 upperBits |= (v & 0x30000) >> 12; 1387 index16[k++] = v; 1388 v = index[j++]; 1389 upperBits |= (v & 0x30000) >> 14; 1390 index16[k++] = v; 1391 v = index[j++]; 1392 upperBits |= (v & 0x30000) >> 16; 1393 index16[k++] = v; 1394 index16[k - 9] = upperBits; 1395 } while (j < jLimit); 1396 int32_t n = longI3Blocks.findBlock(index16, index16, indexLength); 1397 if (n >= 0) { 1398 i3 = n | 0x8000; 1399 } else { 1400 if (indexLength == index3Start) { 1401 // No overlap at the boundary between the index-1 and index-3 tables. 1402 n = 0; 1403 } else { 1404 n = getOverlap(index16, indexLength, 1405 index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); 1406 } 1407 i3 = (indexLength - n) | 0x8000; 1408 int32_t prevIndexLength = indexLength; 1409 if (n > 0) { 1410 int32_t start = indexLength; 1411 while (n < INDEX_3_18BIT_BLOCK_LENGTH) { 1412 index16[indexLength++] = index16[start + n++]; 1413 } 1414 } else { 1415 indexLength += INDEX_3_18BIT_BLOCK_LENGTH; 1416 } 1417 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); 1418 if (hasLongI3Blocks) { 1419 longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength); 1420 } 1421 } 1422 } 1423 if (index3NullOffset < 0 && i3FirstNull >= 0) { 1424 index3NullOffset = i3; 1425 } 1426 // Set the index-2 table entry. 1427 index2[i2Length++] = i3; 1428 } 1429 U_ASSERT(i2Length == index2Capacity); 1430 U_ASSERT(indexLength <= index3Start + index3Capacity); 1431 1432 if (index3NullOffset < 0) { 1433 index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; 1434 } 1435 if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) { 1436 // The index-3 offsets exceed 15 bits, or 1437 // the last one cannot be distinguished from the no-null-block value. 1438 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 1439 return 0; 1440 } 1441 1442 // Compact the index-2 table and write the index-1 table. 1443 static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH, 1444 "must re-init mixedBlocks"); 1445 int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH; 1446 int32_t i1 = fastIndexLength; 1447 for (int32_t i = 0; i < i2Length; i += blockLength) { 1448 int32_t n; 1449 if ((i2Length - i) >= blockLength) { 1450 // normal block 1451 U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH); 1452 n = mixedBlocks.findBlock(index16, index2, i); 1453 } else { 1454 // highStart is inside the last index-2 block. Shorten it. 1455 blockLength = i2Length - i; 1456 n = findSameBlock(index16, index3Start, indexLength, 1457 index2, i, blockLength); 1458 } 1459 int32_t i2; 1460 if (n >= 0) { 1461 i2 = n; 1462 } else { 1463 if (indexLength == index3Start) { 1464 // No overlap at the boundary between the index-1 and index-3/2 tables. 1465 n = 0; 1466 } else { 1467 n = getOverlap(index16, indexLength, index2, i, blockLength); 1468 } 1469 i2 = indexLength - n; 1470 int32_t prevIndexLength = indexLength; 1471 while (n < blockLength) { 1472 index16[indexLength++] = index2[i + n++]; 1473 } 1474 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); 1475 } 1476 // Set the index-1 table entry. 1477 index16[i1++] = i2; 1478 } 1479 U_ASSERT(i1 == index3Start); 1480 U_ASSERT(indexLength <= index16Capacity); 1481 1482#ifdef UCPTRIE_DEBUG 1483 /* we saved some space */ 1484 printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n", 1485 (long)iLimit, (long)indexLength); 1486#endif 1487 1488 return indexLength; 1489} 1490 1491int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) { 1492 // Find the real highStart and round it up. 1493 U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); 1494 highValue = get(MAX_UNICODE); 1495 int32_t realHighStart = findHighStart(); 1496 realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) & 1497 ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); 1498 if (realHighStart == UNICODE_LIMIT) { 1499 highValue = initialValue; 1500 } 1501 1502#ifdef UCPTRIE_DEBUG 1503 printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", 1504 (long)realHighStart, (long)highValue, (long)initialValue); 1505#endif 1506 1507 // We always store indexes and data values for the fast range. 1508 // Pin highStart to the top of that range while building. 1509 UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3; 1510 if (realHighStart < fastLimit) { 1511 for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) { 1512 flags[i] = ALL_SAME; 1513 index[i] = highValue; 1514 } 1515 highStart = fastLimit; 1516 } else { 1517 highStart = realHighStart; 1518 } 1519 1520 uint32_t asciiData[ASCII_LIMIT]; 1521 for (int32_t i = 0; i < ASCII_LIMIT; ++i) { 1522 asciiData[i] = get(i); 1523 } 1524 1525 // First we look for which data blocks have the same value repeated over the whole block, 1526 // deduplicate such blocks, find a good null data block (for faster enumeration), 1527 // and get an upper bound for the necessary data array length. 1528 AllSameBlocks allSameBlocks; 1529 int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks); 1530 if (newDataCapacity < 0) { 1531 errorCode = U_MEMORY_ALLOCATION_ERROR; 1532 return 0; 1533 } 1534 uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4); 1535 if (newData == nullptr) { 1536 errorCode = U_MEMORY_ALLOCATION_ERROR; 1537 return 0; 1538 } 1539 uprv_memcpy(newData, asciiData, sizeof(asciiData)); 1540 1541 int32_t dataNullIndex = allSameBlocks.findMostUsed(); 1542 1543 MixedBlocks mixedBlocks; 1544 int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity, 1545 dataNullIndex, mixedBlocks, errorCode); 1546 if (U_FAILURE(errorCode)) { return 0; } 1547 U_ASSERT(newDataLength <= newDataCapacity); 1548 uprv_free(data); 1549 data = newData; 1550 dataCapacity = newDataCapacity; 1551 dataLength = newDataLength; 1552 if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) { 1553 // The offset of the last data block is too high to be stored in the index table. 1554 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 1555 return 0; 1556 } 1557 1558 if (dataNullIndex >= 0) { 1559 dataNullOffset = index[dataNullIndex]; 1560#ifdef UCPTRIE_DEBUG 1561 if (data[dataNullOffset] != initialValue) { 1562 printf("UCPTrie initialValue %lx -> more common nullValue %lx\n", 1563 (long)initialValue, (long)data[dataNullOffset]); 1564 } 1565#endif 1566 initialValue = data[dataNullOffset]; 1567 } else { 1568 dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET; 1569 } 1570 1571 int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode); 1572 highStart = realHighStart; 1573 return indexLength; 1574} 1575 1576UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) { 1577 if (U_FAILURE(errorCode)) { 1578 return nullptr; 1579 } 1580 if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || 1581 valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) { 1582 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 1583 return nullptr; 1584 } 1585 1586 // The mutable trie always stores 32-bit values. 1587 // When we build a UCPTrie for a smaller value width, we first mask off unused bits 1588 // before compacting the data. 1589 switch (valueWidth) { 1590 case UCPTRIE_VALUE_BITS_32: 1591 break; 1592 case UCPTRIE_VALUE_BITS_16: 1593 maskValues(0xffff); 1594 break; 1595 case UCPTRIE_VALUE_BITS_8: 1596 maskValues(0xff); 1597 break; 1598 default: 1599 break; 1600 } 1601 1602 UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT; 1603 int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode); 1604 if (U_FAILURE(errorCode)) { 1605 clear(); 1606 return nullptr; 1607 } 1608 1609 // Ensure data table alignment: The index length must be even for uint32_t data. 1610 if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) { 1611 index16[indexLength++] = 0xffee; // arbitrary value 1612 } 1613 1614 // Make the total trie structure length a multiple of 4 bytes by padding the data table, 1615 // and store special values as the last two data values. 1616 int32_t length = indexLength * 2; 1617 if (valueWidth == UCPTRIE_VALUE_BITS_16) { 1618 if (((indexLength ^ dataLength) & 1) != 0) { 1619 // padding 1620 data[dataLength++] = errorValue; 1621 } 1622 if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { 1623 data[dataLength++] = highValue; 1624 data[dataLength++] = errorValue; 1625 } 1626 length += dataLength * 2; 1627 } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { 1628 // 32-bit data words never need padding to a multiple of 4 bytes. 1629 if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { 1630 if (data[dataLength - 1] != highValue) { 1631 data[dataLength++] = highValue; 1632 } 1633 data[dataLength++] = errorValue; 1634 } 1635 length += dataLength * 4; 1636 } else { 1637 int32_t and3 = (length + dataLength) & 3; 1638 if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) { 1639 // all set 1640 } else if(and3 == 3 && data[dataLength - 1] == highValue) { 1641 data[dataLength++] = errorValue; 1642 } else { 1643 while (and3 != 2) { 1644 data[dataLength++] = highValue; 1645 and3 = (and3 + 1) & 3; 1646 } 1647 data[dataLength++] = highValue; 1648 data[dataLength++] = errorValue; 1649 } 1650 length += dataLength; 1651 } 1652 1653 // Calculate the total length of the UCPTrie as a single memory block. 1654 length += sizeof(UCPTrie); 1655 U_ASSERT((length & 3) == 0); 1656 1657 uint8_t *bytes = (uint8_t *)uprv_malloc(length); 1658 if (bytes == nullptr) { 1659 errorCode = U_MEMORY_ALLOCATION_ERROR; 1660 clear(); 1661 return nullptr; 1662 } 1663 UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes); 1664 uprv_memset(trie, 0, sizeof(UCPTrie)); 1665 trie->indexLength = indexLength; 1666 trie->dataLength = dataLength; 1667 1668 trie->highStart = highStart; 1669 // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes. 1670 // Runtime code needs to then test for the real highStart as well. 1671 trie->shifted12HighStart = (highStart + 0xfff) >> 12; 1672 trie->type = type; 1673 trie->valueWidth = valueWidth; 1674 1675 trie->index3NullOffset = index3NullOffset; 1676 trie->dataNullOffset = dataNullOffset; 1677 trie->nullValue = initialValue; 1678 1679 bytes += sizeof(UCPTrie); 1680 1681 // Fill the index and data arrays. 1682 uint16_t *dest16 = (uint16_t *)bytes; 1683 trie->index = dest16; 1684 1685 if (highStart <= fastLimit) { 1686 // Condense only the fast index from the mutable-trie index. 1687 for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) { 1688 *dest16++ = (uint16_t)index[i]; // dest16[j] 1689 } 1690 } else { 1691 uprv_memcpy(dest16, index16, indexLength * 2); 1692 dest16 += indexLength; 1693 } 1694 bytes += indexLength * 2; 1695 1696 // Write the data array. 1697 const uint32_t *p = data; 1698 switch (valueWidth) { 1699 case UCPTRIE_VALUE_BITS_16: 1700 // Write 16-bit data values. 1701 trie->data.ptr16 = dest16; 1702 for (int32_t i = dataLength; i > 0; --i) { 1703 *dest16++ = (uint16_t)*p++; 1704 } 1705 break; 1706 case UCPTRIE_VALUE_BITS_32: 1707 // Write 32-bit data values. 1708 trie->data.ptr32 = (uint32_t *)bytes; 1709 uprv_memcpy(bytes, p, (size_t)dataLength * 4); 1710 break; 1711 case UCPTRIE_VALUE_BITS_8: 1712 // Write 8-bit data values. 1713 trie->data.ptr8 = bytes; 1714 for (int32_t i = dataLength; i > 0; --i) { 1715 *bytes++ = (uint8_t)*p++; 1716 } 1717 break; 1718 default: 1719 // Will not occur, valueWidth checked at the beginning. 1720 break; 1721 } 1722 1723#ifdef UCPTRIE_DEBUG 1724 trie->name = name; 1725 1726 ucptrie_printLengths(trie, ""); 1727#endif 1728 1729 clear(); 1730 return trie; 1731} 1732 1733} // namespace 1734 1735U_NAMESPACE_END 1736 1737U_NAMESPACE_USE 1738 1739U_CAPI UMutableCPTrie * U_EXPORT2 1740umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { 1741 if (U_FAILURE(*pErrorCode)) { 1742 return nullptr; 1743 } 1744 LocalPointer<MutableCodePointTrie> trie( 1745 new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode); 1746 if (U_FAILURE(*pErrorCode)) { 1747 return nullptr; 1748 } 1749 return reinterpret_cast<UMutableCPTrie *>(trie.orphan()); 1750} 1751 1752U_CAPI UMutableCPTrie * U_EXPORT2 1753umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) { 1754 if (U_FAILURE(*pErrorCode)) { 1755 return nullptr; 1756 } 1757 if (other == nullptr) { 1758 return nullptr; 1759 } 1760 LocalPointer<MutableCodePointTrie> clone( 1761 new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode); 1762 if (U_FAILURE(*pErrorCode)) { 1763 return nullptr; 1764 } 1765 return reinterpret_cast<UMutableCPTrie *>(clone.orphan()); 1766} 1767 1768U_CAPI void U_EXPORT2 1769umutablecptrie_close(UMutableCPTrie *trie) { 1770 delete reinterpret_cast<MutableCodePointTrie *>(trie); 1771} 1772 1773U_CAPI UMutableCPTrie * U_EXPORT2 1774umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) { 1775 if (U_FAILURE(*pErrorCode)) { 1776 return nullptr; 1777 } 1778 if (map == nullptr) { 1779 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 1780 return nullptr; 1781 } 1782 return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode)); 1783} 1784 1785U_CAPI UMutableCPTrie * U_EXPORT2 1786umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) { 1787 if (U_FAILURE(*pErrorCode)) { 1788 return nullptr; 1789 } 1790 if (trie == nullptr) { 1791 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 1792 return nullptr; 1793 } 1794 return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode)); 1795} 1796 1797U_CAPI uint32_t U_EXPORT2 1798umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) { 1799 return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c); 1800} 1801 1802namespace { 1803 1804UChar32 getRange(const void *trie, UChar32 start, 1805 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { 1806 return reinterpret_cast<const MutableCodePointTrie *>(trie)-> 1807 getRange(start, filter, context, pValue); 1808} 1809 1810} // namespace 1811 1812U_CAPI UChar32 U_EXPORT2 1813umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, 1814 UCPMapRangeOption option, uint32_t surrogateValue, 1815 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { 1816 return ucptrie_internalGetRange(getRange, trie, start, 1817 option, surrogateValue, 1818 filter, context, pValue); 1819} 1820 1821U_CAPI void U_EXPORT2 1822umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { 1823 if (U_FAILURE(*pErrorCode)) { 1824 return; 1825 } 1826 reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode); 1827} 1828 1829U_CAPI void U_EXPORT2 1830umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end, 1831 uint32_t value, UErrorCode *pErrorCode) { 1832 if (U_FAILURE(*pErrorCode)) { 1833 return; 1834 } 1835 reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode); 1836} 1837 1838/* Compact and internally serialize the trie. */ 1839U_CAPI UCPTrie * U_EXPORT2 1840umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, 1841 UErrorCode *pErrorCode) { 1842 if (U_FAILURE(*pErrorCode)) { 1843 return nullptr; 1844 } 1845 return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode); 1846} 1847 1848#ifdef UCPTRIE_DEBUG 1849U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) { 1850 reinterpret_cast<MutableCodePointTrie *>(trie)->name = name; 1851} 1852#endif 1853