1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4// umutablecptrie.cpp (inspired by utrie2_builder.cpp)
5// created: 2017dec29 Markus W. Scherer
6
7// #define UCPTRIE_DEBUG
8#ifdef UCPTRIE_DEBUG
9#   include <stdio.h>
10#endif
11
12#include "unicode/utypes.h"
13#include "unicode/ucptrie.h"
14#include "unicode/umutablecptrie.h"
15#include "unicode/uobject.h"
16#include "unicode/utf16.h"
17#include "cmemory.h"
18#include "uassert.h"
19#include "ucptrie_impl.h"
20
21// ICU-20235 In case Microsoft math.h has defined this, undefine it.
22#ifdef OVERFLOW
23#undef OVERFLOW
24#endif
25
26U_NAMESPACE_BEGIN
27
28namespace {
29
30constexpr int32_t MAX_UNICODE = 0x10ffff;
31
32constexpr int32_t UNICODE_LIMIT = 0x110000;
33constexpr int32_t BMP_LIMIT = 0x10000;
34constexpr int32_t ASCII_LIMIT = 0x80;
35
36constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3;
37constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3;
38constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3;
39
40constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3));
41
42// Flag values for data blocks.
43constexpr uint8_t ALL_SAME = 0;
44constexpr uint8_t MIXED = 1;
45constexpr uint8_t SAME_AS = 2;
46
47/** Start with allocation of 16k data entries. */
48constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14);
49
50/** Grow about 8x each time. */
51constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17);
52
53/**
54 * Maximum length of the build-time data array.
55 * One entry per 0x110000 code points.
56 */
57constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT;
58
59// Flag values for index-3 blocks while compacting/building.
60constexpr uint8_t I3_NULL = 0;
61constexpr uint8_t I3_BMP = 1;
62constexpr uint8_t I3_16 = 2;
63constexpr uint8_t I3_18 = 3;
64
65constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
66
67class AllSameBlocks;
68class MixedBlocks;
69
70class MutableCodePointTrie : public UMemory {
71public:
72    MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode);
73    MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode);
74    MutableCodePointTrie(const MutableCodePointTrie &other) = delete;
75    ~MutableCodePointTrie();
76
77    MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
78
79    static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);
80    static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
81
82    uint32_t get(UChar32 c) const;
83    int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,
84                     uint32_t *pValue) const;
85
86    void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
87    void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode);
88
89    UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode);
90
91private:
92    void clear();
93
94    bool ensureHighStart(UChar32 c);
95    int32_t allocDataBlock(int32_t blockLength);
96    int32_t getDataBlock(int32_t i);
97
98    void maskValues(uint32_t mask);
99    UChar32 findHighStart() const;
100    int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
101    int32_t compactData(
102            int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
103            int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
104    int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
105    int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
106
107    uint32_t *index = nullptr;
108    int32_t indexCapacity = 0;
109    int32_t index3NullOffset = -1;
110    uint32_t *data = nullptr;
111    int32_t dataCapacity = 0;
112    int32_t dataLength = 0;
113    int32_t dataNullOffset = -1;
114
115    uint32_t origInitialValue;
116    uint32_t initialValue;
117    uint32_t errorValue;
118    UChar32 highStart;
119    uint32_t highValue;
120#ifdef UCPTRIE_DEBUG
121public:
122    const char *name;
123#endif
124private:
125    /** Temporary array while building the final data. */
126    uint16_t *index16 = nullptr;
127    uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3];
128};
129
130MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) :
131        origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue),
132        highStart(0), highValue(initialValue)
133#ifdef UCPTRIE_DEBUG
134        , name("open")
135#endif
136        {
137    if (U_FAILURE(errorCode)) { return; }
138    index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4);
139    data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4);
140    if (index == nullptr || data == nullptr) {
141        errorCode = U_MEMORY_ALLOCATION_ERROR;
142        return;
143    }
144    indexCapacity = BMP_I_LIMIT;
145    dataCapacity = INITIAL_DATA_LENGTH;
146}
147
148MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) :
149        index3NullOffset(other.index3NullOffset),
150        dataNullOffset(other.dataNullOffset),
151        origInitialValue(other.origInitialValue), initialValue(other.initialValue),
152        errorValue(other.errorValue),
153        highStart(other.highStart), highValue(other.highValue)
154#ifdef UCPTRIE_DEBUG
155        , name("mutable clone")
156#endif
157        {
158    if (U_FAILURE(errorCode)) { return; }
159    int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT;
160    index = (uint32_t *)uprv_malloc(iCapacity * 4);
161    data = (uint32_t *)uprv_malloc(other.dataCapacity * 4);
162    if (index == nullptr || data == nullptr) {
163        errorCode = U_MEMORY_ALLOCATION_ERROR;
164        return;
165    }
166    indexCapacity = iCapacity;
167    dataCapacity = other.dataCapacity;
168
169    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
170    uprv_memcpy(flags, other.flags, iLimit);
171    uprv_memcpy(index, other.index, iLimit * 4);
172    uprv_memcpy(data, other.data, (size_t)other.dataLength * 4);
173    dataLength = other.dataLength;
174    U_ASSERT(other.index16 == nullptr);
175}
176
177MutableCodePointTrie::~MutableCodePointTrie() {
178    uprv_free(index);
179    uprv_free(data);
180    uprv_free(index16);
181}
182
183MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) {
184    // Use the highValue as the initialValue to reduce the highStart.
185    uint32_t errorValue = ucpmap_get(map, -1);
186    uint32_t initialValue = ucpmap_get(map, 0x10ffff);
187    LocalPointer<MutableCodePointTrie> mutableTrie(
188        new MutableCodePointTrie(initialValue, errorValue, errorCode),
189        errorCode);
190    if (U_FAILURE(errorCode)) {
191        return nullptr;
192    }
193    UChar32 start = 0, end;
194    uint32_t value;
195    while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
196                                  nullptr, nullptr, &value)) >= 0) {
197        if (value != initialValue) {
198            if (start == end) {
199                mutableTrie->set(start, value, errorCode);
200            } else {
201                mutableTrie->setRange(start, end, value, errorCode);
202            }
203        }
204        start = end + 1;
205    }
206    if (U_SUCCESS(errorCode)) {
207        return mutableTrie.orphan();
208    } else {
209        return nullptr;
210    }
211}
212
213MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
214    // Use the highValue as the initialValue to reduce the highStart.
215    uint32_t errorValue;
216    uint32_t initialValue;
217    switch (trie->valueWidth) {
218    case UCPTRIE_VALUE_BITS_16:
219        errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
220        initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
221        break;
222    case UCPTRIE_VALUE_BITS_32:
223        errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
224        initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
225        break;
226    case UCPTRIE_VALUE_BITS_8:
227        errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
228        initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
229        break;
230    default:
231        // Unreachable if the trie is properly initialized.
232        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
233        return nullptr;
234    }
235    LocalPointer<MutableCodePointTrie> mutableTrie(
236        new MutableCodePointTrie(initialValue, errorValue, errorCode),
237        errorCode);
238    if (U_FAILURE(errorCode)) {
239        return nullptr;
240    }
241    UChar32 start = 0, end;
242    uint32_t value;
243    while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
244                                   nullptr, nullptr, &value)) >= 0) {
245        if (value != initialValue) {
246            if (start == end) {
247                mutableTrie->set(start, value, errorCode);
248            } else {
249                mutableTrie->setRange(start, end, value, errorCode);
250            }
251        }
252        start = end + 1;
253    }
254    if (U_SUCCESS(errorCode)) {
255        return mutableTrie.orphan();
256    } else {
257        return nullptr;
258    }
259}
260
261void MutableCodePointTrie::clear() {
262    index3NullOffset = dataNullOffset = -1;
263    dataLength = 0;
264    highValue = initialValue = origInitialValue;
265    highStart = 0;
266    uprv_free(index16);
267    index16 = nullptr;
268}
269
270uint32_t MutableCodePointTrie::get(UChar32 c) const {
271    if ((uint32_t)c > MAX_UNICODE) {
272        return errorValue;
273    }
274    if (c >= highStart) {
275        return highValue;
276    }
277    int32_t i = c >> UCPTRIE_SHIFT_3;
278    if (flags[i] == ALL_SAME) {
279        return index[i];
280    } else {
281        return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)];
282    }
283}
284
285inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
286                                 UCPMapValueFilter *filter, const void *context) {
287    if (value == initialValue) {
288        value = nullValue;
289    } else if (filter != nullptr) {
290        value = filter(context, value);
291    }
292    return value;
293}
294
295UChar32 MutableCodePointTrie::getRange(
296        UChar32 start, UCPMapValueFilter *filter, const void *context,
297        uint32_t *pValue) const {
298    if ((uint32_t)start > MAX_UNICODE) {
299        return U_SENTINEL;
300    }
301    if (start >= highStart) {
302        if (pValue != nullptr) {
303            uint32_t value = highValue;
304            if (filter != nullptr) { value = filter(context, value); }
305            *pValue = value;
306        }
307        return MAX_UNICODE;
308    }
309    uint32_t nullValue = initialValue;
310    if (filter != nullptr) { nullValue = filter(context, nullValue); }
311    UChar32 c = start;
312    uint32_t trieValue, value;
313    bool haveValue = false;
314    int32_t i = c >> UCPTRIE_SHIFT_3;
315    do {
316        if (flags[i] == ALL_SAME) {
317            uint32_t trieValue2 = index[i];
318            if (haveValue) {
319                if (trieValue2 != trieValue) {
320                    if (filter == nullptr ||
321                            maybeFilterValue(trieValue2, initialValue, nullValue,
322                                             filter, context) != value) {
323                        return c - 1;
324                    }
325                    trieValue = trieValue2;  // may or may not help
326                }
327            } else {
328                trieValue = trieValue2;
329                value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
330                if (pValue != nullptr) { *pValue = value; }
331                haveValue = true;
332            }
333            c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
334        } else /* MIXED */ {
335            int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
336            uint32_t trieValue2 = data[di];
337            if (haveValue) {
338                if (trieValue2 != trieValue) {
339                    if (filter == nullptr ||
340                            maybeFilterValue(trieValue2, initialValue, nullValue,
341                                             filter, context) != value) {
342                        return c - 1;
343                    }
344                    trieValue = trieValue2;  // may or may not help
345                }
346            } else {
347                trieValue = trieValue2;
348                value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
349                if (pValue != nullptr) { *pValue = value; }
350                haveValue = true;
351            }
352            while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
353                trieValue2 = data[++di];
354                if (trieValue2 != trieValue) {
355                    if (filter == nullptr ||
356                            maybeFilterValue(trieValue2, initialValue, nullValue,
357                                             filter, context) != value) {
358                        return c - 1;
359                    }
360                }
361                trieValue = trieValue2;  // may or may not help
362            }
363        }
364        ++i;
365    } while (c < highStart);
366    U_ASSERT(haveValue);
367    if (maybeFilterValue(highValue, initialValue, nullValue,
368                         filter, context) != value) {
369        return c - 1;
370    } else {
371        return MAX_UNICODE;
372    }
373}
374
375void
376writeBlock(uint32_t *block, uint32_t value) {
377    uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
378    while (block < limit) {
379        *block++ = value;
380    }
381}
382
383bool MutableCodePointTrie::ensureHighStart(UChar32 c) {
384    if (c >= highStart) {
385        // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
386        c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
387        int32_t i = highStart >> UCPTRIE_SHIFT_3;
388        int32_t iLimit = c >> UCPTRIE_SHIFT_3;
389        if (iLimit > indexCapacity) {
390            uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4);
391            if (newIndex == nullptr) { return false; }
392            uprv_memcpy(newIndex, index, i * 4);
393            uprv_free(index);
394            index = newIndex;
395            indexCapacity = I_LIMIT;
396        }
397        do {
398            flags[i] = ALL_SAME;
399            index[i] = initialValue;
400        } while(++i < iLimit);
401        highStart = c;
402    }
403    return true;
404}
405
406int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) {
407    int32_t newBlock = dataLength;
408    int32_t newTop = newBlock + blockLength;
409    if (newTop > dataCapacity) {
410        int32_t capacity;
411        if (dataCapacity < MEDIUM_DATA_LENGTH) {
412            capacity = MEDIUM_DATA_LENGTH;
413        } else if (dataCapacity < MAX_DATA_LENGTH) {
414            capacity = MAX_DATA_LENGTH;
415        } else {
416            // Should never occur.
417            // Either MAX_DATA_LENGTH is incorrect,
418            // or the code writes more values than should be possible.
419            return -1;
420        }
421        uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4);
422        if (newData == nullptr) {
423            return -1;
424        }
425        uprv_memcpy(newData, data, (size_t)dataLength * 4);
426        uprv_free(data);
427        data = newData;
428        dataCapacity = capacity;
429    }
430    dataLength = newTop;
431    return newBlock;
432}
433
434/**
435 * No error checking for illegal arguments.
436 *
437 * @return -1 if no new data block available (out of memory in data array)
438 * @internal
439 */
440int32_t MutableCodePointTrie::getDataBlock(int32_t i) {
441    if (flags[i] == MIXED) {
442        return index[i];
443    }
444    if (i < BMP_I_LIMIT) {
445        int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);
446        if (newBlock < 0) { return newBlock; }
447        int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);
448        int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
449        do {
450            U_ASSERT(flags[iStart] == ALL_SAME);
451            writeBlock(data + newBlock, index[iStart]);
452            flags[iStart] = MIXED;
453            index[iStart++] = newBlock;
454            newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
455        } while (iStart < iLimit);
456        return index[i];
457    } else {
458        int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);
459        if (newBlock < 0) { return newBlock; }
460        writeBlock(data + newBlock, index[i]);
461        flags[i] = MIXED;
462        index[i] = newBlock;
463        return newBlock;
464    }
465}
466
467void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) {
468    if (U_FAILURE(errorCode)) {
469        return;
470    }
471    if ((uint32_t)c > MAX_UNICODE) {
472        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
473        return;
474    }
475
476    int32_t block;
477    if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) {
478        errorCode = U_MEMORY_ALLOCATION_ERROR;
479        return;
480    }
481
482    data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value;
483}
484
485void
486fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) {
487    uint32_t *pLimit = block + limit;
488    block += start;
489    while (block < pLimit) {
490        *block++ = value;
491    }
492}
493
494void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) {
495    if (U_FAILURE(errorCode)) {
496        return;
497    }
498    if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) {
499        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
500        return;
501    }
502    if (!ensureHighStart(end)) {
503        errorCode = U_MEMORY_ALLOCATION_ERROR;
504        return;
505    }
506
507    UChar32 limit = end + 1;
508    if (start & UCPTRIE_SMALL_DATA_MASK) {
509        // Set partial block at [start..following block boundary[.
510        int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
511        if (block < 0) {
512            errorCode = U_MEMORY_ALLOCATION_ERROR;
513            return;
514        }
515
516        UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK;
517        if (nextStart <= limit) {
518            fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,
519                      value);
520            start = nextStart;
521        } else {
522            fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,
523                      value);
524            return;
525        }
526    }
527
528    // Number of positions in the last, partial block.
529    int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;
530
531    // Round down limit to a block boundary.
532    limit &= ~UCPTRIE_SMALL_DATA_MASK;
533
534    // Iterate over all-value blocks.
535    while (start < limit) {
536        int32_t i = start >> UCPTRIE_SHIFT_3;
537        if (flags[i] == ALL_SAME) {
538            index[i] = value;
539        } else /* MIXED */ {
540            fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);
541        }
542        start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
543    }
544
545    if (rest > 0) {
546        // Set partial block at [last block boundary..limit[.
547        int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
548        if (block < 0) {
549            errorCode = U_MEMORY_ALLOCATION_ERROR;
550            return;
551        }
552
553        fillBlock(data + block, 0, rest, value);
554    }
555}
556
557/* compaction --------------------------------------------------------------- */
558
559void MutableCodePointTrie::maskValues(uint32_t mask) {
560    initialValue &= mask;
561    errorValue &= mask;
562    highValue &= mask;
563    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
564    for (int32_t i = 0; i < iLimit; ++i) {
565        if (flags[i] == ALL_SAME) {
566            index[i] &= mask;
567        }
568    }
569    for (int32_t i = 0; i < dataLength; ++i) {
570        data[i] &= mask;
571    }
572}
573
574template<typename UIntA, typename UIntB>
575bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
576    while (length > 0 && *s == *t) {
577        ++s;
578        ++t;
579        --length;
580    }
581    return length == 0;
582}
583
584bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) {
585    const uint32_t *pLimit = p + length;
586    while (p < pLimit && *p == value) { ++p; }
587    return p == pLimit;
588}
589
590/** Search for an identical block. */
591int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
592                      const uint16_t *q, int32_t qStart, int32_t blockLength) {
593    // Ensure that we do not even partially get past length.
594    length -= blockLength;
595
596    q += qStart;
597    while (pStart <= length) {
598        if (equalBlocks(p + pStart, q, blockLength)) {
599            return pStart;
600        }
601        ++pStart;
602    }
603    return -1;
604}
605
606int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
607                         uint32_t value, int32_t blockLength) {
608    // Ensure that we do not even partially get past limit.
609    limit -= blockLength;
610
611    for (int32_t block = start; block <= limit; ++block) {
612        if (p[block] == value) {
613            for (int32_t i = 1;; ++i) {
614                if (i == blockLength) {
615                    return block;
616                }
617                if (p[block + i] != value) {
618                    block += i;
619                    break;
620                }
621            }
622        }
623    }
624    return -1;
625}
626
627/**
628 * Look for maximum overlap of the beginning of the other block
629 * with the previous, adjacent block.
630 */
631template<typename UIntA, typename UIntB>
632int32_t getOverlap(const UIntA *p, int32_t length,
633                   const UIntB *q, int32_t qStart, int32_t blockLength) {
634    int32_t overlap = blockLength - 1;
635    U_ASSERT(overlap <= length);
636    q += qStart;
637    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
638        --overlap;
639    }
640    return overlap;
641}
642
643int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
644                          int32_t blockLength) {
645    int32_t min = length - (blockLength - 1);
646    int32_t i = length;
647    while (min < i && p[i - 1] == value) { --i; }
648    return length - i;
649}
650
651bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
652    for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
653        if (index[i] == dataOffset) {
654            return true;
655        }
656    }
657    return false;
658}
659
660/**
661 * Finds the start of the last range in the trie by enumerating backward.
662 * Indexes for code points higher than this will be omitted.
663 */
664UChar32 MutableCodePointTrie::findHighStart() const {
665    int32_t i = highStart >> UCPTRIE_SHIFT_3;
666    while (i > 0) {
667        bool match;
668        if (flags[--i] == ALL_SAME) {
669            match = index[i] == highValue;
670        } else /* MIXED */ {
671            const uint32_t *p = data + index[i];
672            for (int32_t j = 0;; ++j) {
673                if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) {
674                    match = true;
675                    break;
676                }
677                if (p[j] != highValue) {
678                    match = false;
679                    break;
680                }
681            }
682        }
683        if (!match) {
684            return (i + 1) << UCPTRIE_SHIFT_3;
685        }
686    }
687    return 0;
688}
689
690class AllSameBlocks {
691public:
692    static constexpr int32_t NEW_UNIQUE = -1;
693    static constexpr int32_t OVERFLOW = -2;
694
695    AllSameBlocks() : length(0), mostRecent(-1) {}
696
697    int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) {
698        if (mostRecent >= 0 && values[mostRecent] == value) {
699            refCounts[mostRecent] += count;
700            return indexes[mostRecent];
701        }
702        for (int32_t i = 0; i < length; ++i) {
703            if (values[i] == value) {
704                mostRecent = i;
705                refCounts[i] += count;
706                return indexes[i];
707            }
708        }
709        if (length == CAPACITY) {
710            return OVERFLOW;
711        }
712        mostRecent = length;
713        indexes[length] = index;
714        values[length] = value;
715        refCounts[length++] = count;
716        return NEW_UNIQUE;
717    }
718
719    /** Replaces the block which has the lowest reference count. */
720    void add(int32_t index, int32_t count, uint32_t value) {
721        U_ASSERT(length == CAPACITY);
722        int32_t least = -1;
723        int32_t leastCount = I_LIMIT;
724        for (int32_t i = 0; i < length; ++i) {
725            U_ASSERT(values[i] != value);
726            if (refCounts[i] < leastCount) {
727                least = i;
728                leastCount = refCounts[i];
729            }
730        }
731        U_ASSERT(least >= 0);
732        mostRecent = least;
733        indexes[least] = index;
734        values[least] = value;
735        refCounts[least] = count;
736    }
737
738    int32_t findMostUsed() const {
739        if (length == 0) { return -1; }
740        int32_t max = -1;
741        int32_t maxCount = 0;
742        for (int32_t i = 0; i < length; ++i) {
743            if (refCounts[i] > maxCount) {
744                max = i;
745                maxCount = refCounts[i];
746            }
747        }
748        return indexes[max];
749    }
750
751private:
752    static constexpr int32_t CAPACITY = 32;
753
754    int32_t length;
755    int32_t mostRecent;
756
757    int32_t indexes[CAPACITY];
758    uint32_t values[CAPACITY];
759    int32_t refCounts[CAPACITY];
760};
761
762// Custom hash table for mixed-value blocks to be found anywhere in the
763// compacted data or index so far.
764class MixedBlocks {
765public:
766    MixedBlocks() {}
767    ~MixedBlocks() {
768        uprv_free(table);
769    }
770
771    bool init(int32_t maxLength, int32_t newBlockLength) {
772        // We store actual data indexes + 1 to reserve 0 for empty entries.
773        int32_t maxDataIndex = maxLength - newBlockLength + 1;
774        int32_t newLength;
775        if (maxDataIndex <= 0xfff) {  // 4k
776            newLength = 6007;
777            shift = 12;
778            mask = 0xfff;
779        } else if (maxDataIndex <= 0x7fff) {  // 32k
780            newLength = 50021;
781            shift = 15;
782            mask = 0x7fff;
783        } else if (maxDataIndex <= 0x1ffff) {  // 128k
784            newLength = 200003;
785            shift = 17;
786            mask = 0x1ffff;
787        } else {
788            // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
789            newLength = 1500007;
790            shift = 21;
791            mask = 0x1fffff;
792        }
793        if (newLength > capacity) {
794            uprv_free(table);
795            table = (uint32_t *)uprv_malloc(newLength * 4);
796            if (table == nullptr) {
797                return false;
798            }
799            capacity = newLength;
800        }
801        length = newLength;
802        uprv_memset(table, 0, length * 4);
803
804        blockLength = newBlockLength;
805        return true;
806    }
807
808    template<typename UInt>
809    void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
810        int32_t start = prevDataLength - blockLength;
811        if (start >= minStart) {
812            ++start;  // Skip the last block that we added last time.
813        } else {
814            start = minStart;  // Begin with the first full block.
815        }
816        for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
817            uint32_t hashCode = makeHashCode(data, start);
818            addEntry(data, start, hashCode, start);
819        }
820    }
821
822    template<typename UIntA, typename UIntB>
823    int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
824        uint32_t hashCode = makeHashCode(blockData, blockStart);
825        int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
826        if (entryIndex >= 0) {
827            return (table[entryIndex] & mask) - 1;
828        } else {
829            return -1;
830        }
831    }
832
833    int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const {
834        uint32_t hashCode = makeHashCode(blockValue);
835        int32_t entryIndex = findEntry(data, blockValue, hashCode);
836        if (entryIndex >= 0) {
837            return (table[entryIndex] & mask) - 1;
838        } else {
839            return -1;
840        }
841    }
842
843private:
844    template<typename UInt>
845    uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
846        int32_t blockLimit = blockStart + blockLength;
847        uint32_t hashCode = blockData[blockStart++];
848        do {
849            hashCode = 37 * hashCode + blockData[blockStart++];
850        } while (blockStart < blockLimit);
851        return hashCode;
852    }
853
854    uint32_t makeHashCode(uint32_t blockValue) const {
855        uint32_t hashCode = blockValue;
856        for (int32_t i = 1; i < blockLength; ++i) {
857            hashCode = 37 * hashCode + blockValue;
858        }
859        return hashCode;
860    }
861
862    template<typename UInt>
863    void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
864        U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
865        int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
866        if (entryIndex < 0) {
867            table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
868        }
869    }
870
871    template<typename UIntA, typename UIntB>
872    int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,
873                      uint32_t hashCode) const {
874        uint32_t shiftedHashCode = hashCode << shift;
875        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
876        for (int32_t entryIndex = initialEntryIndex;;) {
877            uint32_t entry = table[entryIndex];
878            if (entry == 0) {
879                return ~entryIndex;
880            }
881            if ((entry & ~mask) == shiftedHashCode) {
882                int32_t dataIndex = (entry & mask) - 1;
883                if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
884                    return entryIndex;
885                }
886            }
887            entryIndex = nextIndex(initialEntryIndex, entryIndex);
888        }
889    }
890
891    int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const {
892        uint32_t shiftedHashCode = hashCode << shift;
893        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
894        for (int32_t entryIndex = initialEntryIndex;;) {
895            uint32_t entry = table[entryIndex];
896            if (entry == 0) {
897                return ~entryIndex;
898            }
899            if ((entry & ~mask) == shiftedHashCode) {
900                int32_t dataIndex = (entry & mask) - 1;
901                if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) {
902                    return entryIndex;
903                }
904            }
905            entryIndex = nextIndex(initialEntryIndex, entryIndex);
906        }
907    }
908
909    inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const {
910        // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
911        return (entryIndex + initialEntryIndex) % length;
912    }
913
914    // Hash table.
915    // The length is a prime number, larger than the maximum data length.
916    // The "shift" lower bits store a data index + 1.
917    // The remaining upper bits store a partial hashCode of the block data values.
918    uint32_t *table = nullptr;
919    int32_t capacity = 0;
920    int32_t length = 0;
921    int32_t shift = 0;
922    uint32_t mask = 0;
923
924    int32_t blockLength = 0;
925};
926
927int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
928#ifdef UCPTRIE_DEBUG
929    bool overflow = false;
930#endif
931
932    // ASCII data will be stored as a linear table, even if the following code
933    // does not yet count it that way.
934    int32_t newDataCapacity = ASCII_LIMIT;
935    // Add room for a small data null block in case it would match the start of
936    // a fast data block where dataNullOffset must not be set in that case.
937    newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
938    // Add room for special values (errorValue, highValue) and padding.
939    newDataCapacity += 4;
940    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
941    int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
942    int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
943    for (int32_t i = 0; i < iLimit; i += inc) {
944        if (i == fastILimit) {
945            blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
946            inc = 1;
947        }
948        uint32_t value = index[i];
949        if (flags[i] == MIXED) {
950            // Really mixed?
951            const uint32_t *p = data + value;
952            value = *p;
953            if (allValuesSameAs(p + 1, blockLength - 1, value)) {
954                flags[i] = ALL_SAME;
955                index[i] = value;
956                // Fall through to ALL_SAME handling.
957            } else {
958                newDataCapacity += blockLength;
959                continue;
960            }
961        } else {
962            U_ASSERT(flags[i] == ALL_SAME);
963            if (inc > 1) {
964                // Do all of the fast-range data block's ALL_SAME parts have the same value?
965                bool allSame = true;
966                int32_t next_i = i + inc;
967                for (int32_t j = i + 1; j < next_i; ++j) {
968                    U_ASSERT(flags[j] == ALL_SAME);
969                    if (index[j] != value) {
970                        allSame = false;
971                        break;
972                    }
973                }
974                if (!allSame) {
975                    // Turn it into a MIXED block.
976                    if (getDataBlock(i) < 0) {
977                        return -1;
978                    }
979                    newDataCapacity += blockLength;
980                    continue;
981                }
982            }
983        }
984        // Is there another ALL_SAME block with the same value?
985        int32_t other = allSameBlocks.findOrAdd(i, inc, value);
986        if (other == AllSameBlocks::OVERFLOW) {
987            // The fixed-size array overflowed. Slow check for a duplicate block.
988#ifdef UCPTRIE_DEBUG
989            if (!overflow) {
990                puts("UCPTrie AllSameBlocks overflow");
991                overflow = true;
992            }
993#endif
994            int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
995            for (int32_t j = 0;; j += jInc) {
996                if (j == i) {
997                    allSameBlocks.add(i, inc, value);
998                    break;
999                }
1000                if (j == fastILimit) {
1001                    jInc = 1;
1002                }
1003                if (flags[j] == ALL_SAME && index[j] == value) {
1004                    allSameBlocks.add(j, jInc + inc, value);
1005                    other = j;
1006                    break;
1007                    // We could keep counting blocks with the same value
1008                    // before we add the first one, which may improve compaction in rare cases,
1009                    // but it would make it slower.
1010                }
1011            }
1012        }
1013        if (other >= 0) {
1014            flags[i] = SAME_AS;
1015            index[i] = other;
1016        } else {
1017            // New unique same-value block.
1018            newDataCapacity += blockLength;
1019        }
1020    }
1021    return newDataCapacity;
1022}
1023
1024#ifdef UCPTRIE_DEBUG
1025#   define DEBUG_DO(expr) expr
1026#else
1027#   define DEBUG_DO(expr)
1028#endif
1029
1030#ifdef UCPTRIE_DEBUG
1031// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF
1032int32_t appendValue(char s[], int32_t length, uint32_t value) {
1033    value ^= value >> 16;
1034    value ^= value >> 8;
1035    s[length] = 0xE2;
1036    s[length + 1] = (char)(0xA0 + ((value >> 6) & 3));
1037    s[length + 2] = (char)(0x80 + (value & 0x3F));
1038    return length + 3;
1039}
1040
1041void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,
1042                UChar32 start, int32_t overlap, uint32_t initialValue) {
1043    char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3];
1044    int32_t length = 0;
1045    int32_t i;
1046    for (i = 0; i < overlap; ++i) {
1047        length = appendValue(s, length, 0);  // Braille blank
1048    }
1049    s[length++] = '|';
1050    for (; i < blockLength; ++i) {
1051        if (block != nullptr) {
1052            value = block[i];
1053        }
1054        if (value == initialValue) {
1055            value = 0x40;  // Braille lower left dot
1056        }
1057        length = appendValue(s, length, value);
1058    }
1059    s[length] = 0;
1060    start += overlap;
1061    if (start <= 0xffff) {
1062        printf("    %04lX  %s|\n", (long)start, s);
1063    } else if (start <= 0xfffff) {
1064        printf("   %5lX  %s|\n", (long)start, s);
1065    } else {
1066        printf("  %6lX  %s|\n", (long)start, s);
1067    }
1068}
1069#endif
1070
1071/**
1072 * Compacts a build-time trie.
1073 *
1074 * The compaction
1075 * - removes blocks that are identical with earlier ones
1076 * - overlaps each new non-duplicate block as much as possible with the previously-written one
1077 * - works with fast-range data blocks whose length is a multiple of that of
1078 *   higher-code-point data blocks
1079 *
1080 * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
1081 */
1082int32_t MutableCodePointTrie::compactData(
1083        int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
1084        int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) {
1085#ifdef UCPTRIE_DEBUG
1086    int32_t countSame=0, sumOverlaps=0;
1087    bool printData = dataLength == 29088 /* line.brk */ ||
1088        // dataLength == 30048 /* CanonIterData */ ||
1089        dataLength == 50400 /* zh.txt~stroke */;
1090#endif
1091
1092    // The linear ASCII data has been copied into newData already.
1093    int32_t newDataLength = 0;
1094    for (int32_t i = 0; newDataLength < ASCII_LIMIT;
1095            newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
1096        index[i] = newDataLength;
1097#ifdef UCPTRIE_DEBUG
1098        if (printData) {
1099            printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);
1100        }
1101#endif
1102    }
1103
1104    int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
1105    if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1106        errorCode = U_MEMORY_ALLOCATION_ERROR;
1107        return 0;
1108    }
1109    mixedBlocks.extend(newData, 0, 0, newDataLength);
1110
1111    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1112    int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1113    int32_t fastLength = 0;
1114    for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
1115        if (i == fastILimit) {
1116            blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1117            inc = 1;
1118            fastLength = newDataLength;
1119            if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1120                errorCode = U_MEMORY_ALLOCATION_ERROR;
1121                return 0;
1122            }
1123            mixedBlocks.extend(newData, 0, 0, newDataLength);
1124        }
1125        if (flags[i] == ALL_SAME) {
1126            uint32_t value = index[i];
1127            // Find an earlier part of the data array of length blockLength
1128            // that is filled with this value.
1129            int32_t n = mixedBlocks.findAllSameBlock(newData, value);
1130            // If we find a match, and the current block is the data null block,
1131            // and it is not a fast block but matches the start of a fast block,
1132            // then we need to continue looking.
1133            // This is because this small block is shorter than the fast block,
1134            // and not all of the rest of the fast block is filled with this value.
1135            // Otherwise trie.getRange() would detect that the fast block starts at
1136            // dataNullOffset and assume incorrectly that it is filled with the null value.
1137            while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
1138                    isStartOfSomeFastBlock(n, index, fastILimit)) {
1139                n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
1140            }
1141            if (n >= 0) {
1142                DEBUG_DO(++countSame);
1143                index[i] = n;
1144            } else {
1145                n = getAllSameOverlap(newData, newDataLength, value, blockLength);
1146                DEBUG_DO(sumOverlaps += n);
1147#ifdef UCPTRIE_DEBUG
1148                if (printData) {
1149                    printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);
1150                }
1151#endif
1152                index[i] = newDataLength - n;
1153                int32_t prevDataLength = newDataLength;
1154                while (n < blockLength) {
1155                    newData[newDataLength++] = value;
1156                    ++n;
1157                }
1158                mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1159            }
1160        } else if (flags[i] == MIXED) {
1161            const uint32_t *block = data + index[i];
1162            int32_t n = mixedBlocks.findBlock(newData, block, 0);
1163            if (n >= 0) {
1164                DEBUG_DO(++countSame);
1165                index[i] = n;
1166            } else {
1167                n = getOverlap(newData, newDataLength, block, 0, blockLength);
1168                DEBUG_DO(sumOverlaps += n);
1169#ifdef UCPTRIE_DEBUG
1170                if (printData) {
1171                    printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);
1172                }
1173#endif
1174                index[i] = newDataLength - n;
1175                int32_t prevDataLength = newDataLength;
1176                while (n < blockLength) {
1177                    newData[newDataLength++] = block[n++];
1178                }
1179                mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1180            }
1181        } else /* SAME_AS */ {
1182            uint32_t j = index[i];
1183            index[i] = index[j];
1184        }
1185    }
1186
1187#ifdef UCPTRIE_DEBUG
1188    /* we saved some space */
1189    printf("compacting UCPTrie: count of 32-bit data words %lu->%lu  countSame=%ld  sumOverlaps=%ld\n",
1190            (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps);
1191#endif
1192    return newDataLength;
1193}
1194
1195int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
1196                                           UErrorCode &errorCode) {
1197    int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
1198    if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
1199        // Only the linear fast index, no multi-stage index tables.
1200        index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1201        return fastIndexLength;
1202    }
1203
1204    // Condense the fast index table.
1205    // Also, does it contain an index-3 block with all dataNullOffset?
1206    uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH];  // fastIndexLength
1207    int32_t i3FirstNull = -1;
1208    for (int32_t i = 0, j = 0; i < fastILimit; ++j) {
1209        uint32_t i3 = index[i];
1210        fastIndex[j] = (uint16_t)i3;
1211        if (i3 == (uint32_t)dataNullOffset) {
1212            if (i3FirstNull < 0) {
1213                i3FirstNull = j;
1214            } else if (index3NullOffset < 0 &&
1215                    (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1216                index3NullOffset = i3FirstNull;
1217            }
1218        } else {
1219            i3FirstNull = -1;
1220        }
1221        // Set the index entries that compactData() skipped.
1222        // Needed when the multi-stage index covers the fast index range as well.
1223        int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1224        while (++i < iNext) {
1225            i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1226            index[i] = i3;
1227        }
1228    }
1229
1230    if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1231        errorCode = U_MEMORY_ALLOCATION_ERROR;
1232        return 0;
1233    }
1234    mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);
1235
1236    // Examine index-3 blocks. For each determine one of:
1237    // - same as the index-3 null block
1238    // - same as a fast-index block
1239    // - 16-bit indexes
1240    // - 18-bit indexes
1241    // We store this in the first flags entry for the index-3 block.
1242    //
1243    // Also determine an upper limit for the index-3 table length.
1244    int32_t index3Capacity = 0;
1245    i3FirstNull = index3NullOffset;
1246    bool hasLongI3Blocks = false;
1247    // If the fast index covers the whole BMP, then
1248    // the multi-stage index is only for supplementary code points.
1249    // Otherwise, the multi-stage index covers all of Unicode.
1250    int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
1251    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1252    for (int32_t i = iStart; i < iLimit;) {
1253        int32_t j = i;
1254        int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1255        uint32_t oredI3 = 0;
1256        bool isNull = true;
1257        do {
1258            uint32_t i3 = index[j];
1259            oredI3 |= i3;
1260            if (i3 != (uint32_t)dataNullOffset) {
1261                isNull = false;
1262            }
1263        } while (++j < jLimit);
1264        if (isNull) {
1265            flags[i] = I3_NULL;
1266            if (i3FirstNull < 0) {
1267                if (oredI3 <= 0xffff) {
1268                    index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1269                } else {
1270                    index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1271                    hasLongI3Blocks = true;
1272                }
1273                i3FirstNull = 0;
1274            }
1275        } else {
1276            if (oredI3 <= 0xffff) {
1277                int32_t n = mixedBlocks.findBlock(fastIndex, index, i);
1278                if (n >= 0) {
1279                    flags[i] = I3_BMP;
1280                    index[i] = n;
1281                } else {
1282                    flags[i] = I3_16;
1283                    index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1284                }
1285            } else {
1286                flags[i] = I3_18;
1287                index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1288                hasLongI3Blocks = true;
1289            }
1290        }
1291        i = j;
1292    }
1293
1294    int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3;
1295
1296    // Length of the index-1 table, rounded up.
1297    int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;
1298
1299    // Index table: Fast index, index-1, index-3, index-2.
1300    // +1 for possible index table padding.
1301    int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
1302    index16 = (uint16_t *)uprv_malloc(index16Capacity * 2);
1303    if (index16 == nullptr) {
1304        errorCode = U_MEMORY_ALLOCATION_ERROR;
1305        return 0;
1306    }
1307    uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
1308
1309    if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1310        errorCode = U_MEMORY_ALLOCATION_ERROR;
1311        return 0;
1312    }
1313    MixedBlocks longI3Blocks;
1314    if (hasLongI3Blocks) {
1315        if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
1316            errorCode = U_MEMORY_ALLOCATION_ERROR;
1317            return 0;
1318        }
1319    }
1320
1321    // Compact the index-3 table and write an uncompacted version of the index-2 table.
1322    uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2];  // index2Capacity
1323    int32_t i2Length = 0;
1324    i3FirstNull = index3NullOffset;
1325    int32_t index3Start = fastIndexLength + index1Length;
1326    int32_t indexLength = index3Start;
1327    for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1328        int32_t i3;
1329        uint8_t f = flags[i];
1330        if (f == I3_NULL && i3FirstNull < 0) {
1331            // First index-3 null block. Write & overlap it like a normal block, then remember it.
1332            f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
1333            i3FirstNull = 0;
1334        }
1335        if (f == I3_NULL) {
1336            i3 = index3NullOffset;
1337        } else if (f == I3_BMP) {
1338            i3 = index[i];
1339        } else if (f == I3_16) {
1340            int32_t n = mixedBlocks.findBlock(index16, index, i);
1341            if (n >= 0) {
1342                i3 = n;
1343            } else {
1344                if (indexLength == index3Start) {
1345                    // No overlap at the boundary between the index-1 and index-3 tables.
1346                    n = 0;
1347                } else {
1348                    n = getOverlap(index16, indexLength,
1349                                   index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
1350                }
1351                i3 = indexLength - n;
1352                int32_t prevIndexLength = indexLength;
1353                while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1354                    index16[indexLength++] = index[i + n++];
1355                }
1356                mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1357                if (hasLongI3Blocks) {
1358                    longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1359                }
1360            }
1361        } else {
1362            U_ASSERT(f == I3_18);
1363            U_ASSERT(hasLongI3Blocks);
1364            // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
1365            int32_t j = i;
1366            int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1367            int32_t k = indexLength;
1368            do {
1369                ++k;
1370                uint32_t v = index[j++];
1371                uint32_t upperBits = (v & 0x30000) >> 2;
1372                index16[k++] = v;
1373                v = index[j++];
1374                upperBits |= (v & 0x30000) >> 4;
1375                index16[k++] = v;
1376                v = index[j++];
1377                upperBits |= (v & 0x30000) >> 6;
1378                index16[k++] = v;
1379                v = index[j++];
1380                upperBits |= (v & 0x30000) >> 8;
1381                index16[k++] = v;
1382                v = index[j++];
1383                upperBits |= (v & 0x30000) >> 10;
1384                index16[k++] = v;
1385                v = index[j++];
1386                upperBits |= (v & 0x30000) >> 12;
1387                index16[k++] = v;
1388                v = index[j++];
1389                upperBits |= (v & 0x30000) >> 14;
1390                index16[k++] = v;
1391                v = index[j++];
1392                upperBits |= (v & 0x30000) >> 16;
1393                index16[k++] = v;
1394                index16[k - 9] = upperBits;
1395            } while (j < jLimit);
1396            int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);
1397            if (n >= 0) {
1398                i3 = n | 0x8000;
1399            } else {
1400                if (indexLength == index3Start) {
1401                    // No overlap at the boundary between the index-1 and index-3 tables.
1402                    n = 0;
1403                } else {
1404                    n = getOverlap(index16, indexLength,
1405                                   index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
1406                }
1407                i3 = (indexLength - n) | 0x8000;
1408                int32_t prevIndexLength = indexLength;
1409                if (n > 0) {
1410                    int32_t start = indexLength;
1411                    while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
1412                        index16[indexLength++] = index16[start + n++];
1413                    }
1414                } else {
1415                    indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
1416                }
1417                mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1418                if (hasLongI3Blocks) {
1419                    longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1420                }
1421            }
1422        }
1423        if (index3NullOffset < 0 && i3FirstNull >= 0) {
1424            index3NullOffset = i3;
1425        }
1426        // Set the index-2 table entry.
1427        index2[i2Length++] = i3;
1428    }
1429    U_ASSERT(i2Length == index2Capacity);
1430    U_ASSERT(indexLength <= index3Start + index3Capacity);
1431
1432    if (index3NullOffset < 0) {
1433        index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1434    }
1435    if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1436        // The index-3 offsets exceed 15 bits, or
1437        // the last one cannot be distinguished from the no-null-block value.
1438        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1439        return 0;
1440    }
1441
1442    // Compact the index-2 table and write the index-1 table.
1443    static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,
1444                  "must re-init mixedBlocks");
1445    int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
1446    int32_t i1 = fastIndexLength;
1447    for (int32_t i = 0; i < i2Length; i += blockLength) {
1448        int32_t n;
1449        if ((i2Length - i) >= blockLength) {
1450            // normal block
1451            U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
1452            n = mixedBlocks.findBlock(index16, index2, i);
1453        } else {
1454            // highStart is inside the last index-2 block. Shorten it.
1455            blockLength = i2Length - i;
1456            n = findSameBlock(index16, index3Start, indexLength,
1457                              index2, i, blockLength);
1458        }
1459        int32_t i2;
1460        if (n >= 0) {
1461            i2 = n;
1462        } else {
1463            if (indexLength == index3Start) {
1464                // No overlap at the boundary between the index-1 and index-3/2 tables.
1465                n = 0;
1466            } else {
1467                n = getOverlap(index16, indexLength, index2, i, blockLength);
1468            }
1469            i2 = indexLength - n;
1470            int32_t prevIndexLength = indexLength;
1471            while (n < blockLength) {
1472                index16[indexLength++] = index2[i + n++];
1473            }
1474            mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1475        }
1476        // Set the index-1 table entry.
1477        index16[i1++] = i2;
1478    }
1479    U_ASSERT(i1 == index3Start);
1480    U_ASSERT(indexLength <= index16Capacity);
1481
1482#ifdef UCPTRIE_DEBUG
1483    /* we saved some space */
1484    printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n",
1485            (long)iLimit, (long)indexLength);
1486#endif
1487
1488    return indexLength;
1489}
1490
1491int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) {
1492    // Find the real highStart and round it up.
1493    U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
1494    highValue = get(MAX_UNICODE);
1495    int32_t realHighStart = findHighStart();
1496    realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &
1497        ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
1498    if (realHighStart == UNICODE_LIMIT) {
1499        highValue = initialValue;
1500    }
1501
1502#ifdef UCPTRIE_DEBUG
1503    printf("UCPTrie: highStart U+%06lx  highValue 0x%lx  initialValue 0x%lx\n",
1504            (long)realHighStart, (long)highValue, (long)initialValue);
1505#endif
1506
1507    // We always store indexes and data values for the fast range.
1508    // Pin highStart to the top of that range while building.
1509    UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;
1510    if (realHighStart < fastLimit) {
1511        for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) {
1512            flags[i] = ALL_SAME;
1513            index[i] = highValue;
1514        }
1515        highStart = fastLimit;
1516    } else {
1517        highStart = realHighStart;
1518    }
1519
1520    uint32_t asciiData[ASCII_LIMIT];
1521    for (int32_t i = 0; i < ASCII_LIMIT; ++i) {
1522        asciiData[i] = get(i);
1523    }
1524
1525    // First we look for which data blocks have the same value repeated over the whole block,
1526    // deduplicate such blocks, find a good null data block (for faster enumeration),
1527    // and get an upper bound for the necessary data array length.
1528    AllSameBlocks allSameBlocks;
1529    int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
1530    if (newDataCapacity < 0) {
1531        errorCode = U_MEMORY_ALLOCATION_ERROR;
1532        return 0;
1533    }
1534    uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4);
1535    if (newData == nullptr) {
1536        errorCode = U_MEMORY_ALLOCATION_ERROR;
1537        return 0;
1538    }
1539    uprv_memcpy(newData, asciiData, sizeof(asciiData));
1540
1541    int32_t dataNullIndex = allSameBlocks.findMostUsed();
1542
1543    MixedBlocks mixedBlocks;
1544    int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
1545                                        dataNullIndex, mixedBlocks, errorCode);
1546    if (U_FAILURE(errorCode)) { return 0; }
1547    U_ASSERT(newDataLength <= newDataCapacity);
1548    uprv_free(data);
1549    data = newData;
1550    dataCapacity = newDataCapacity;
1551    dataLength = newDataLength;
1552    if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) {
1553        // The offset of the last data block is too high to be stored in the index table.
1554        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1555        return 0;
1556    }
1557
1558    if (dataNullIndex >= 0) {
1559        dataNullOffset = index[dataNullIndex];
1560#ifdef UCPTRIE_DEBUG
1561        if (data[dataNullOffset] != initialValue) {
1562            printf("UCPTrie initialValue %lx -> more common nullValue %lx\n",
1563                   (long)initialValue, (long)data[dataNullOffset]);
1564        }
1565#endif
1566        initialValue = data[dataNullOffset];
1567    } else {
1568        dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
1569    }
1570
1571    int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);
1572    highStart = realHighStart;
1573    return indexLength;
1574}
1575
1576UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
1577    if (U_FAILURE(errorCode)) {
1578        return nullptr;
1579    }
1580    if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
1581            valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) {
1582        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1583        return nullptr;
1584    }
1585
1586    // The mutable trie always stores 32-bit values.
1587    // When we build a UCPTrie for a smaller value width, we first mask off unused bits
1588    // before compacting the data.
1589    switch (valueWidth) {
1590    case UCPTRIE_VALUE_BITS_32:
1591        break;
1592    case UCPTRIE_VALUE_BITS_16:
1593        maskValues(0xffff);
1594        break;
1595    case UCPTRIE_VALUE_BITS_8:
1596        maskValues(0xff);
1597        break;
1598    default:
1599        break;
1600    }
1601
1602    UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT;
1603    int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode);
1604    if (U_FAILURE(errorCode)) {
1605        clear();
1606        return nullptr;
1607    }
1608
1609    // Ensure data table alignment: The index length must be even for uint32_t data.
1610    if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) {
1611        index16[indexLength++] = 0xffee;  // arbitrary value
1612    }
1613
1614    // Make the total trie structure length a multiple of 4 bytes by padding the data table,
1615    // and store special values as the last two data values.
1616    int32_t length = indexLength * 2;
1617    if (valueWidth == UCPTRIE_VALUE_BITS_16) {
1618        if (((indexLength ^ dataLength) & 1) != 0) {
1619            // padding
1620            data[dataLength++] = errorValue;
1621        }
1622        if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1623            data[dataLength++] = highValue;
1624            data[dataLength++] = errorValue;
1625        }
1626        length += dataLength * 2;
1627    } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
1628        // 32-bit data words never need padding to a multiple of 4 bytes.
1629        if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1630            if (data[dataLength - 1] != highValue) {
1631                data[dataLength++] = highValue;
1632            }
1633            data[dataLength++] = errorValue;
1634        }
1635        length += dataLength * 4;
1636    } else {
1637        int32_t and3 = (length + dataLength) & 3;
1638        if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) {
1639            // all set
1640        } else if(and3 == 3 && data[dataLength - 1] == highValue) {
1641            data[dataLength++] = errorValue;
1642        } else {
1643            while (and3 != 2) {
1644                data[dataLength++] = highValue;
1645                and3 = (and3 + 1) & 3;
1646            }
1647            data[dataLength++] = highValue;
1648            data[dataLength++] = errorValue;
1649        }
1650        length += dataLength;
1651    }
1652
1653    // Calculate the total length of the UCPTrie as a single memory block.
1654    length += sizeof(UCPTrie);
1655    U_ASSERT((length & 3) == 0);
1656
1657    uint8_t *bytes = (uint8_t *)uprv_malloc(length);
1658    if (bytes == nullptr) {
1659        errorCode = U_MEMORY_ALLOCATION_ERROR;
1660        clear();
1661        return nullptr;
1662    }
1663    UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes);
1664    uprv_memset(trie, 0, sizeof(UCPTrie));
1665    trie->indexLength = indexLength;
1666    trie->dataLength = dataLength;
1667
1668    trie->highStart = highStart;
1669    // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes.
1670    // Runtime code needs to then test for the real highStart as well.
1671    trie->shifted12HighStart = (highStart + 0xfff) >> 12;
1672    trie->type = type;
1673    trie->valueWidth = valueWidth;
1674
1675    trie->index3NullOffset = index3NullOffset;
1676    trie->dataNullOffset = dataNullOffset;
1677    trie->nullValue = initialValue;
1678
1679    bytes += sizeof(UCPTrie);
1680
1681    // Fill the index and data arrays.
1682    uint16_t *dest16 = (uint16_t *)bytes;
1683    trie->index = dest16;
1684
1685    if (highStart <= fastLimit) {
1686        // Condense only the fast index from the mutable-trie index.
1687        for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
1688            *dest16++ = (uint16_t)index[i];  // dest16[j]
1689        }
1690    } else {
1691        uprv_memcpy(dest16, index16, indexLength * 2);
1692        dest16 += indexLength;
1693    }
1694    bytes += indexLength * 2;
1695
1696    // Write the data array.
1697    const uint32_t *p = data;
1698    switch (valueWidth) {
1699    case UCPTRIE_VALUE_BITS_16:
1700        // Write 16-bit data values.
1701        trie->data.ptr16 = dest16;
1702        for (int32_t i = dataLength; i > 0; --i) {
1703            *dest16++ = (uint16_t)*p++;
1704        }
1705        break;
1706    case UCPTRIE_VALUE_BITS_32:
1707        // Write 32-bit data values.
1708        trie->data.ptr32 = (uint32_t *)bytes;
1709        uprv_memcpy(bytes, p, (size_t)dataLength * 4);
1710        break;
1711    case UCPTRIE_VALUE_BITS_8:
1712        // Write 8-bit data values.
1713        trie->data.ptr8 = bytes;
1714        for (int32_t i = dataLength; i > 0; --i) {
1715            *bytes++ = (uint8_t)*p++;
1716        }
1717        break;
1718    default:
1719        // Will not occur, valueWidth checked at the beginning.
1720        break;
1721    }
1722
1723#ifdef UCPTRIE_DEBUG
1724    trie->name = name;
1725
1726    ucptrie_printLengths(trie, "");
1727#endif
1728
1729    clear();
1730    return trie;
1731}
1732
1733}  // namespace
1734
1735U_NAMESPACE_END
1736
1737U_NAMESPACE_USE
1738
1739U_CAPI UMutableCPTrie * U_EXPORT2
1740umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
1741    if (U_FAILURE(*pErrorCode)) {
1742        return nullptr;
1743    }
1744    LocalPointer<MutableCodePointTrie> trie(
1745        new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode);
1746    if (U_FAILURE(*pErrorCode)) {
1747        return nullptr;
1748    }
1749    return reinterpret_cast<UMutableCPTrie *>(trie.orphan());
1750}
1751
1752U_CAPI UMutableCPTrie * U_EXPORT2
1753umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) {
1754    if (U_FAILURE(*pErrorCode)) {
1755        return nullptr;
1756    }
1757    if (other == nullptr) {
1758        return nullptr;
1759    }
1760    LocalPointer<MutableCodePointTrie> clone(
1761        new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode);
1762    if (U_FAILURE(*pErrorCode)) {
1763        return nullptr;
1764    }
1765    return reinterpret_cast<UMutableCPTrie *>(clone.orphan());
1766}
1767
1768U_CAPI void U_EXPORT2
1769umutablecptrie_close(UMutableCPTrie *trie) {
1770    delete reinterpret_cast<MutableCodePointTrie *>(trie);
1771}
1772
1773U_CAPI UMutableCPTrie * U_EXPORT2
1774umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) {
1775    if (U_FAILURE(*pErrorCode)) {
1776        return nullptr;
1777    }
1778    if (map == nullptr) {
1779        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1780        return nullptr;
1781    }
1782    return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode));
1783}
1784
1785U_CAPI UMutableCPTrie * U_EXPORT2
1786umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
1787    if (U_FAILURE(*pErrorCode)) {
1788        return nullptr;
1789    }
1790    if (trie == nullptr) {
1791        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1792        return nullptr;
1793    }
1794    return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode));
1795}
1796
1797U_CAPI uint32_t U_EXPORT2
1798umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) {
1799    return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c);
1800}
1801
1802namespace {
1803
1804UChar32 getRange(const void *trie, UChar32 start,
1805                 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1806    return reinterpret_cast<const MutableCodePointTrie *>(trie)->
1807        getRange(start, filter, context, pValue);
1808}
1809
1810}  // namespace
1811
1812U_CAPI UChar32 U_EXPORT2
1813umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
1814                        UCPMapRangeOption option, uint32_t surrogateValue,
1815                        UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1816    return ucptrie_internalGetRange(getRange, trie, start,
1817                                    option, surrogateValue,
1818                                    filter, context, pValue);
1819}
1820
1821U_CAPI void U_EXPORT2
1822umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
1823    if (U_FAILURE(*pErrorCode)) {
1824        return;
1825    }
1826    reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode);
1827}
1828
1829U_CAPI void U_EXPORT2
1830umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end,
1831                   uint32_t value, UErrorCode *pErrorCode) {
1832    if (U_FAILURE(*pErrorCode)) {
1833        return;
1834    }
1835    reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode);
1836}
1837
1838/* Compact and internally serialize the trie. */
1839U_CAPI UCPTrie * U_EXPORT2
1840umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
1841                              UErrorCode *pErrorCode) {
1842    if (U_FAILURE(*pErrorCode)) {
1843        return nullptr;
1844    }
1845    return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode);
1846}
1847
1848#ifdef UCPTRIE_DEBUG
1849U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) {
1850    reinterpret_cast<MutableCodePointTrie *>(trie)->name = name;
1851}
1852#endif
1853