11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
31cb0ef41Sopenharmony_ci/*
41cb0ef41Sopenharmony_ci*******************************************************************************
51cb0ef41Sopenharmony_ci*   Copyright (C) 2010-2011, International Business Machines
61cb0ef41Sopenharmony_ci*   Corporation and others.  All Rights Reserved.
71cb0ef41Sopenharmony_ci*******************************************************************************
81cb0ef41Sopenharmony_ci*   file name:  bytestrie.cpp
91cb0ef41Sopenharmony_ci*   encoding:   UTF-8
101cb0ef41Sopenharmony_ci*   tab size:   8 (not used)
111cb0ef41Sopenharmony_ci*   indentation:4
121cb0ef41Sopenharmony_ci*
131cb0ef41Sopenharmony_ci*   created on: 2010sep25
141cb0ef41Sopenharmony_ci*   created by: Markus W. Scherer
151cb0ef41Sopenharmony_ci*/
161cb0ef41Sopenharmony_ci
171cb0ef41Sopenharmony_ci#include "unicode/utypes.h"
181cb0ef41Sopenharmony_ci#include "unicode/bytestream.h"
191cb0ef41Sopenharmony_ci#include "unicode/bytestrie.h"
201cb0ef41Sopenharmony_ci#include "unicode/uobject.h"
211cb0ef41Sopenharmony_ci#include "cmemory.h"
221cb0ef41Sopenharmony_ci#include "uassert.h"
231cb0ef41Sopenharmony_ci
241cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN
251cb0ef41Sopenharmony_ci
261cb0ef41Sopenharmony_ciBytesTrie::~BytesTrie() {
271cb0ef41Sopenharmony_ci    uprv_free(ownedArray_);
281cb0ef41Sopenharmony_ci}
291cb0ef41Sopenharmony_ci
301cb0ef41Sopenharmony_ci// lead byte already shifted right by 1.
311cb0ef41Sopenharmony_ciint32_t
321cb0ef41Sopenharmony_ciBytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
331cb0ef41Sopenharmony_ci    int32_t value;
341cb0ef41Sopenharmony_ci    if(leadByte<kMinTwoByteValueLead) {
351cb0ef41Sopenharmony_ci        value=leadByte-kMinOneByteValueLead;
361cb0ef41Sopenharmony_ci    } else if(leadByte<kMinThreeByteValueLead) {
371cb0ef41Sopenharmony_ci        value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
381cb0ef41Sopenharmony_ci    } else if(leadByte<kFourByteValueLead) {
391cb0ef41Sopenharmony_ci        value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
401cb0ef41Sopenharmony_ci    } else if(leadByte==kFourByteValueLead) {
411cb0ef41Sopenharmony_ci        value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
421cb0ef41Sopenharmony_ci    } else {
431cb0ef41Sopenharmony_ci        value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
441cb0ef41Sopenharmony_ci    }
451cb0ef41Sopenharmony_ci    return value;
461cb0ef41Sopenharmony_ci}
471cb0ef41Sopenharmony_ci
481cb0ef41Sopenharmony_ciconst uint8_t *
491cb0ef41Sopenharmony_ciBytesTrie::jumpByDelta(const uint8_t *pos) {
501cb0ef41Sopenharmony_ci    int32_t delta=*pos++;
511cb0ef41Sopenharmony_ci    if(delta<kMinTwoByteDeltaLead) {
521cb0ef41Sopenharmony_ci        // nothing to do
531cb0ef41Sopenharmony_ci    } else if(delta<kMinThreeByteDeltaLead) {
541cb0ef41Sopenharmony_ci        delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
551cb0ef41Sopenharmony_ci    } else if(delta<kFourByteDeltaLead) {
561cb0ef41Sopenharmony_ci        delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
571cb0ef41Sopenharmony_ci        pos+=2;
581cb0ef41Sopenharmony_ci    } else if(delta==kFourByteDeltaLead) {
591cb0ef41Sopenharmony_ci        delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
601cb0ef41Sopenharmony_ci        pos+=3;
611cb0ef41Sopenharmony_ci    } else {
621cb0ef41Sopenharmony_ci        delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
631cb0ef41Sopenharmony_ci        pos+=4;
641cb0ef41Sopenharmony_ci    }
651cb0ef41Sopenharmony_ci    return pos+delta;
661cb0ef41Sopenharmony_ci}
671cb0ef41Sopenharmony_ci
681cb0ef41Sopenharmony_ciUStringTrieResult
691cb0ef41Sopenharmony_ciBytesTrie::current() const {
701cb0ef41Sopenharmony_ci    const uint8_t *pos=pos_;
711cb0ef41Sopenharmony_ci    if(pos==nullptr) {
721cb0ef41Sopenharmony_ci        return USTRINGTRIE_NO_MATCH;
731cb0ef41Sopenharmony_ci    } else {
741cb0ef41Sopenharmony_ci        int32_t node;
751cb0ef41Sopenharmony_ci        return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
761cb0ef41Sopenharmony_ci                valueResult(node) : USTRINGTRIE_NO_VALUE;
771cb0ef41Sopenharmony_ci    }
781cb0ef41Sopenharmony_ci}
791cb0ef41Sopenharmony_ci
801cb0ef41Sopenharmony_ciUStringTrieResult
811cb0ef41Sopenharmony_ciBytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
821cb0ef41Sopenharmony_ci    // Branch according to the current byte.
831cb0ef41Sopenharmony_ci    if(length==0) {
841cb0ef41Sopenharmony_ci        length=*pos++;
851cb0ef41Sopenharmony_ci    }
861cb0ef41Sopenharmony_ci    ++length;
871cb0ef41Sopenharmony_ci    // The length of the branch is the number of bytes to select from.
881cb0ef41Sopenharmony_ci    // The data structure encodes a binary search.
891cb0ef41Sopenharmony_ci    while(length>kMaxBranchLinearSubNodeLength) {
901cb0ef41Sopenharmony_ci        if(inByte<*pos++) {
911cb0ef41Sopenharmony_ci            length>>=1;
921cb0ef41Sopenharmony_ci            pos=jumpByDelta(pos);
931cb0ef41Sopenharmony_ci        } else {
941cb0ef41Sopenharmony_ci            length=length-(length>>1);
951cb0ef41Sopenharmony_ci            pos=skipDelta(pos);
961cb0ef41Sopenharmony_ci        }
971cb0ef41Sopenharmony_ci    }
981cb0ef41Sopenharmony_ci    // Drop down to linear search for the last few bytes.
991cb0ef41Sopenharmony_ci    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
1001cb0ef41Sopenharmony_ci    // and divides length by 2.
1011cb0ef41Sopenharmony_ci    do {
1021cb0ef41Sopenharmony_ci        if(inByte==*pos++) {
1031cb0ef41Sopenharmony_ci            UStringTrieResult result;
1041cb0ef41Sopenharmony_ci            int32_t node=*pos;
1051cb0ef41Sopenharmony_ci            U_ASSERT(node>=kMinValueLead);
1061cb0ef41Sopenharmony_ci            if(node&kValueIsFinal) {
1071cb0ef41Sopenharmony_ci                // Leave the final value for getValue() to read.
1081cb0ef41Sopenharmony_ci                result=USTRINGTRIE_FINAL_VALUE;
1091cb0ef41Sopenharmony_ci            } else {
1101cb0ef41Sopenharmony_ci                // Use the non-final value as the jump delta.
1111cb0ef41Sopenharmony_ci                ++pos;
1121cb0ef41Sopenharmony_ci                // int32_t delta=readValue(pos, node>>1);
1131cb0ef41Sopenharmony_ci                node>>=1;
1141cb0ef41Sopenharmony_ci                int32_t delta;
1151cb0ef41Sopenharmony_ci                if(node<kMinTwoByteValueLead) {
1161cb0ef41Sopenharmony_ci                    delta=node-kMinOneByteValueLead;
1171cb0ef41Sopenharmony_ci                } else if(node<kMinThreeByteValueLead) {
1181cb0ef41Sopenharmony_ci                    delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
1191cb0ef41Sopenharmony_ci                } else if(node<kFourByteValueLead) {
1201cb0ef41Sopenharmony_ci                    delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
1211cb0ef41Sopenharmony_ci                    pos+=2;
1221cb0ef41Sopenharmony_ci                } else if(node==kFourByteValueLead) {
1231cb0ef41Sopenharmony_ci                    delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
1241cb0ef41Sopenharmony_ci                    pos+=3;
1251cb0ef41Sopenharmony_ci                } else {
1261cb0ef41Sopenharmony_ci                    delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
1271cb0ef41Sopenharmony_ci                    pos+=4;
1281cb0ef41Sopenharmony_ci                }
1291cb0ef41Sopenharmony_ci                // end readValue()
1301cb0ef41Sopenharmony_ci                pos+=delta;
1311cb0ef41Sopenharmony_ci                node=*pos;
1321cb0ef41Sopenharmony_ci                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
1331cb0ef41Sopenharmony_ci            }
1341cb0ef41Sopenharmony_ci            pos_=pos;
1351cb0ef41Sopenharmony_ci            return result;
1361cb0ef41Sopenharmony_ci        }
1371cb0ef41Sopenharmony_ci        --length;
1381cb0ef41Sopenharmony_ci        pos=skipValue(pos);
1391cb0ef41Sopenharmony_ci    } while(length>1);
1401cb0ef41Sopenharmony_ci    if(inByte==*pos++) {
1411cb0ef41Sopenharmony_ci        pos_=pos;
1421cb0ef41Sopenharmony_ci        int32_t node=*pos;
1431cb0ef41Sopenharmony_ci        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
1441cb0ef41Sopenharmony_ci    } else {
1451cb0ef41Sopenharmony_ci        stop();
1461cb0ef41Sopenharmony_ci        return USTRINGTRIE_NO_MATCH;
1471cb0ef41Sopenharmony_ci    }
1481cb0ef41Sopenharmony_ci}
1491cb0ef41Sopenharmony_ci
1501cb0ef41Sopenharmony_ciUStringTrieResult
1511cb0ef41Sopenharmony_ciBytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
1521cb0ef41Sopenharmony_ci    for(;;) {
1531cb0ef41Sopenharmony_ci        int32_t node=*pos++;
1541cb0ef41Sopenharmony_ci        if(node<kMinLinearMatch) {
1551cb0ef41Sopenharmony_ci            return branchNext(pos, node, inByte);
1561cb0ef41Sopenharmony_ci        } else if(node<kMinValueLead) {
1571cb0ef41Sopenharmony_ci            // Match the first of length+1 bytes.
1581cb0ef41Sopenharmony_ci            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
1591cb0ef41Sopenharmony_ci            if(inByte==*pos++) {
1601cb0ef41Sopenharmony_ci                remainingMatchLength_=--length;
1611cb0ef41Sopenharmony_ci                pos_=pos;
1621cb0ef41Sopenharmony_ci                return (length<0 && (node=*pos)>=kMinValueLead) ?
1631cb0ef41Sopenharmony_ci                        valueResult(node) : USTRINGTRIE_NO_VALUE;
1641cb0ef41Sopenharmony_ci            } else {
1651cb0ef41Sopenharmony_ci                // No match.
1661cb0ef41Sopenharmony_ci                break;
1671cb0ef41Sopenharmony_ci            }
1681cb0ef41Sopenharmony_ci        } else if(node&kValueIsFinal) {
1691cb0ef41Sopenharmony_ci            // No further matching bytes.
1701cb0ef41Sopenharmony_ci            break;
1711cb0ef41Sopenharmony_ci        } else {
1721cb0ef41Sopenharmony_ci            // Skip intermediate value.
1731cb0ef41Sopenharmony_ci            pos=skipValue(pos, node);
1741cb0ef41Sopenharmony_ci            // The next node must not also be a value node.
1751cb0ef41Sopenharmony_ci            U_ASSERT(*pos<kMinValueLead);
1761cb0ef41Sopenharmony_ci        }
1771cb0ef41Sopenharmony_ci    }
1781cb0ef41Sopenharmony_ci    stop();
1791cb0ef41Sopenharmony_ci    return USTRINGTRIE_NO_MATCH;
1801cb0ef41Sopenharmony_ci}
1811cb0ef41Sopenharmony_ci
1821cb0ef41Sopenharmony_ciUStringTrieResult
1831cb0ef41Sopenharmony_ciBytesTrie::next(int32_t inByte) {
1841cb0ef41Sopenharmony_ci    const uint8_t *pos=pos_;
1851cb0ef41Sopenharmony_ci    if(pos==nullptr) {
1861cb0ef41Sopenharmony_ci        return USTRINGTRIE_NO_MATCH;
1871cb0ef41Sopenharmony_ci    }
1881cb0ef41Sopenharmony_ci    if(inByte<0) {
1891cb0ef41Sopenharmony_ci        inByte+=0x100;
1901cb0ef41Sopenharmony_ci    }
1911cb0ef41Sopenharmony_ci    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
1921cb0ef41Sopenharmony_ci    if(length>=0) {
1931cb0ef41Sopenharmony_ci        // Remaining part of a linear-match node.
1941cb0ef41Sopenharmony_ci        if(inByte==*pos++) {
1951cb0ef41Sopenharmony_ci            remainingMatchLength_=--length;
1961cb0ef41Sopenharmony_ci            pos_=pos;
1971cb0ef41Sopenharmony_ci            int32_t node;
1981cb0ef41Sopenharmony_ci            return (length<0 && (node=*pos)>=kMinValueLead) ?
1991cb0ef41Sopenharmony_ci                    valueResult(node) : USTRINGTRIE_NO_VALUE;
2001cb0ef41Sopenharmony_ci        } else {
2011cb0ef41Sopenharmony_ci            stop();
2021cb0ef41Sopenharmony_ci            return USTRINGTRIE_NO_MATCH;
2031cb0ef41Sopenharmony_ci        }
2041cb0ef41Sopenharmony_ci    }
2051cb0ef41Sopenharmony_ci    return nextImpl(pos, inByte);
2061cb0ef41Sopenharmony_ci}
2071cb0ef41Sopenharmony_ci
2081cb0ef41Sopenharmony_ciUStringTrieResult
2091cb0ef41Sopenharmony_ciBytesTrie::next(const char *s, int32_t sLength) {
2101cb0ef41Sopenharmony_ci    if(sLength<0 ? *s==0 : sLength==0) {
2111cb0ef41Sopenharmony_ci        // Empty input.
2121cb0ef41Sopenharmony_ci        return current();
2131cb0ef41Sopenharmony_ci    }
2141cb0ef41Sopenharmony_ci    const uint8_t *pos=pos_;
2151cb0ef41Sopenharmony_ci    if(pos==nullptr) {
2161cb0ef41Sopenharmony_ci        return USTRINGTRIE_NO_MATCH;
2171cb0ef41Sopenharmony_ci    }
2181cb0ef41Sopenharmony_ci    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
2191cb0ef41Sopenharmony_ci    for(;;) {
2201cb0ef41Sopenharmony_ci        // Fetch the next input byte, if there is one.
2211cb0ef41Sopenharmony_ci        // Continue a linear-match node without rechecking sLength<0.
2221cb0ef41Sopenharmony_ci        int32_t inByte;
2231cb0ef41Sopenharmony_ci        if(sLength<0) {
2241cb0ef41Sopenharmony_ci            for(;;) {
2251cb0ef41Sopenharmony_ci                if((inByte=*s++)==0) {
2261cb0ef41Sopenharmony_ci                    remainingMatchLength_=length;
2271cb0ef41Sopenharmony_ci                    pos_=pos;
2281cb0ef41Sopenharmony_ci                    int32_t node;
2291cb0ef41Sopenharmony_ci                    return (length<0 && (node=*pos)>=kMinValueLead) ?
2301cb0ef41Sopenharmony_ci                            valueResult(node) : USTRINGTRIE_NO_VALUE;
2311cb0ef41Sopenharmony_ci                }
2321cb0ef41Sopenharmony_ci                if(length<0) {
2331cb0ef41Sopenharmony_ci                    remainingMatchLength_=length;
2341cb0ef41Sopenharmony_ci                    break;
2351cb0ef41Sopenharmony_ci                }
2361cb0ef41Sopenharmony_ci                if(inByte!=*pos) {
2371cb0ef41Sopenharmony_ci                    stop();
2381cb0ef41Sopenharmony_ci                    return USTRINGTRIE_NO_MATCH;
2391cb0ef41Sopenharmony_ci                }
2401cb0ef41Sopenharmony_ci                ++pos;
2411cb0ef41Sopenharmony_ci                --length;
2421cb0ef41Sopenharmony_ci            }
2431cb0ef41Sopenharmony_ci        } else {
2441cb0ef41Sopenharmony_ci            for(;;) {
2451cb0ef41Sopenharmony_ci                if(sLength==0) {
2461cb0ef41Sopenharmony_ci                    remainingMatchLength_=length;
2471cb0ef41Sopenharmony_ci                    pos_=pos;
2481cb0ef41Sopenharmony_ci                    int32_t node;
2491cb0ef41Sopenharmony_ci                    return (length<0 && (node=*pos)>=kMinValueLead) ?
2501cb0ef41Sopenharmony_ci                            valueResult(node) : USTRINGTRIE_NO_VALUE;
2511cb0ef41Sopenharmony_ci                }
2521cb0ef41Sopenharmony_ci                inByte=*s++;
2531cb0ef41Sopenharmony_ci                --sLength;
2541cb0ef41Sopenharmony_ci                if(length<0) {
2551cb0ef41Sopenharmony_ci                    remainingMatchLength_=length;
2561cb0ef41Sopenharmony_ci                    break;
2571cb0ef41Sopenharmony_ci                }
2581cb0ef41Sopenharmony_ci                if(inByte!=*pos) {
2591cb0ef41Sopenharmony_ci                    stop();
2601cb0ef41Sopenharmony_ci                    return USTRINGTRIE_NO_MATCH;
2611cb0ef41Sopenharmony_ci                }
2621cb0ef41Sopenharmony_ci                ++pos;
2631cb0ef41Sopenharmony_ci                --length;
2641cb0ef41Sopenharmony_ci            }
2651cb0ef41Sopenharmony_ci        }
2661cb0ef41Sopenharmony_ci        for(;;) {
2671cb0ef41Sopenharmony_ci            int32_t node=*pos++;
2681cb0ef41Sopenharmony_ci            if(node<kMinLinearMatch) {
2691cb0ef41Sopenharmony_ci                UStringTrieResult result=branchNext(pos, node, inByte);
2701cb0ef41Sopenharmony_ci                if(result==USTRINGTRIE_NO_MATCH) {
2711cb0ef41Sopenharmony_ci                    return USTRINGTRIE_NO_MATCH;
2721cb0ef41Sopenharmony_ci                }
2731cb0ef41Sopenharmony_ci                // Fetch the next input byte, if there is one.
2741cb0ef41Sopenharmony_ci                if(sLength<0) {
2751cb0ef41Sopenharmony_ci                    if((inByte=*s++)==0) {
2761cb0ef41Sopenharmony_ci                        return result;
2771cb0ef41Sopenharmony_ci                    }
2781cb0ef41Sopenharmony_ci                } else {
2791cb0ef41Sopenharmony_ci                    if(sLength==0) {
2801cb0ef41Sopenharmony_ci                        return result;
2811cb0ef41Sopenharmony_ci                    }
2821cb0ef41Sopenharmony_ci                    inByte=*s++;
2831cb0ef41Sopenharmony_ci                    --sLength;
2841cb0ef41Sopenharmony_ci                }
2851cb0ef41Sopenharmony_ci                if(result==USTRINGTRIE_FINAL_VALUE) {
2861cb0ef41Sopenharmony_ci                    // No further matching bytes.
2871cb0ef41Sopenharmony_ci                    stop();
2881cb0ef41Sopenharmony_ci                    return USTRINGTRIE_NO_MATCH;
2891cb0ef41Sopenharmony_ci                }
2901cb0ef41Sopenharmony_ci                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
2911cb0ef41Sopenharmony_ci            } else if(node<kMinValueLead) {
2921cb0ef41Sopenharmony_ci                // Match length+1 bytes.
2931cb0ef41Sopenharmony_ci                length=node-kMinLinearMatch;  // Actual match length minus 1.
2941cb0ef41Sopenharmony_ci                if(inByte!=*pos) {
2951cb0ef41Sopenharmony_ci                    stop();
2961cb0ef41Sopenharmony_ci                    return USTRINGTRIE_NO_MATCH;
2971cb0ef41Sopenharmony_ci                }
2981cb0ef41Sopenharmony_ci                ++pos;
2991cb0ef41Sopenharmony_ci                --length;
3001cb0ef41Sopenharmony_ci                break;
3011cb0ef41Sopenharmony_ci            } else if(node&kValueIsFinal) {
3021cb0ef41Sopenharmony_ci                // No further matching bytes.
3031cb0ef41Sopenharmony_ci                stop();
3041cb0ef41Sopenharmony_ci                return USTRINGTRIE_NO_MATCH;
3051cb0ef41Sopenharmony_ci            } else {
3061cb0ef41Sopenharmony_ci                // Skip intermediate value.
3071cb0ef41Sopenharmony_ci                pos=skipValue(pos, node);
3081cb0ef41Sopenharmony_ci                // The next node must not also be a value node.
3091cb0ef41Sopenharmony_ci                U_ASSERT(*pos<kMinValueLead);
3101cb0ef41Sopenharmony_ci            }
3111cb0ef41Sopenharmony_ci        }
3121cb0ef41Sopenharmony_ci    }
3131cb0ef41Sopenharmony_ci}
3141cb0ef41Sopenharmony_ci
3151cb0ef41Sopenharmony_ciconst uint8_t *
3161cb0ef41Sopenharmony_ciBytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
3171cb0ef41Sopenharmony_ci                                     UBool haveUniqueValue, int32_t &uniqueValue) {
3181cb0ef41Sopenharmony_ci    while(length>kMaxBranchLinearSubNodeLength) {
3191cb0ef41Sopenharmony_ci        ++pos;  // ignore the comparison byte
3201cb0ef41Sopenharmony_ci        if(nullptr==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
3211cb0ef41Sopenharmony_ci            return nullptr;
3221cb0ef41Sopenharmony_ci        }
3231cb0ef41Sopenharmony_ci        length=length-(length>>1);
3241cb0ef41Sopenharmony_ci        pos=skipDelta(pos);
3251cb0ef41Sopenharmony_ci    }
3261cb0ef41Sopenharmony_ci    do {
3271cb0ef41Sopenharmony_ci        ++pos;  // ignore a comparison byte
3281cb0ef41Sopenharmony_ci        // handle its value
3291cb0ef41Sopenharmony_ci        int32_t node=*pos++;
3301cb0ef41Sopenharmony_ci        UBool isFinal=(UBool)(node&kValueIsFinal);
3311cb0ef41Sopenharmony_ci        int32_t value=readValue(pos, node>>1);
3321cb0ef41Sopenharmony_ci        pos=skipValue(pos, node);
3331cb0ef41Sopenharmony_ci        if(isFinal) {
3341cb0ef41Sopenharmony_ci            if(haveUniqueValue) {
3351cb0ef41Sopenharmony_ci                if(value!=uniqueValue) {
3361cb0ef41Sopenharmony_ci                    return nullptr;
3371cb0ef41Sopenharmony_ci                }
3381cb0ef41Sopenharmony_ci            } else {
3391cb0ef41Sopenharmony_ci                uniqueValue=value;
3401cb0ef41Sopenharmony_ci                haveUniqueValue=true;
3411cb0ef41Sopenharmony_ci            }
3421cb0ef41Sopenharmony_ci        } else {
3431cb0ef41Sopenharmony_ci            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
3441cb0ef41Sopenharmony_ci                return nullptr;
3451cb0ef41Sopenharmony_ci            }
3461cb0ef41Sopenharmony_ci            haveUniqueValue=true;
3471cb0ef41Sopenharmony_ci        }
3481cb0ef41Sopenharmony_ci    } while(--length>1);
3491cb0ef41Sopenharmony_ci    return pos+1;  // ignore the last comparison byte
3501cb0ef41Sopenharmony_ci}
3511cb0ef41Sopenharmony_ci
3521cb0ef41Sopenharmony_ciUBool
3531cb0ef41Sopenharmony_ciBytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
3541cb0ef41Sopenharmony_ci    for(;;) {
3551cb0ef41Sopenharmony_ci        int32_t node=*pos++;
3561cb0ef41Sopenharmony_ci        if(node<kMinLinearMatch) {
3571cb0ef41Sopenharmony_ci            if(node==0) {
3581cb0ef41Sopenharmony_ci                node=*pos++;
3591cb0ef41Sopenharmony_ci            }
3601cb0ef41Sopenharmony_ci            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
3611cb0ef41Sopenharmony_ci            if(pos==nullptr) {
3621cb0ef41Sopenharmony_ci                return false;
3631cb0ef41Sopenharmony_ci            }
3641cb0ef41Sopenharmony_ci            haveUniqueValue=true;
3651cb0ef41Sopenharmony_ci        } else if(node<kMinValueLead) {
3661cb0ef41Sopenharmony_ci            // linear-match node
3671cb0ef41Sopenharmony_ci            pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
3681cb0ef41Sopenharmony_ci        } else {
3691cb0ef41Sopenharmony_ci            UBool isFinal=(UBool)(node&kValueIsFinal);
3701cb0ef41Sopenharmony_ci            int32_t value=readValue(pos, node>>1);
3711cb0ef41Sopenharmony_ci            if(haveUniqueValue) {
3721cb0ef41Sopenharmony_ci                if(value!=uniqueValue) {
3731cb0ef41Sopenharmony_ci                    return false;
3741cb0ef41Sopenharmony_ci                }
3751cb0ef41Sopenharmony_ci            } else {
3761cb0ef41Sopenharmony_ci                uniqueValue=value;
3771cb0ef41Sopenharmony_ci                haveUniqueValue=true;
3781cb0ef41Sopenharmony_ci            }
3791cb0ef41Sopenharmony_ci            if(isFinal) {
3801cb0ef41Sopenharmony_ci                return true;
3811cb0ef41Sopenharmony_ci            }
3821cb0ef41Sopenharmony_ci            pos=skipValue(pos, node);
3831cb0ef41Sopenharmony_ci        }
3841cb0ef41Sopenharmony_ci    }
3851cb0ef41Sopenharmony_ci}
3861cb0ef41Sopenharmony_ci
3871cb0ef41Sopenharmony_ciint32_t
3881cb0ef41Sopenharmony_ciBytesTrie::getNextBytes(ByteSink &out) const {
3891cb0ef41Sopenharmony_ci    const uint8_t *pos=pos_;
3901cb0ef41Sopenharmony_ci    if(pos==nullptr) {
3911cb0ef41Sopenharmony_ci        return 0;
3921cb0ef41Sopenharmony_ci    }
3931cb0ef41Sopenharmony_ci    if(remainingMatchLength_>=0) {
3941cb0ef41Sopenharmony_ci        append(out, *pos);  // Next byte of a pending linear-match node.
3951cb0ef41Sopenharmony_ci        return 1;
3961cb0ef41Sopenharmony_ci    }
3971cb0ef41Sopenharmony_ci    int32_t node=*pos++;
3981cb0ef41Sopenharmony_ci    if(node>=kMinValueLead) {
3991cb0ef41Sopenharmony_ci        if(node&kValueIsFinal) {
4001cb0ef41Sopenharmony_ci            return 0;
4011cb0ef41Sopenharmony_ci        } else {
4021cb0ef41Sopenharmony_ci            pos=skipValue(pos, node);
4031cb0ef41Sopenharmony_ci            node=*pos++;
4041cb0ef41Sopenharmony_ci            U_ASSERT(node<kMinValueLead);
4051cb0ef41Sopenharmony_ci        }
4061cb0ef41Sopenharmony_ci    }
4071cb0ef41Sopenharmony_ci    if(node<kMinLinearMatch) {
4081cb0ef41Sopenharmony_ci        if(node==0) {
4091cb0ef41Sopenharmony_ci            node=*pos++;
4101cb0ef41Sopenharmony_ci        }
4111cb0ef41Sopenharmony_ci        getNextBranchBytes(pos, ++node, out);
4121cb0ef41Sopenharmony_ci        return node;
4131cb0ef41Sopenharmony_ci    } else {
4141cb0ef41Sopenharmony_ci        // First byte of the linear-match node.
4151cb0ef41Sopenharmony_ci        append(out, *pos);
4161cb0ef41Sopenharmony_ci        return 1;
4171cb0ef41Sopenharmony_ci    }
4181cb0ef41Sopenharmony_ci}
4191cb0ef41Sopenharmony_ci
4201cb0ef41Sopenharmony_civoid
4211cb0ef41Sopenharmony_ciBytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
4221cb0ef41Sopenharmony_ci    while(length>kMaxBranchLinearSubNodeLength) {
4231cb0ef41Sopenharmony_ci        ++pos;  // ignore the comparison byte
4241cb0ef41Sopenharmony_ci        getNextBranchBytes(jumpByDelta(pos), length>>1, out);
4251cb0ef41Sopenharmony_ci        length=length-(length>>1);
4261cb0ef41Sopenharmony_ci        pos=skipDelta(pos);
4271cb0ef41Sopenharmony_ci    }
4281cb0ef41Sopenharmony_ci    do {
4291cb0ef41Sopenharmony_ci        append(out, *pos++);
4301cb0ef41Sopenharmony_ci        pos=skipValue(pos);
4311cb0ef41Sopenharmony_ci    } while(--length>1);
4321cb0ef41Sopenharmony_ci    append(out, *pos);
4331cb0ef41Sopenharmony_ci}
4341cb0ef41Sopenharmony_ci
4351cb0ef41Sopenharmony_civoid
4361cb0ef41Sopenharmony_ciBytesTrie::append(ByteSink &out, int c) {
4371cb0ef41Sopenharmony_ci    char ch=(char)c;
4381cb0ef41Sopenharmony_ci    out.Append(&ch, 1);
4391cb0ef41Sopenharmony_ci}
4401cb0ef41Sopenharmony_ci
4411cb0ef41Sopenharmony_ciU_NAMESPACE_END
442