11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci/* 41cb0ef41Sopenharmony_ci******************************************************************************* 51cb0ef41Sopenharmony_ci* Copyright (C) 2010-2011, International Business Machines 61cb0ef41Sopenharmony_ci* Corporation and others. All Rights Reserved. 71cb0ef41Sopenharmony_ci******************************************************************************* 81cb0ef41Sopenharmony_ci* file name: ucharstrie.h 91cb0ef41Sopenharmony_ci* encoding: UTF-8 101cb0ef41Sopenharmony_ci* tab size: 8 (not used) 111cb0ef41Sopenharmony_ci* indentation:4 121cb0ef41Sopenharmony_ci* 131cb0ef41Sopenharmony_ci* created on: 2010nov14 141cb0ef41Sopenharmony_ci* created by: Markus W. Scherer 151cb0ef41Sopenharmony_ci*/ 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 181cb0ef41Sopenharmony_ci#include "unicode/appendable.h" 191cb0ef41Sopenharmony_ci#include "unicode/ucharstrie.h" 201cb0ef41Sopenharmony_ci#include "unicode/uobject.h" 211cb0ef41Sopenharmony_ci#include "unicode/utf16.h" 221cb0ef41Sopenharmony_ci#include "cmemory.h" 231cb0ef41Sopenharmony_ci#include "uassert.h" 241cb0ef41Sopenharmony_ci 251cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN 261cb0ef41Sopenharmony_ci 271cb0ef41Sopenharmony_ciUCharsTrie::~UCharsTrie() { 281cb0ef41Sopenharmony_ci uprv_free(ownedArray_); 291cb0ef41Sopenharmony_ci} 301cb0ef41Sopenharmony_ci 311cb0ef41Sopenharmony_ciUStringTrieResult 321cb0ef41Sopenharmony_ciUCharsTrie::current() const { 331cb0ef41Sopenharmony_ci const char16_t *pos=pos_; 341cb0ef41Sopenharmony_ci if(pos==nullptr) { 351cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 361cb0ef41Sopenharmony_ci } else { 371cb0ef41Sopenharmony_ci int32_t node; 381cb0ef41Sopenharmony_ci return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ? 391cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 401cb0ef41Sopenharmony_ci } 411cb0ef41Sopenharmony_ci} 421cb0ef41Sopenharmony_ci 431cb0ef41Sopenharmony_ciUStringTrieResult 441cb0ef41Sopenharmony_ciUCharsTrie::firstForCodePoint(UChar32 cp) { 451cb0ef41Sopenharmony_ci return cp<=0xffff ? 461cb0ef41Sopenharmony_ci first(cp) : 471cb0ef41Sopenharmony_ci (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ? 481cb0ef41Sopenharmony_ci next(U16_TRAIL(cp)) : 491cb0ef41Sopenharmony_ci USTRINGTRIE_NO_MATCH); 501cb0ef41Sopenharmony_ci} 511cb0ef41Sopenharmony_ci 521cb0ef41Sopenharmony_ciUStringTrieResult 531cb0ef41Sopenharmony_ciUCharsTrie::nextForCodePoint(UChar32 cp) { 541cb0ef41Sopenharmony_ci return cp<=0xffff ? 551cb0ef41Sopenharmony_ci next(cp) : 561cb0ef41Sopenharmony_ci (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ? 571cb0ef41Sopenharmony_ci next(U16_TRAIL(cp)) : 581cb0ef41Sopenharmony_ci USTRINGTRIE_NO_MATCH); 591cb0ef41Sopenharmony_ci} 601cb0ef41Sopenharmony_ci 611cb0ef41Sopenharmony_ciUStringTrieResult 621cb0ef41Sopenharmony_ciUCharsTrie::branchNext(const char16_t *pos, int32_t length, int32_t uchar) { 631cb0ef41Sopenharmony_ci // Branch according to the current unit. 641cb0ef41Sopenharmony_ci if(length==0) { 651cb0ef41Sopenharmony_ci length=*pos++; 661cb0ef41Sopenharmony_ci } 671cb0ef41Sopenharmony_ci ++length; 681cb0ef41Sopenharmony_ci // The length of the branch is the number of units to select from. 691cb0ef41Sopenharmony_ci // The data structure encodes a binary search. 701cb0ef41Sopenharmony_ci while(length>kMaxBranchLinearSubNodeLength) { 711cb0ef41Sopenharmony_ci if(uchar<*pos++) { 721cb0ef41Sopenharmony_ci length>>=1; 731cb0ef41Sopenharmony_ci pos=jumpByDelta(pos); 741cb0ef41Sopenharmony_ci } else { 751cb0ef41Sopenharmony_ci length=length-(length>>1); 761cb0ef41Sopenharmony_ci pos=skipDelta(pos); 771cb0ef41Sopenharmony_ci } 781cb0ef41Sopenharmony_ci } 791cb0ef41Sopenharmony_ci // Drop down to linear search for the last few units. 801cb0ef41Sopenharmony_ci // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 811cb0ef41Sopenharmony_ci // and divides length by 2. 821cb0ef41Sopenharmony_ci do { 831cb0ef41Sopenharmony_ci if(uchar==*pos++) { 841cb0ef41Sopenharmony_ci UStringTrieResult result; 851cb0ef41Sopenharmony_ci int32_t node=*pos; 861cb0ef41Sopenharmony_ci if(node&kValueIsFinal) { 871cb0ef41Sopenharmony_ci // Leave the final value for getValue() to read. 881cb0ef41Sopenharmony_ci result=USTRINGTRIE_FINAL_VALUE; 891cb0ef41Sopenharmony_ci } else { 901cb0ef41Sopenharmony_ci // Use the non-final value as the jump delta. 911cb0ef41Sopenharmony_ci ++pos; 921cb0ef41Sopenharmony_ci // int32_t delta=readValue(pos, node); 931cb0ef41Sopenharmony_ci int32_t delta; 941cb0ef41Sopenharmony_ci if(node<kMinTwoUnitValueLead) { 951cb0ef41Sopenharmony_ci delta=node; 961cb0ef41Sopenharmony_ci } else if(node<kThreeUnitValueLead) { 971cb0ef41Sopenharmony_ci delta=((node-kMinTwoUnitValueLead)<<16)|*pos++; 981cb0ef41Sopenharmony_ci } else { 991cb0ef41Sopenharmony_ci delta=(pos[0]<<16)|pos[1]; 1001cb0ef41Sopenharmony_ci pos+=2; 1011cb0ef41Sopenharmony_ci } 1021cb0ef41Sopenharmony_ci // end readValue() 1031cb0ef41Sopenharmony_ci pos+=delta; 1041cb0ef41Sopenharmony_ci node=*pos; 1051cb0ef41Sopenharmony_ci result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; 1061cb0ef41Sopenharmony_ci } 1071cb0ef41Sopenharmony_ci pos_=pos; 1081cb0ef41Sopenharmony_ci return result; 1091cb0ef41Sopenharmony_ci } 1101cb0ef41Sopenharmony_ci --length; 1111cb0ef41Sopenharmony_ci pos=skipValue(pos); 1121cb0ef41Sopenharmony_ci } while(length>1); 1131cb0ef41Sopenharmony_ci if(uchar==*pos++) { 1141cb0ef41Sopenharmony_ci pos_=pos; 1151cb0ef41Sopenharmony_ci int32_t node=*pos; 1161cb0ef41Sopenharmony_ci return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; 1171cb0ef41Sopenharmony_ci } else { 1181cb0ef41Sopenharmony_ci stop(); 1191cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 1201cb0ef41Sopenharmony_ci } 1211cb0ef41Sopenharmony_ci} 1221cb0ef41Sopenharmony_ci 1231cb0ef41Sopenharmony_ciUStringTrieResult 1241cb0ef41Sopenharmony_ciUCharsTrie::nextImpl(const char16_t *pos, int32_t uchar) { 1251cb0ef41Sopenharmony_ci int32_t node=*pos++; 1261cb0ef41Sopenharmony_ci for(;;) { 1271cb0ef41Sopenharmony_ci if(node<kMinLinearMatch) { 1281cb0ef41Sopenharmony_ci return branchNext(pos, node, uchar); 1291cb0ef41Sopenharmony_ci } else if(node<kMinValueLead) { 1301cb0ef41Sopenharmony_ci // Match the first of length+1 units. 1311cb0ef41Sopenharmony_ci int32_t length=node-kMinLinearMatch; // Actual match length minus 1. 1321cb0ef41Sopenharmony_ci if(uchar==*pos++) { 1331cb0ef41Sopenharmony_ci remainingMatchLength_=--length; 1341cb0ef41Sopenharmony_ci pos_=pos; 1351cb0ef41Sopenharmony_ci return (length<0 && (node=*pos)>=kMinValueLead) ? 1361cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 1371cb0ef41Sopenharmony_ci } else { 1381cb0ef41Sopenharmony_ci // No match. 1391cb0ef41Sopenharmony_ci break; 1401cb0ef41Sopenharmony_ci } 1411cb0ef41Sopenharmony_ci } else if(node&kValueIsFinal) { 1421cb0ef41Sopenharmony_ci // No further matching units. 1431cb0ef41Sopenharmony_ci break; 1441cb0ef41Sopenharmony_ci } else { 1451cb0ef41Sopenharmony_ci // Skip intermediate value. 1461cb0ef41Sopenharmony_ci pos=skipNodeValue(pos, node); 1471cb0ef41Sopenharmony_ci node&=kNodeTypeMask; 1481cb0ef41Sopenharmony_ci } 1491cb0ef41Sopenharmony_ci } 1501cb0ef41Sopenharmony_ci stop(); 1511cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 1521cb0ef41Sopenharmony_ci} 1531cb0ef41Sopenharmony_ci 1541cb0ef41Sopenharmony_ciUStringTrieResult 1551cb0ef41Sopenharmony_ciUCharsTrie::next(int32_t uchar) { 1561cb0ef41Sopenharmony_ci const char16_t *pos=pos_; 1571cb0ef41Sopenharmony_ci if(pos==nullptr) { 1581cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 1591cb0ef41Sopenharmony_ci } 1601cb0ef41Sopenharmony_ci int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 1611cb0ef41Sopenharmony_ci if(length>=0) { 1621cb0ef41Sopenharmony_ci // Remaining part of a linear-match node. 1631cb0ef41Sopenharmony_ci if(uchar==*pos++) { 1641cb0ef41Sopenharmony_ci remainingMatchLength_=--length; 1651cb0ef41Sopenharmony_ci pos_=pos; 1661cb0ef41Sopenharmony_ci int32_t node; 1671cb0ef41Sopenharmony_ci return (length<0 && (node=*pos)>=kMinValueLead) ? 1681cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 1691cb0ef41Sopenharmony_ci } else { 1701cb0ef41Sopenharmony_ci stop(); 1711cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 1721cb0ef41Sopenharmony_ci } 1731cb0ef41Sopenharmony_ci } 1741cb0ef41Sopenharmony_ci return nextImpl(pos, uchar); 1751cb0ef41Sopenharmony_ci} 1761cb0ef41Sopenharmony_ci 1771cb0ef41Sopenharmony_ciUStringTrieResult 1781cb0ef41Sopenharmony_ciUCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) { 1791cb0ef41Sopenharmony_ci const char16_t *s=ptr; 1801cb0ef41Sopenharmony_ci if(sLength<0 ? *s==0 : sLength==0) { 1811cb0ef41Sopenharmony_ci // Empty input. 1821cb0ef41Sopenharmony_ci return current(); 1831cb0ef41Sopenharmony_ci } 1841cb0ef41Sopenharmony_ci const char16_t *pos=pos_; 1851cb0ef41Sopenharmony_ci if(pos==nullptr) { 1861cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 1871cb0ef41Sopenharmony_ci } 1881cb0ef41Sopenharmony_ci int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 1891cb0ef41Sopenharmony_ci for(;;) { 1901cb0ef41Sopenharmony_ci // Fetch the next input unit, if there is one. 1911cb0ef41Sopenharmony_ci // Continue a linear-match node without rechecking sLength<0. 1921cb0ef41Sopenharmony_ci int32_t uchar; 1931cb0ef41Sopenharmony_ci if(sLength<0) { 1941cb0ef41Sopenharmony_ci for(;;) { 1951cb0ef41Sopenharmony_ci if((uchar=*s++)==0) { 1961cb0ef41Sopenharmony_ci remainingMatchLength_=length; 1971cb0ef41Sopenharmony_ci pos_=pos; 1981cb0ef41Sopenharmony_ci int32_t node; 1991cb0ef41Sopenharmony_ci return (length<0 && (node=*pos)>=kMinValueLead) ? 2001cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 2011cb0ef41Sopenharmony_ci } 2021cb0ef41Sopenharmony_ci if(length<0) { 2031cb0ef41Sopenharmony_ci remainingMatchLength_=length; 2041cb0ef41Sopenharmony_ci break; 2051cb0ef41Sopenharmony_ci } 2061cb0ef41Sopenharmony_ci if(uchar!=*pos) { 2071cb0ef41Sopenharmony_ci stop(); 2081cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2091cb0ef41Sopenharmony_ci } 2101cb0ef41Sopenharmony_ci ++pos; 2111cb0ef41Sopenharmony_ci --length; 2121cb0ef41Sopenharmony_ci } 2131cb0ef41Sopenharmony_ci } else { 2141cb0ef41Sopenharmony_ci for(;;) { 2151cb0ef41Sopenharmony_ci if(sLength==0) { 2161cb0ef41Sopenharmony_ci remainingMatchLength_=length; 2171cb0ef41Sopenharmony_ci pos_=pos; 2181cb0ef41Sopenharmony_ci int32_t node; 2191cb0ef41Sopenharmony_ci return (length<0 && (node=*pos)>=kMinValueLead) ? 2201cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 2211cb0ef41Sopenharmony_ci } 2221cb0ef41Sopenharmony_ci uchar=*s++; 2231cb0ef41Sopenharmony_ci --sLength; 2241cb0ef41Sopenharmony_ci if(length<0) { 2251cb0ef41Sopenharmony_ci remainingMatchLength_=length; 2261cb0ef41Sopenharmony_ci break; 2271cb0ef41Sopenharmony_ci } 2281cb0ef41Sopenharmony_ci if(uchar!=*pos) { 2291cb0ef41Sopenharmony_ci stop(); 2301cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2311cb0ef41Sopenharmony_ci } 2321cb0ef41Sopenharmony_ci ++pos; 2331cb0ef41Sopenharmony_ci --length; 2341cb0ef41Sopenharmony_ci } 2351cb0ef41Sopenharmony_ci } 2361cb0ef41Sopenharmony_ci int32_t node=*pos++; 2371cb0ef41Sopenharmony_ci for(;;) { 2381cb0ef41Sopenharmony_ci if(node<kMinLinearMatch) { 2391cb0ef41Sopenharmony_ci UStringTrieResult result=branchNext(pos, node, uchar); 2401cb0ef41Sopenharmony_ci if(result==USTRINGTRIE_NO_MATCH) { 2411cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2421cb0ef41Sopenharmony_ci } 2431cb0ef41Sopenharmony_ci // Fetch the next input unit, if there is one. 2441cb0ef41Sopenharmony_ci if(sLength<0) { 2451cb0ef41Sopenharmony_ci if((uchar=*s++)==0) { 2461cb0ef41Sopenharmony_ci return result; 2471cb0ef41Sopenharmony_ci } 2481cb0ef41Sopenharmony_ci } else { 2491cb0ef41Sopenharmony_ci if(sLength==0) { 2501cb0ef41Sopenharmony_ci return result; 2511cb0ef41Sopenharmony_ci } 2521cb0ef41Sopenharmony_ci uchar=*s++; 2531cb0ef41Sopenharmony_ci --sLength; 2541cb0ef41Sopenharmony_ci } 2551cb0ef41Sopenharmony_ci if(result==USTRINGTRIE_FINAL_VALUE) { 2561cb0ef41Sopenharmony_ci // No further matching units. 2571cb0ef41Sopenharmony_ci stop(); 2581cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2591cb0ef41Sopenharmony_ci } 2601cb0ef41Sopenharmony_ci pos=pos_; // branchNext() advanced pos and wrote it to pos_ . 2611cb0ef41Sopenharmony_ci node=*pos++; 2621cb0ef41Sopenharmony_ci } else if(node<kMinValueLead) { 2631cb0ef41Sopenharmony_ci // Match length+1 units. 2641cb0ef41Sopenharmony_ci length=node-kMinLinearMatch; // Actual match length minus 1. 2651cb0ef41Sopenharmony_ci if(uchar!=*pos) { 2661cb0ef41Sopenharmony_ci stop(); 2671cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2681cb0ef41Sopenharmony_ci } 2691cb0ef41Sopenharmony_ci ++pos; 2701cb0ef41Sopenharmony_ci --length; 2711cb0ef41Sopenharmony_ci break; 2721cb0ef41Sopenharmony_ci } else if(node&kValueIsFinal) { 2731cb0ef41Sopenharmony_ci // No further matching units. 2741cb0ef41Sopenharmony_ci stop(); 2751cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2761cb0ef41Sopenharmony_ci } else { 2771cb0ef41Sopenharmony_ci // Skip intermediate value. 2781cb0ef41Sopenharmony_ci pos=skipNodeValue(pos, node); 2791cb0ef41Sopenharmony_ci node&=kNodeTypeMask; 2801cb0ef41Sopenharmony_ci } 2811cb0ef41Sopenharmony_ci } 2821cb0ef41Sopenharmony_ci } 2831cb0ef41Sopenharmony_ci} 2841cb0ef41Sopenharmony_ci 2851cb0ef41Sopenharmony_ciconst char16_t * 2861cb0ef41Sopenharmony_ciUCharsTrie::findUniqueValueFromBranch(const char16_t *pos, int32_t length, 2871cb0ef41Sopenharmony_ci UBool haveUniqueValue, int32_t &uniqueValue) { 2881cb0ef41Sopenharmony_ci while(length>kMaxBranchLinearSubNodeLength) { 2891cb0ef41Sopenharmony_ci ++pos; // ignore the comparison unit 2901cb0ef41Sopenharmony_ci if(nullptr==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { 2911cb0ef41Sopenharmony_ci return nullptr; 2921cb0ef41Sopenharmony_ci } 2931cb0ef41Sopenharmony_ci length=length-(length>>1); 2941cb0ef41Sopenharmony_ci pos=skipDelta(pos); 2951cb0ef41Sopenharmony_ci } 2961cb0ef41Sopenharmony_ci do { 2971cb0ef41Sopenharmony_ci ++pos; // ignore a comparison unit 2981cb0ef41Sopenharmony_ci // handle its value 2991cb0ef41Sopenharmony_ci int32_t node=*pos++; 3001cb0ef41Sopenharmony_ci UBool isFinal=(UBool)(node>>15); 3011cb0ef41Sopenharmony_ci node&=0x7fff; 3021cb0ef41Sopenharmony_ci int32_t value=readValue(pos, node); 3031cb0ef41Sopenharmony_ci pos=skipValue(pos, node); 3041cb0ef41Sopenharmony_ci if(isFinal) { 3051cb0ef41Sopenharmony_ci if(haveUniqueValue) { 3061cb0ef41Sopenharmony_ci if(value!=uniqueValue) { 3071cb0ef41Sopenharmony_ci return nullptr; 3081cb0ef41Sopenharmony_ci } 3091cb0ef41Sopenharmony_ci } else { 3101cb0ef41Sopenharmony_ci uniqueValue=value; 3111cb0ef41Sopenharmony_ci haveUniqueValue=true; 3121cb0ef41Sopenharmony_ci } 3131cb0ef41Sopenharmony_ci } else { 3141cb0ef41Sopenharmony_ci if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { 3151cb0ef41Sopenharmony_ci return nullptr; 3161cb0ef41Sopenharmony_ci } 3171cb0ef41Sopenharmony_ci haveUniqueValue=true; 3181cb0ef41Sopenharmony_ci } 3191cb0ef41Sopenharmony_ci } while(--length>1); 3201cb0ef41Sopenharmony_ci return pos+1; // ignore the last comparison unit 3211cb0ef41Sopenharmony_ci} 3221cb0ef41Sopenharmony_ci 3231cb0ef41Sopenharmony_ciUBool 3241cb0ef41Sopenharmony_ciUCharsTrie::findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) { 3251cb0ef41Sopenharmony_ci int32_t node=*pos++; 3261cb0ef41Sopenharmony_ci for(;;) { 3271cb0ef41Sopenharmony_ci if(node<kMinLinearMatch) { 3281cb0ef41Sopenharmony_ci if(node==0) { 3291cb0ef41Sopenharmony_ci node=*pos++; 3301cb0ef41Sopenharmony_ci } 3311cb0ef41Sopenharmony_ci pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue); 3321cb0ef41Sopenharmony_ci if(pos==nullptr) { 3331cb0ef41Sopenharmony_ci return false; 3341cb0ef41Sopenharmony_ci } 3351cb0ef41Sopenharmony_ci haveUniqueValue=true; 3361cb0ef41Sopenharmony_ci node=*pos++; 3371cb0ef41Sopenharmony_ci } else if(node<kMinValueLead) { 3381cb0ef41Sopenharmony_ci // linear-match node 3391cb0ef41Sopenharmony_ci pos+=node-kMinLinearMatch+1; // Ignore the match units. 3401cb0ef41Sopenharmony_ci node=*pos++; 3411cb0ef41Sopenharmony_ci } else { 3421cb0ef41Sopenharmony_ci UBool isFinal=(UBool)(node>>15); 3431cb0ef41Sopenharmony_ci int32_t value; 3441cb0ef41Sopenharmony_ci if(isFinal) { 3451cb0ef41Sopenharmony_ci value=readValue(pos, node&0x7fff); 3461cb0ef41Sopenharmony_ci } else { 3471cb0ef41Sopenharmony_ci value=readNodeValue(pos, node); 3481cb0ef41Sopenharmony_ci } 3491cb0ef41Sopenharmony_ci if(haveUniqueValue) { 3501cb0ef41Sopenharmony_ci if(value!=uniqueValue) { 3511cb0ef41Sopenharmony_ci return false; 3521cb0ef41Sopenharmony_ci } 3531cb0ef41Sopenharmony_ci } else { 3541cb0ef41Sopenharmony_ci uniqueValue=value; 3551cb0ef41Sopenharmony_ci haveUniqueValue=true; 3561cb0ef41Sopenharmony_ci } 3571cb0ef41Sopenharmony_ci if(isFinal) { 3581cb0ef41Sopenharmony_ci return true; 3591cb0ef41Sopenharmony_ci } 3601cb0ef41Sopenharmony_ci pos=skipNodeValue(pos, node); 3611cb0ef41Sopenharmony_ci node&=kNodeTypeMask; 3621cb0ef41Sopenharmony_ci } 3631cb0ef41Sopenharmony_ci } 3641cb0ef41Sopenharmony_ci} 3651cb0ef41Sopenharmony_ci 3661cb0ef41Sopenharmony_ciint32_t 3671cb0ef41Sopenharmony_ciUCharsTrie::getNextUChars(Appendable &out) const { 3681cb0ef41Sopenharmony_ci const char16_t *pos=pos_; 3691cb0ef41Sopenharmony_ci if(pos==nullptr) { 3701cb0ef41Sopenharmony_ci return 0; 3711cb0ef41Sopenharmony_ci } 3721cb0ef41Sopenharmony_ci if(remainingMatchLength_>=0) { 3731cb0ef41Sopenharmony_ci out.appendCodeUnit(*pos); // Next unit of a pending linear-match node. 3741cb0ef41Sopenharmony_ci return 1; 3751cb0ef41Sopenharmony_ci } 3761cb0ef41Sopenharmony_ci int32_t node=*pos++; 3771cb0ef41Sopenharmony_ci if(node>=kMinValueLead) { 3781cb0ef41Sopenharmony_ci if(node&kValueIsFinal) { 3791cb0ef41Sopenharmony_ci return 0; 3801cb0ef41Sopenharmony_ci } else { 3811cb0ef41Sopenharmony_ci pos=skipNodeValue(pos, node); 3821cb0ef41Sopenharmony_ci node&=kNodeTypeMask; 3831cb0ef41Sopenharmony_ci } 3841cb0ef41Sopenharmony_ci } 3851cb0ef41Sopenharmony_ci if(node<kMinLinearMatch) { 3861cb0ef41Sopenharmony_ci if(node==0) { 3871cb0ef41Sopenharmony_ci node=*pos++; 3881cb0ef41Sopenharmony_ci } 3891cb0ef41Sopenharmony_ci out.reserveAppendCapacity(++node); 3901cb0ef41Sopenharmony_ci getNextBranchUChars(pos, node, out); 3911cb0ef41Sopenharmony_ci return node; 3921cb0ef41Sopenharmony_ci } else { 3931cb0ef41Sopenharmony_ci // First unit of the linear-match node. 3941cb0ef41Sopenharmony_ci out.appendCodeUnit(*pos); 3951cb0ef41Sopenharmony_ci return 1; 3961cb0ef41Sopenharmony_ci } 3971cb0ef41Sopenharmony_ci} 3981cb0ef41Sopenharmony_ci 3991cb0ef41Sopenharmony_civoid 4001cb0ef41Sopenharmony_ciUCharsTrie::getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out) { 4011cb0ef41Sopenharmony_ci while(length>kMaxBranchLinearSubNodeLength) { 4021cb0ef41Sopenharmony_ci ++pos; // ignore the comparison unit 4031cb0ef41Sopenharmony_ci getNextBranchUChars(jumpByDelta(pos), length>>1, out); 4041cb0ef41Sopenharmony_ci length=length-(length>>1); 4051cb0ef41Sopenharmony_ci pos=skipDelta(pos); 4061cb0ef41Sopenharmony_ci } 4071cb0ef41Sopenharmony_ci do { 4081cb0ef41Sopenharmony_ci out.appendCodeUnit(*pos++); 4091cb0ef41Sopenharmony_ci pos=skipValue(pos); 4101cb0ef41Sopenharmony_ci } while(--length>1); 4111cb0ef41Sopenharmony_ci out.appendCodeUnit(*pos); 4121cb0ef41Sopenharmony_ci} 4131cb0ef41Sopenharmony_ci 4141cb0ef41Sopenharmony_ciU_NAMESPACE_END 415