11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci/* 41cb0ef41Sopenharmony_ci******************************************************************************* 51cb0ef41Sopenharmony_ci* Copyright (C) 2010-2011, International Business Machines 61cb0ef41Sopenharmony_ci* Corporation and others. All Rights Reserved. 71cb0ef41Sopenharmony_ci******************************************************************************* 81cb0ef41Sopenharmony_ci* file name: bytestrie.cpp 91cb0ef41Sopenharmony_ci* encoding: UTF-8 101cb0ef41Sopenharmony_ci* tab size: 8 (not used) 111cb0ef41Sopenharmony_ci* indentation:4 121cb0ef41Sopenharmony_ci* 131cb0ef41Sopenharmony_ci* created on: 2010sep25 141cb0ef41Sopenharmony_ci* created by: Markus W. Scherer 151cb0ef41Sopenharmony_ci*/ 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 181cb0ef41Sopenharmony_ci#include "unicode/bytestream.h" 191cb0ef41Sopenharmony_ci#include "unicode/bytestrie.h" 201cb0ef41Sopenharmony_ci#include "unicode/uobject.h" 211cb0ef41Sopenharmony_ci#include "cmemory.h" 221cb0ef41Sopenharmony_ci#include "uassert.h" 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_ciBytesTrie::~BytesTrie() { 271cb0ef41Sopenharmony_ci uprv_free(ownedArray_); 281cb0ef41Sopenharmony_ci} 291cb0ef41Sopenharmony_ci 301cb0ef41Sopenharmony_ci// lead byte already shifted right by 1. 311cb0ef41Sopenharmony_ciint32_t 321cb0ef41Sopenharmony_ciBytesTrie::readValue(const uint8_t *pos, int32_t leadByte) { 331cb0ef41Sopenharmony_ci int32_t value; 341cb0ef41Sopenharmony_ci if(leadByte<kMinTwoByteValueLead) { 351cb0ef41Sopenharmony_ci value=leadByte-kMinOneByteValueLead; 361cb0ef41Sopenharmony_ci } else if(leadByte<kMinThreeByteValueLead) { 371cb0ef41Sopenharmony_ci value=((leadByte-kMinTwoByteValueLead)<<8)|*pos; 381cb0ef41Sopenharmony_ci } else if(leadByte<kFourByteValueLead) { 391cb0ef41Sopenharmony_ci value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1]; 401cb0ef41Sopenharmony_ci } else if(leadByte==kFourByteValueLead) { 411cb0ef41Sopenharmony_ci value=(pos[0]<<16)|(pos[1]<<8)|pos[2]; 421cb0ef41Sopenharmony_ci } else { 431cb0ef41Sopenharmony_ci value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3]; 441cb0ef41Sopenharmony_ci } 451cb0ef41Sopenharmony_ci return value; 461cb0ef41Sopenharmony_ci} 471cb0ef41Sopenharmony_ci 481cb0ef41Sopenharmony_ciconst uint8_t * 491cb0ef41Sopenharmony_ciBytesTrie::jumpByDelta(const uint8_t *pos) { 501cb0ef41Sopenharmony_ci int32_t delta=*pos++; 511cb0ef41Sopenharmony_ci if(delta<kMinTwoByteDeltaLead) { 521cb0ef41Sopenharmony_ci // nothing to do 531cb0ef41Sopenharmony_ci } else if(delta<kMinThreeByteDeltaLead) { 541cb0ef41Sopenharmony_ci delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++; 551cb0ef41Sopenharmony_ci } else if(delta<kFourByteDeltaLead) { 561cb0ef41Sopenharmony_ci delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1]; 571cb0ef41Sopenharmony_ci pos+=2; 581cb0ef41Sopenharmony_ci } else if(delta==kFourByteDeltaLead) { 591cb0ef41Sopenharmony_ci delta=(pos[0]<<16)|(pos[1]<<8)|pos[2]; 601cb0ef41Sopenharmony_ci pos+=3; 611cb0ef41Sopenharmony_ci } else { 621cb0ef41Sopenharmony_ci delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3]; 631cb0ef41Sopenharmony_ci pos+=4; 641cb0ef41Sopenharmony_ci } 651cb0ef41Sopenharmony_ci return pos+delta; 661cb0ef41Sopenharmony_ci} 671cb0ef41Sopenharmony_ci 681cb0ef41Sopenharmony_ciUStringTrieResult 691cb0ef41Sopenharmony_ciBytesTrie::current() const { 701cb0ef41Sopenharmony_ci const uint8_t *pos=pos_; 711cb0ef41Sopenharmony_ci if(pos==nullptr) { 721cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 731cb0ef41Sopenharmony_ci } else { 741cb0ef41Sopenharmony_ci int32_t node; 751cb0ef41Sopenharmony_ci return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ? 761cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 771cb0ef41Sopenharmony_ci } 781cb0ef41Sopenharmony_ci} 791cb0ef41Sopenharmony_ci 801cb0ef41Sopenharmony_ciUStringTrieResult 811cb0ef41Sopenharmony_ciBytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { 821cb0ef41Sopenharmony_ci // Branch according to the current byte. 831cb0ef41Sopenharmony_ci if(length==0) { 841cb0ef41Sopenharmony_ci length=*pos++; 851cb0ef41Sopenharmony_ci } 861cb0ef41Sopenharmony_ci ++length; 871cb0ef41Sopenharmony_ci // The length of the branch is the number of bytes to select from. 881cb0ef41Sopenharmony_ci // The data structure encodes a binary search. 891cb0ef41Sopenharmony_ci while(length>kMaxBranchLinearSubNodeLength) { 901cb0ef41Sopenharmony_ci if(inByte<*pos++) { 911cb0ef41Sopenharmony_ci length>>=1; 921cb0ef41Sopenharmony_ci pos=jumpByDelta(pos); 931cb0ef41Sopenharmony_ci } else { 941cb0ef41Sopenharmony_ci length=length-(length>>1); 951cb0ef41Sopenharmony_ci pos=skipDelta(pos); 961cb0ef41Sopenharmony_ci } 971cb0ef41Sopenharmony_ci } 981cb0ef41Sopenharmony_ci // Drop down to linear search for the last few bytes. 991cb0ef41Sopenharmony_ci // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 1001cb0ef41Sopenharmony_ci // and divides length by 2. 1011cb0ef41Sopenharmony_ci do { 1021cb0ef41Sopenharmony_ci if(inByte==*pos++) { 1031cb0ef41Sopenharmony_ci UStringTrieResult result; 1041cb0ef41Sopenharmony_ci int32_t node=*pos; 1051cb0ef41Sopenharmony_ci U_ASSERT(node>=kMinValueLead); 1061cb0ef41Sopenharmony_ci if(node&kValueIsFinal) { 1071cb0ef41Sopenharmony_ci // Leave the final value for getValue() to read. 1081cb0ef41Sopenharmony_ci result=USTRINGTRIE_FINAL_VALUE; 1091cb0ef41Sopenharmony_ci } else { 1101cb0ef41Sopenharmony_ci // Use the non-final value as the jump delta. 1111cb0ef41Sopenharmony_ci ++pos; 1121cb0ef41Sopenharmony_ci // int32_t delta=readValue(pos, node>>1); 1131cb0ef41Sopenharmony_ci node>>=1; 1141cb0ef41Sopenharmony_ci int32_t delta; 1151cb0ef41Sopenharmony_ci if(node<kMinTwoByteValueLead) { 1161cb0ef41Sopenharmony_ci delta=node-kMinOneByteValueLead; 1171cb0ef41Sopenharmony_ci } else if(node<kMinThreeByteValueLead) { 1181cb0ef41Sopenharmony_ci delta=((node-kMinTwoByteValueLead)<<8)|*pos++; 1191cb0ef41Sopenharmony_ci } else if(node<kFourByteValueLead) { 1201cb0ef41Sopenharmony_ci delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1]; 1211cb0ef41Sopenharmony_ci pos+=2; 1221cb0ef41Sopenharmony_ci } else if(node==kFourByteValueLead) { 1231cb0ef41Sopenharmony_ci delta=(pos[0]<<16)|(pos[1]<<8)|pos[2]; 1241cb0ef41Sopenharmony_ci pos+=3; 1251cb0ef41Sopenharmony_ci } else { 1261cb0ef41Sopenharmony_ci delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3]; 1271cb0ef41Sopenharmony_ci pos+=4; 1281cb0ef41Sopenharmony_ci } 1291cb0ef41Sopenharmony_ci // end readValue() 1301cb0ef41Sopenharmony_ci pos+=delta; 1311cb0ef41Sopenharmony_ci node=*pos; 1321cb0ef41Sopenharmony_ci result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; 1331cb0ef41Sopenharmony_ci } 1341cb0ef41Sopenharmony_ci pos_=pos; 1351cb0ef41Sopenharmony_ci return result; 1361cb0ef41Sopenharmony_ci } 1371cb0ef41Sopenharmony_ci --length; 1381cb0ef41Sopenharmony_ci pos=skipValue(pos); 1391cb0ef41Sopenharmony_ci } while(length>1); 1401cb0ef41Sopenharmony_ci if(inByte==*pos++) { 1411cb0ef41Sopenharmony_ci pos_=pos; 1421cb0ef41Sopenharmony_ci int32_t node=*pos; 1431cb0ef41Sopenharmony_ci return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; 1441cb0ef41Sopenharmony_ci } else { 1451cb0ef41Sopenharmony_ci stop(); 1461cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 1471cb0ef41Sopenharmony_ci } 1481cb0ef41Sopenharmony_ci} 1491cb0ef41Sopenharmony_ci 1501cb0ef41Sopenharmony_ciUStringTrieResult 1511cb0ef41Sopenharmony_ciBytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) { 1521cb0ef41Sopenharmony_ci for(;;) { 1531cb0ef41Sopenharmony_ci int32_t node=*pos++; 1541cb0ef41Sopenharmony_ci if(node<kMinLinearMatch) { 1551cb0ef41Sopenharmony_ci return branchNext(pos, node, inByte); 1561cb0ef41Sopenharmony_ci } else if(node<kMinValueLead) { 1571cb0ef41Sopenharmony_ci // Match the first of length+1 bytes. 1581cb0ef41Sopenharmony_ci int32_t length=node-kMinLinearMatch; // Actual match length minus 1. 1591cb0ef41Sopenharmony_ci if(inByte==*pos++) { 1601cb0ef41Sopenharmony_ci remainingMatchLength_=--length; 1611cb0ef41Sopenharmony_ci pos_=pos; 1621cb0ef41Sopenharmony_ci return (length<0 && (node=*pos)>=kMinValueLead) ? 1631cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 1641cb0ef41Sopenharmony_ci } else { 1651cb0ef41Sopenharmony_ci // No match. 1661cb0ef41Sopenharmony_ci break; 1671cb0ef41Sopenharmony_ci } 1681cb0ef41Sopenharmony_ci } else if(node&kValueIsFinal) { 1691cb0ef41Sopenharmony_ci // No further matching bytes. 1701cb0ef41Sopenharmony_ci break; 1711cb0ef41Sopenharmony_ci } else { 1721cb0ef41Sopenharmony_ci // Skip intermediate value. 1731cb0ef41Sopenharmony_ci pos=skipValue(pos, node); 1741cb0ef41Sopenharmony_ci // The next node must not also be a value node. 1751cb0ef41Sopenharmony_ci U_ASSERT(*pos<kMinValueLead); 1761cb0ef41Sopenharmony_ci } 1771cb0ef41Sopenharmony_ci } 1781cb0ef41Sopenharmony_ci stop(); 1791cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 1801cb0ef41Sopenharmony_ci} 1811cb0ef41Sopenharmony_ci 1821cb0ef41Sopenharmony_ciUStringTrieResult 1831cb0ef41Sopenharmony_ciBytesTrie::next(int32_t inByte) { 1841cb0ef41Sopenharmony_ci const uint8_t *pos=pos_; 1851cb0ef41Sopenharmony_ci if(pos==nullptr) { 1861cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 1871cb0ef41Sopenharmony_ci } 1881cb0ef41Sopenharmony_ci if(inByte<0) { 1891cb0ef41Sopenharmony_ci inByte+=0x100; 1901cb0ef41Sopenharmony_ci } 1911cb0ef41Sopenharmony_ci int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 1921cb0ef41Sopenharmony_ci if(length>=0) { 1931cb0ef41Sopenharmony_ci // Remaining part of a linear-match node. 1941cb0ef41Sopenharmony_ci if(inByte==*pos++) { 1951cb0ef41Sopenharmony_ci remainingMatchLength_=--length; 1961cb0ef41Sopenharmony_ci pos_=pos; 1971cb0ef41Sopenharmony_ci int32_t node; 1981cb0ef41Sopenharmony_ci return (length<0 && (node=*pos)>=kMinValueLead) ? 1991cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 2001cb0ef41Sopenharmony_ci } else { 2011cb0ef41Sopenharmony_ci stop(); 2021cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2031cb0ef41Sopenharmony_ci } 2041cb0ef41Sopenharmony_ci } 2051cb0ef41Sopenharmony_ci return nextImpl(pos, inByte); 2061cb0ef41Sopenharmony_ci} 2071cb0ef41Sopenharmony_ci 2081cb0ef41Sopenharmony_ciUStringTrieResult 2091cb0ef41Sopenharmony_ciBytesTrie::next(const char *s, int32_t sLength) { 2101cb0ef41Sopenharmony_ci if(sLength<0 ? *s==0 : sLength==0) { 2111cb0ef41Sopenharmony_ci // Empty input. 2121cb0ef41Sopenharmony_ci return current(); 2131cb0ef41Sopenharmony_ci } 2141cb0ef41Sopenharmony_ci const uint8_t *pos=pos_; 2151cb0ef41Sopenharmony_ci if(pos==nullptr) { 2161cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2171cb0ef41Sopenharmony_ci } 2181cb0ef41Sopenharmony_ci int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 2191cb0ef41Sopenharmony_ci for(;;) { 2201cb0ef41Sopenharmony_ci // Fetch the next input byte, if there is one. 2211cb0ef41Sopenharmony_ci // Continue a linear-match node without rechecking sLength<0. 2221cb0ef41Sopenharmony_ci int32_t inByte; 2231cb0ef41Sopenharmony_ci if(sLength<0) { 2241cb0ef41Sopenharmony_ci for(;;) { 2251cb0ef41Sopenharmony_ci if((inByte=*s++)==0) { 2261cb0ef41Sopenharmony_ci remainingMatchLength_=length; 2271cb0ef41Sopenharmony_ci pos_=pos; 2281cb0ef41Sopenharmony_ci int32_t node; 2291cb0ef41Sopenharmony_ci return (length<0 && (node=*pos)>=kMinValueLead) ? 2301cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 2311cb0ef41Sopenharmony_ci } 2321cb0ef41Sopenharmony_ci if(length<0) { 2331cb0ef41Sopenharmony_ci remainingMatchLength_=length; 2341cb0ef41Sopenharmony_ci break; 2351cb0ef41Sopenharmony_ci } 2361cb0ef41Sopenharmony_ci if(inByte!=*pos) { 2371cb0ef41Sopenharmony_ci stop(); 2381cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2391cb0ef41Sopenharmony_ci } 2401cb0ef41Sopenharmony_ci ++pos; 2411cb0ef41Sopenharmony_ci --length; 2421cb0ef41Sopenharmony_ci } 2431cb0ef41Sopenharmony_ci } else { 2441cb0ef41Sopenharmony_ci for(;;) { 2451cb0ef41Sopenharmony_ci if(sLength==0) { 2461cb0ef41Sopenharmony_ci remainingMatchLength_=length; 2471cb0ef41Sopenharmony_ci pos_=pos; 2481cb0ef41Sopenharmony_ci int32_t node; 2491cb0ef41Sopenharmony_ci return (length<0 && (node=*pos)>=kMinValueLead) ? 2501cb0ef41Sopenharmony_ci valueResult(node) : USTRINGTRIE_NO_VALUE; 2511cb0ef41Sopenharmony_ci } 2521cb0ef41Sopenharmony_ci inByte=*s++; 2531cb0ef41Sopenharmony_ci --sLength; 2541cb0ef41Sopenharmony_ci if(length<0) { 2551cb0ef41Sopenharmony_ci remainingMatchLength_=length; 2561cb0ef41Sopenharmony_ci break; 2571cb0ef41Sopenharmony_ci } 2581cb0ef41Sopenharmony_ci if(inByte!=*pos) { 2591cb0ef41Sopenharmony_ci stop(); 2601cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2611cb0ef41Sopenharmony_ci } 2621cb0ef41Sopenharmony_ci ++pos; 2631cb0ef41Sopenharmony_ci --length; 2641cb0ef41Sopenharmony_ci } 2651cb0ef41Sopenharmony_ci } 2661cb0ef41Sopenharmony_ci for(;;) { 2671cb0ef41Sopenharmony_ci int32_t node=*pos++; 2681cb0ef41Sopenharmony_ci if(node<kMinLinearMatch) { 2691cb0ef41Sopenharmony_ci UStringTrieResult result=branchNext(pos, node, inByte); 2701cb0ef41Sopenharmony_ci if(result==USTRINGTRIE_NO_MATCH) { 2711cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2721cb0ef41Sopenharmony_ci } 2731cb0ef41Sopenharmony_ci // Fetch the next input byte, if there is one. 2741cb0ef41Sopenharmony_ci if(sLength<0) { 2751cb0ef41Sopenharmony_ci if((inByte=*s++)==0) { 2761cb0ef41Sopenharmony_ci return result; 2771cb0ef41Sopenharmony_ci } 2781cb0ef41Sopenharmony_ci } else { 2791cb0ef41Sopenharmony_ci if(sLength==0) { 2801cb0ef41Sopenharmony_ci return result; 2811cb0ef41Sopenharmony_ci } 2821cb0ef41Sopenharmony_ci inByte=*s++; 2831cb0ef41Sopenharmony_ci --sLength; 2841cb0ef41Sopenharmony_ci } 2851cb0ef41Sopenharmony_ci if(result==USTRINGTRIE_FINAL_VALUE) { 2861cb0ef41Sopenharmony_ci // No further matching bytes. 2871cb0ef41Sopenharmony_ci stop(); 2881cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2891cb0ef41Sopenharmony_ci } 2901cb0ef41Sopenharmony_ci pos=pos_; // branchNext() advanced pos and wrote it to pos_ . 2911cb0ef41Sopenharmony_ci } else if(node<kMinValueLead) { 2921cb0ef41Sopenharmony_ci // Match length+1 bytes. 2931cb0ef41Sopenharmony_ci length=node-kMinLinearMatch; // Actual match length minus 1. 2941cb0ef41Sopenharmony_ci if(inByte!=*pos) { 2951cb0ef41Sopenharmony_ci stop(); 2961cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 2971cb0ef41Sopenharmony_ci } 2981cb0ef41Sopenharmony_ci ++pos; 2991cb0ef41Sopenharmony_ci --length; 3001cb0ef41Sopenharmony_ci break; 3011cb0ef41Sopenharmony_ci } else if(node&kValueIsFinal) { 3021cb0ef41Sopenharmony_ci // No further matching bytes. 3031cb0ef41Sopenharmony_ci stop(); 3041cb0ef41Sopenharmony_ci return USTRINGTRIE_NO_MATCH; 3051cb0ef41Sopenharmony_ci } else { 3061cb0ef41Sopenharmony_ci // Skip intermediate value. 3071cb0ef41Sopenharmony_ci pos=skipValue(pos, node); 3081cb0ef41Sopenharmony_ci // The next node must not also be a value node. 3091cb0ef41Sopenharmony_ci U_ASSERT(*pos<kMinValueLead); 3101cb0ef41Sopenharmony_ci } 3111cb0ef41Sopenharmony_ci } 3121cb0ef41Sopenharmony_ci } 3131cb0ef41Sopenharmony_ci} 3141cb0ef41Sopenharmony_ci 3151cb0ef41Sopenharmony_ciconst uint8_t * 3161cb0ef41Sopenharmony_ciBytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length, 3171cb0ef41Sopenharmony_ci UBool haveUniqueValue, int32_t &uniqueValue) { 3181cb0ef41Sopenharmony_ci while(length>kMaxBranchLinearSubNodeLength) { 3191cb0ef41Sopenharmony_ci ++pos; // ignore the comparison byte 3201cb0ef41Sopenharmony_ci if(nullptr==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { 3211cb0ef41Sopenharmony_ci return nullptr; 3221cb0ef41Sopenharmony_ci } 3231cb0ef41Sopenharmony_ci length=length-(length>>1); 3241cb0ef41Sopenharmony_ci pos=skipDelta(pos); 3251cb0ef41Sopenharmony_ci } 3261cb0ef41Sopenharmony_ci do { 3271cb0ef41Sopenharmony_ci ++pos; // ignore a comparison byte 3281cb0ef41Sopenharmony_ci // handle its value 3291cb0ef41Sopenharmony_ci int32_t node=*pos++; 3301cb0ef41Sopenharmony_ci UBool isFinal=(UBool)(node&kValueIsFinal); 3311cb0ef41Sopenharmony_ci int32_t value=readValue(pos, node>>1); 3321cb0ef41Sopenharmony_ci pos=skipValue(pos, node); 3331cb0ef41Sopenharmony_ci if(isFinal) { 3341cb0ef41Sopenharmony_ci if(haveUniqueValue) { 3351cb0ef41Sopenharmony_ci if(value!=uniqueValue) { 3361cb0ef41Sopenharmony_ci return nullptr; 3371cb0ef41Sopenharmony_ci } 3381cb0ef41Sopenharmony_ci } else { 3391cb0ef41Sopenharmony_ci uniqueValue=value; 3401cb0ef41Sopenharmony_ci haveUniqueValue=true; 3411cb0ef41Sopenharmony_ci } 3421cb0ef41Sopenharmony_ci } else { 3431cb0ef41Sopenharmony_ci if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { 3441cb0ef41Sopenharmony_ci return nullptr; 3451cb0ef41Sopenharmony_ci } 3461cb0ef41Sopenharmony_ci haveUniqueValue=true; 3471cb0ef41Sopenharmony_ci } 3481cb0ef41Sopenharmony_ci } while(--length>1); 3491cb0ef41Sopenharmony_ci return pos+1; // ignore the last comparison byte 3501cb0ef41Sopenharmony_ci} 3511cb0ef41Sopenharmony_ci 3521cb0ef41Sopenharmony_ciUBool 3531cb0ef41Sopenharmony_ciBytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) { 3541cb0ef41Sopenharmony_ci for(;;) { 3551cb0ef41Sopenharmony_ci int32_t node=*pos++; 3561cb0ef41Sopenharmony_ci if(node<kMinLinearMatch) { 3571cb0ef41Sopenharmony_ci if(node==0) { 3581cb0ef41Sopenharmony_ci node=*pos++; 3591cb0ef41Sopenharmony_ci } 3601cb0ef41Sopenharmony_ci pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue); 3611cb0ef41Sopenharmony_ci if(pos==nullptr) { 3621cb0ef41Sopenharmony_ci return false; 3631cb0ef41Sopenharmony_ci } 3641cb0ef41Sopenharmony_ci haveUniqueValue=true; 3651cb0ef41Sopenharmony_ci } else if(node<kMinValueLead) { 3661cb0ef41Sopenharmony_ci // linear-match node 3671cb0ef41Sopenharmony_ci pos+=node-kMinLinearMatch+1; // Ignore the match bytes. 3681cb0ef41Sopenharmony_ci } else { 3691cb0ef41Sopenharmony_ci UBool isFinal=(UBool)(node&kValueIsFinal); 3701cb0ef41Sopenharmony_ci int32_t value=readValue(pos, node>>1); 3711cb0ef41Sopenharmony_ci if(haveUniqueValue) { 3721cb0ef41Sopenharmony_ci if(value!=uniqueValue) { 3731cb0ef41Sopenharmony_ci return false; 3741cb0ef41Sopenharmony_ci } 3751cb0ef41Sopenharmony_ci } else { 3761cb0ef41Sopenharmony_ci uniqueValue=value; 3771cb0ef41Sopenharmony_ci haveUniqueValue=true; 3781cb0ef41Sopenharmony_ci } 3791cb0ef41Sopenharmony_ci if(isFinal) { 3801cb0ef41Sopenharmony_ci return true; 3811cb0ef41Sopenharmony_ci } 3821cb0ef41Sopenharmony_ci pos=skipValue(pos, node); 3831cb0ef41Sopenharmony_ci } 3841cb0ef41Sopenharmony_ci } 3851cb0ef41Sopenharmony_ci} 3861cb0ef41Sopenharmony_ci 3871cb0ef41Sopenharmony_ciint32_t 3881cb0ef41Sopenharmony_ciBytesTrie::getNextBytes(ByteSink &out) const { 3891cb0ef41Sopenharmony_ci const uint8_t *pos=pos_; 3901cb0ef41Sopenharmony_ci if(pos==nullptr) { 3911cb0ef41Sopenharmony_ci return 0; 3921cb0ef41Sopenharmony_ci } 3931cb0ef41Sopenharmony_ci if(remainingMatchLength_>=0) { 3941cb0ef41Sopenharmony_ci append(out, *pos); // Next byte of a pending linear-match node. 3951cb0ef41Sopenharmony_ci return 1; 3961cb0ef41Sopenharmony_ci } 3971cb0ef41Sopenharmony_ci int32_t node=*pos++; 3981cb0ef41Sopenharmony_ci if(node>=kMinValueLead) { 3991cb0ef41Sopenharmony_ci if(node&kValueIsFinal) { 4001cb0ef41Sopenharmony_ci return 0; 4011cb0ef41Sopenharmony_ci } else { 4021cb0ef41Sopenharmony_ci pos=skipValue(pos, node); 4031cb0ef41Sopenharmony_ci node=*pos++; 4041cb0ef41Sopenharmony_ci U_ASSERT(node<kMinValueLead); 4051cb0ef41Sopenharmony_ci } 4061cb0ef41Sopenharmony_ci } 4071cb0ef41Sopenharmony_ci if(node<kMinLinearMatch) { 4081cb0ef41Sopenharmony_ci if(node==0) { 4091cb0ef41Sopenharmony_ci node=*pos++; 4101cb0ef41Sopenharmony_ci } 4111cb0ef41Sopenharmony_ci getNextBranchBytes(pos, ++node, out); 4121cb0ef41Sopenharmony_ci return node; 4131cb0ef41Sopenharmony_ci } else { 4141cb0ef41Sopenharmony_ci // First byte of the linear-match node. 4151cb0ef41Sopenharmony_ci append(out, *pos); 4161cb0ef41Sopenharmony_ci return 1; 4171cb0ef41Sopenharmony_ci } 4181cb0ef41Sopenharmony_ci} 4191cb0ef41Sopenharmony_ci 4201cb0ef41Sopenharmony_civoid 4211cb0ef41Sopenharmony_ciBytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) { 4221cb0ef41Sopenharmony_ci while(length>kMaxBranchLinearSubNodeLength) { 4231cb0ef41Sopenharmony_ci ++pos; // ignore the comparison byte 4241cb0ef41Sopenharmony_ci getNextBranchBytes(jumpByDelta(pos), length>>1, out); 4251cb0ef41Sopenharmony_ci length=length-(length>>1); 4261cb0ef41Sopenharmony_ci pos=skipDelta(pos); 4271cb0ef41Sopenharmony_ci } 4281cb0ef41Sopenharmony_ci do { 4291cb0ef41Sopenharmony_ci append(out, *pos++); 4301cb0ef41Sopenharmony_ci pos=skipValue(pos); 4311cb0ef41Sopenharmony_ci } while(--length>1); 4321cb0ef41Sopenharmony_ci append(out, *pos); 4331cb0ef41Sopenharmony_ci} 4341cb0ef41Sopenharmony_ci 4351cb0ef41Sopenharmony_civoid 4361cb0ef41Sopenharmony_ciBytesTrie::append(ByteSink &out, int c) { 4371cb0ef41Sopenharmony_ci char ch=(char)c; 4381cb0ef41Sopenharmony_ci out.Append(&ch, 1); 4391cb0ef41Sopenharmony_ci} 4401cb0ef41Sopenharmony_ci 4411cb0ef41Sopenharmony_ciU_NAMESPACE_END 442