11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci/* 41cb0ef41Sopenharmony_ci******************************************************************************* 51cb0ef41Sopenharmony_ci* Copyright (C) 2013-2015, International Business Machines 61cb0ef41Sopenharmony_ci* Corporation and others. All Rights Reserved. 71cb0ef41Sopenharmony_ci******************************************************************************* 81cb0ef41Sopenharmony_ci* collationfastlatin.cpp 91cb0ef41Sopenharmony_ci* 101cb0ef41Sopenharmony_ci* created on: 2013aug18 111cb0ef41Sopenharmony_ci* created by: Markus W. Scherer 121cb0ef41Sopenharmony_ci*/ 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 151cb0ef41Sopenharmony_ci 161cb0ef41Sopenharmony_ci#if !UCONFIG_NO_COLLATION 171cb0ef41Sopenharmony_ci 181cb0ef41Sopenharmony_ci#include "unicode/ucol.h" 191cb0ef41Sopenharmony_ci#include "collationdata.h" 201cb0ef41Sopenharmony_ci#include "collationfastlatin.h" 211cb0ef41Sopenharmony_ci#include "collationsettings.h" 221cb0ef41Sopenharmony_ci#include "uassert.h" 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_ciint32_t 271cb0ef41Sopenharmony_ciCollationFastLatin::getOptions(const CollationData *data, const CollationSettings &settings, 281cb0ef41Sopenharmony_ci uint16_t *primaries, int32_t capacity) { 291cb0ef41Sopenharmony_ci const uint16_t *table = data->fastLatinTable; 301cb0ef41Sopenharmony_ci if(table == nullptr) { return -1; } 311cb0ef41Sopenharmony_ci U_ASSERT(capacity == LATIN_LIMIT); 321cb0ef41Sopenharmony_ci if(capacity != LATIN_LIMIT) { return -1; } 331cb0ef41Sopenharmony_ci 341cb0ef41Sopenharmony_ci uint32_t miniVarTop; 351cb0ef41Sopenharmony_ci if((settings.options & CollationSettings::ALTERNATE_MASK) == 0) { 361cb0ef41Sopenharmony_ci // No mini primaries are variable, set a variableTop just below the 371cb0ef41Sopenharmony_ci // lowest long mini primary. 381cb0ef41Sopenharmony_ci miniVarTop = MIN_LONG - 1; 391cb0ef41Sopenharmony_ci } else { 401cb0ef41Sopenharmony_ci int32_t headerLength = *table & 0xff; 411cb0ef41Sopenharmony_ci int32_t i = 1 + settings.getMaxVariable(); 421cb0ef41Sopenharmony_ci if(i >= headerLength) { 431cb0ef41Sopenharmony_ci return -1; // variableTop >= digits, should not occur 441cb0ef41Sopenharmony_ci } 451cb0ef41Sopenharmony_ci miniVarTop = table[i]; 461cb0ef41Sopenharmony_ci } 471cb0ef41Sopenharmony_ci 481cb0ef41Sopenharmony_ci UBool digitsAreReordered = false; 491cb0ef41Sopenharmony_ci if(settings.hasReordering()) { 501cb0ef41Sopenharmony_ci uint32_t prevStart = 0; 511cb0ef41Sopenharmony_ci uint32_t beforeDigitStart = 0; 521cb0ef41Sopenharmony_ci uint32_t digitStart = 0; 531cb0ef41Sopenharmony_ci uint32_t afterDigitStart = 0; 541cb0ef41Sopenharmony_ci for(int32_t group = UCOL_REORDER_CODE_FIRST; 551cb0ef41Sopenharmony_ci group < UCOL_REORDER_CODE_FIRST + CollationData::MAX_NUM_SPECIAL_REORDER_CODES; 561cb0ef41Sopenharmony_ci ++group) { 571cb0ef41Sopenharmony_ci uint32_t start = data->getFirstPrimaryForGroup(group); 581cb0ef41Sopenharmony_ci start = settings.reorder(start); 591cb0ef41Sopenharmony_ci if(group == UCOL_REORDER_CODE_DIGIT) { 601cb0ef41Sopenharmony_ci beforeDigitStart = prevStart; 611cb0ef41Sopenharmony_ci digitStart = start; 621cb0ef41Sopenharmony_ci } else if(start != 0) { 631cb0ef41Sopenharmony_ci if(start < prevStart) { 641cb0ef41Sopenharmony_ci // The permutation affects the groups up to Latin. 651cb0ef41Sopenharmony_ci return -1; 661cb0ef41Sopenharmony_ci } 671cb0ef41Sopenharmony_ci // In the future, there might be a special group between digits & Latin. 681cb0ef41Sopenharmony_ci if(digitStart != 0 && afterDigitStart == 0 && prevStart == beforeDigitStart) { 691cb0ef41Sopenharmony_ci afterDigitStart = start; 701cb0ef41Sopenharmony_ci } 711cb0ef41Sopenharmony_ci prevStart = start; 721cb0ef41Sopenharmony_ci } 731cb0ef41Sopenharmony_ci } 741cb0ef41Sopenharmony_ci uint32_t latinStart = data->getFirstPrimaryForGroup(USCRIPT_LATIN); 751cb0ef41Sopenharmony_ci latinStart = settings.reorder(latinStart); 761cb0ef41Sopenharmony_ci if(latinStart < prevStart) { 771cb0ef41Sopenharmony_ci return -1; 781cb0ef41Sopenharmony_ci } 791cb0ef41Sopenharmony_ci if(afterDigitStart == 0) { 801cb0ef41Sopenharmony_ci afterDigitStart = latinStart; 811cb0ef41Sopenharmony_ci } 821cb0ef41Sopenharmony_ci if(!(beforeDigitStart < digitStart && digitStart < afterDigitStart)) { 831cb0ef41Sopenharmony_ci digitsAreReordered = true; 841cb0ef41Sopenharmony_ci } 851cb0ef41Sopenharmony_ci } 861cb0ef41Sopenharmony_ci 871cb0ef41Sopenharmony_ci table += (table[0] & 0xff); // skip the header 881cb0ef41Sopenharmony_ci for(UChar32 c = 0; c < LATIN_LIMIT; ++c) { 891cb0ef41Sopenharmony_ci uint32_t p = table[c]; 901cb0ef41Sopenharmony_ci if(p >= MIN_SHORT) { 911cb0ef41Sopenharmony_ci p &= SHORT_PRIMARY_MASK; 921cb0ef41Sopenharmony_ci } else if(p > miniVarTop) { 931cb0ef41Sopenharmony_ci p &= LONG_PRIMARY_MASK; 941cb0ef41Sopenharmony_ci } else { 951cb0ef41Sopenharmony_ci p = 0; 961cb0ef41Sopenharmony_ci } 971cb0ef41Sopenharmony_ci primaries[c] = (uint16_t)p; 981cb0ef41Sopenharmony_ci } 991cb0ef41Sopenharmony_ci if(digitsAreReordered || (settings.options & CollationSettings::NUMERIC) != 0) { 1001cb0ef41Sopenharmony_ci // Bail out for digits. 1011cb0ef41Sopenharmony_ci for(UChar32 c = 0x30; c <= 0x39; ++c) { primaries[c] = 0; } 1021cb0ef41Sopenharmony_ci } 1031cb0ef41Sopenharmony_ci 1041cb0ef41Sopenharmony_ci // Shift the miniVarTop above other options. 1051cb0ef41Sopenharmony_ci return ((int32_t)miniVarTop << 16) | settings.options; 1061cb0ef41Sopenharmony_ci} 1071cb0ef41Sopenharmony_ci 1081cb0ef41Sopenharmony_ciint32_t 1091cb0ef41Sopenharmony_ciCollationFastLatin::compareUTF16(const uint16_t *table, const uint16_t *primaries, int32_t options, 1101cb0ef41Sopenharmony_ci const char16_t *left, int32_t leftLength, 1111cb0ef41Sopenharmony_ci const char16_t *right, int32_t rightLength) { 1121cb0ef41Sopenharmony_ci // This is a modified copy of CollationCompare::compareUpToQuaternary(), 1131cb0ef41Sopenharmony_ci // optimized for common Latin text. 1141cb0ef41Sopenharmony_ci // Keep them in sync! 1151cb0ef41Sopenharmony_ci // Keep compareUTF16() and compareUTF8() in sync very closely! 1161cb0ef41Sopenharmony_ci 1171cb0ef41Sopenharmony_ci U_ASSERT((table[0] >> 8) == VERSION); 1181cb0ef41Sopenharmony_ci table += (table[0] & 0xff); // skip the header 1191cb0ef41Sopenharmony_ci uint32_t variableTop = (uint32_t)options >> 16; // see getOptions() 1201cb0ef41Sopenharmony_ci options &= 0xffff; // needed for CollationSettings::getStrength() to work 1211cb0ef41Sopenharmony_ci 1221cb0ef41Sopenharmony_ci // Check for supported characters, fetch mini CEs, and compare primaries. 1231cb0ef41Sopenharmony_ci int32_t leftIndex = 0, rightIndex = 0; 1241cb0ef41Sopenharmony_ci /** 1251cb0ef41Sopenharmony_ci * Single mini CE or a pair. 1261cb0ef41Sopenharmony_ci * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits. 1271cb0ef41Sopenharmony_ci * If there is only one, then it is in the lower bits, and the upper bits are 0. 1281cb0ef41Sopenharmony_ci */ 1291cb0ef41Sopenharmony_ci uint32_t leftPair = 0, rightPair = 0; 1301cb0ef41Sopenharmony_ci for(;;) { 1311cb0ef41Sopenharmony_ci // We fetch CEs until we get a non-ignorable primary or reach the end. 1321cb0ef41Sopenharmony_ci while(leftPair == 0) { 1331cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 1341cb0ef41Sopenharmony_ci leftPair = EOS; 1351cb0ef41Sopenharmony_ci break; 1361cb0ef41Sopenharmony_ci } 1371cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 1381cb0ef41Sopenharmony_ci if(c <= LATIN_MAX) { 1391cb0ef41Sopenharmony_ci leftPair = primaries[c]; 1401cb0ef41Sopenharmony_ci if(leftPair != 0) { break; } 1411cb0ef41Sopenharmony_ci if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) { 1421cb0ef41Sopenharmony_ci return BAIL_OUT_RESULT; 1431cb0ef41Sopenharmony_ci } 1441cb0ef41Sopenharmony_ci leftPair = table[c]; 1451cb0ef41Sopenharmony_ci } else if(PUNCT_START <= c && c < PUNCT_LIMIT) { 1461cb0ef41Sopenharmony_ci leftPair = table[c - PUNCT_START + LATIN_LIMIT]; 1471cb0ef41Sopenharmony_ci } else { 1481cb0ef41Sopenharmony_ci leftPair = lookup(table, c); 1491cb0ef41Sopenharmony_ci } 1501cb0ef41Sopenharmony_ci if(leftPair >= MIN_SHORT) { 1511cb0ef41Sopenharmony_ci leftPair &= SHORT_PRIMARY_MASK; 1521cb0ef41Sopenharmony_ci break; 1531cb0ef41Sopenharmony_ci } else if(leftPair > variableTop) { 1541cb0ef41Sopenharmony_ci leftPair &= LONG_PRIMARY_MASK; 1551cb0ef41Sopenharmony_ci break; 1561cb0ef41Sopenharmony_ci } else { 1571cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, left, nullptr, leftIndex, leftLength); 1581cb0ef41Sopenharmony_ci if(leftPair == BAIL_OUT) { return BAIL_OUT_RESULT; } 1591cb0ef41Sopenharmony_ci leftPair = getPrimaries(variableTop, leftPair); 1601cb0ef41Sopenharmony_ci } 1611cb0ef41Sopenharmony_ci } 1621cb0ef41Sopenharmony_ci 1631cb0ef41Sopenharmony_ci while(rightPair == 0) { 1641cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 1651cb0ef41Sopenharmony_ci rightPair = EOS; 1661cb0ef41Sopenharmony_ci break; 1671cb0ef41Sopenharmony_ci } 1681cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 1691cb0ef41Sopenharmony_ci if(c <= LATIN_MAX) { 1701cb0ef41Sopenharmony_ci rightPair = primaries[c]; 1711cb0ef41Sopenharmony_ci if(rightPair != 0) { break; } 1721cb0ef41Sopenharmony_ci if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) { 1731cb0ef41Sopenharmony_ci return BAIL_OUT_RESULT; 1741cb0ef41Sopenharmony_ci } 1751cb0ef41Sopenharmony_ci rightPair = table[c]; 1761cb0ef41Sopenharmony_ci } else if(PUNCT_START <= c && c < PUNCT_LIMIT) { 1771cb0ef41Sopenharmony_ci rightPair = table[c - PUNCT_START + LATIN_LIMIT]; 1781cb0ef41Sopenharmony_ci } else { 1791cb0ef41Sopenharmony_ci rightPair = lookup(table, c); 1801cb0ef41Sopenharmony_ci } 1811cb0ef41Sopenharmony_ci if(rightPair >= MIN_SHORT) { 1821cb0ef41Sopenharmony_ci rightPair &= SHORT_PRIMARY_MASK; 1831cb0ef41Sopenharmony_ci break; 1841cb0ef41Sopenharmony_ci } else if(rightPair > variableTop) { 1851cb0ef41Sopenharmony_ci rightPair &= LONG_PRIMARY_MASK; 1861cb0ef41Sopenharmony_ci break; 1871cb0ef41Sopenharmony_ci } else { 1881cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, right, nullptr, rightIndex, rightLength); 1891cb0ef41Sopenharmony_ci if(rightPair == BAIL_OUT) { return BAIL_OUT_RESULT; } 1901cb0ef41Sopenharmony_ci rightPair = getPrimaries(variableTop, rightPair); 1911cb0ef41Sopenharmony_ci } 1921cb0ef41Sopenharmony_ci } 1931cb0ef41Sopenharmony_ci 1941cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 1951cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 1961cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 1971cb0ef41Sopenharmony_ci continue; 1981cb0ef41Sopenharmony_ci } 1991cb0ef41Sopenharmony_ci uint32_t leftPrimary = leftPair & 0xffff; 2001cb0ef41Sopenharmony_ci uint32_t rightPrimary = rightPair & 0xffff; 2011cb0ef41Sopenharmony_ci if(leftPrimary != rightPrimary) { 2021cb0ef41Sopenharmony_ci // Return the primary difference. 2031cb0ef41Sopenharmony_ci return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER; 2041cb0ef41Sopenharmony_ci } 2051cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 2061cb0ef41Sopenharmony_ci leftPair >>= 16; 2071cb0ef41Sopenharmony_ci rightPair >>= 16; 2081cb0ef41Sopenharmony_ci } 2091cb0ef41Sopenharmony_ci // In the following, we need to re-fetch each character because we did not buffer the CEs, 2101cb0ef41Sopenharmony_ci // but we know that the string is well-formed and 2111cb0ef41Sopenharmony_ci // only contains supported characters and mappings. 2121cb0ef41Sopenharmony_ci 2131cb0ef41Sopenharmony_ci // We might skip the secondary level but continue with the case level 2141cb0ef41Sopenharmony_ci // which is turned on separately. 2151cb0ef41Sopenharmony_ci if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) { 2161cb0ef41Sopenharmony_ci leftIndex = rightIndex = 0; 2171cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 2181cb0ef41Sopenharmony_ci for(;;) { 2191cb0ef41Sopenharmony_ci while(leftPair == 0) { 2201cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 2211cb0ef41Sopenharmony_ci leftPair = EOS; 2221cb0ef41Sopenharmony_ci break; 2231cb0ef41Sopenharmony_ci } 2241cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 2251cb0ef41Sopenharmony_ci if(c <= LATIN_MAX) { 2261cb0ef41Sopenharmony_ci leftPair = table[c]; 2271cb0ef41Sopenharmony_ci } else if(PUNCT_START <= c && c < PUNCT_LIMIT) { 2281cb0ef41Sopenharmony_ci leftPair = table[c - PUNCT_START + LATIN_LIMIT]; 2291cb0ef41Sopenharmony_ci } else { 2301cb0ef41Sopenharmony_ci leftPair = lookup(table, c); 2311cb0ef41Sopenharmony_ci } 2321cb0ef41Sopenharmony_ci if(leftPair >= MIN_SHORT) { 2331cb0ef41Sopenharmony_ci leftPair = getSecondariesFromOneShortCE(leftPair); 2341cb0ef41Sopenharmony_ci break; 2351cb0ef41Sopenharmony_ci } else if(leftPair > variableTop) { 2361cb0ef41Sopenharmony_ci leftPair = COMMON_SEC_PLUS_OFFSET; 2371cb0ef41Sopenharmony_ci break; 2381cb0ef41Sopenharmony_ci } else { 2391cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, left, nullptr, leftIndex, leftLength); 2401cb0ef41Sopenharmony_ci leftPair = getSecondaries(variableTop, leftPair); 2411cb0ef41Sopenharmony_ci } 2421cb0ef41Sopenharmony_ci } 2431cb0ef41Sopenharmony_ci 2441cb0ef41Sopenharmony_ci while(rightPair == 0) { 2451cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 2461cb0ef41Sopenharmony_ci rightPair = EOS; 2471cb0ef41Sopenharmony_ci break; 2481cb0ef41Sopenharmony_ci } 2491cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 2501cb0ef41Sopenharmony_ci if(c <= LATIN_MAX) { 2511cb0ef41Sopenharmony_ci rightPair = table[c]; 2521cb0ef41Sopenharmony_ci } else if(PUNCT_START <= c && c < PUNCT_LIMIT) { 2531cb0ef41Sopenharmony_ci rightPair = table[c - PUNCT_START + LATIN_LIMIT]; 2541cb0ef41Sopenharmony_ci } else { 2551cb0ef41Sopenharmony_ci rightPair = lookup(table, c); 2561cb0ef41Sopenharmony_ci } 2571cb0ef41Sopenharmony_ci if(rightPair >= MIN_SHORT) { 2581cb0ef41Sopenharmony_ci rightPair = getSecondariesFromOneShortCE(rightPair); 2591cb0ef41Sopenharmony_ci break; 2601cb0ef41Sopenharmony_ci } else if(rightPair > variableTop) { 2611cb0ef41Sopenharmony_ci rightPair = COMMON_SEC_PLUS_OFFSET; 2621cb0ef41Sopenharmony_ci break; 2631cb0ef41Sopenharmony_ci } else { 2641cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, right, nullptr, rightIndex, rightLength); 2651cb0ef41Sopenharmony_ci rightPair = getSecondaries(variableTop, rightPair); 2661cb0ef41Sopenharmony_ci } 2671cb0ef41Sopenharmony_ci } 2681cb0ef41Sopenharmony_ci 2691cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 2701cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 2711cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 2721cb0ef41Sopenharmony_ci continue; 2731cb0ef41Sopenharmony_ci } 2741cb0ef41Sopenharmony_ci uint32_t leftSecondary = leftPair & 0xffff; 2751cb0ef41Sopenharmony_ci uint32_t rightSecondary = rightPair & 0xffff; 2761cb0ef41Sopenharmony_ci if(leftSecondary != rightSecondary) { 2771cb0ef41Sopenharmony_ci if((options & CollationSettings::BACKWARD_SECONDARY) != 0) { 2781cb0ef41Sopenharmony_ci // Full support for backwards secondary requires backwards contraction matching 2791cb0ef41Sopenharmony_ci // and moving backwards between merge separators. 2801cb0ef41Sopenharmony_ci return BAIL_OUT_RESULT; 2811cb0ef41Sopenharmony_ci } 2821cb0ef41Sopenharmony_ci return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER; 2831cb0ef41Sopenharmony_ci } 2841cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 2851cb0ef41Sopenharmony_ci leftPair >>= 16; 2861cb0ef41Sopenharmony_ci rightPair >>= 16; 2871cb0ef41Sopenharmony_ci } 2881cb0ef41Sopenharmony_ci } 2891cb0ef41Sopenharmony_ci 2901cb0ef41Sopenharmony_ci if((options & CollationSettings::CASE_LEVEL) != 0) { 2911cb0ef41Sopenharmony_ci UBool strengthIsPrimary = CollationSettings::getStrength(options) == UCOL_PRIMARY; 2921cb0ef41Sopenharmony_ci leftIndex = rightIndex = 0; 2931cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 2941cb0ef41Sopenharmony_ci for(;;) { 2951cb0ef41Sopenharmony_ci while(leftPair == 0) { 2961cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 2971cb0ef41Sopenharmony_ci leftPair = EOS; 2981cb0ef41Sopenharmony_ci break; 2991cb0ef41Sopenharmony_ci } 3001cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 3011cb0ef41Sopenharmony_ci leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c); 3021cb0ef41Sopenharmony_ci if(leftPair < MIN_LONG) { 3031cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, left, nullptr, leftIndex, leftLength); 3041cb0ef41Sopenharmony_ci } 3051cb0ef41Sopenharmony_ci leftPair = getCases(variableTop, strengthIsPrimary, leftPair); 3061cb0ef41Sopenharmony_ci } 3071cb0ef41Sopenharmony_ci 3081cb0ef41Sopenharmony_ci while(rightPair == 0) { 3091cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 3101cb0ef41Sopenharmony_ci rightPair = EOS; 3111cb0ef41Sopenharmony_ci break; 3121cb0ef41Sopenharmony_ci } 3131cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 3141cb0ef41Sopenharmony_ci rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c); 3151cb0ef41Sopenharmony_ci if(rightPair < MIN_LONG) { 3161cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, right, nullptr, rightIndex, rightLength); 3171cb0ef41Sopenharmony_ci } 3181cb0ef41Sopenharmony_ci rightPair = getCases(variableTop, strengthIsPrimary, rightPair); 3191cb0ef41Sopenharmony_ci } 3201cb0ef41Sopenharmony_ci 3211cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 3221cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 3231cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 3241cb0ef41Sopenharmony_ci continue; 3251cb0ef41Sopenharmony_ci } 3261cb0ef41Sopenharmony_ci uint32_t leftCase = leftPair & 0xffff; 3271cb0ef41Sopenharmony_ci uint32_t rightCase = rightPair & 0xffff; 3281cb0ef41Sopenharmony_ci if(leftCase != rightCase) { 3291cb0ef41Sopenharmony_ci if((options & CollationSettings::UPPER_FIRST) == 0) { 3301cb0ef41Sopenharmony_ci return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER; 3311cb0ef41Sopenharmony_ci } else { 3321cb0ef41Sopenharmony_ci return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS; 3331cb0ef41Sopenharmony_ci } 3341cb0ef41Sopenharmony_ci } 3351cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 3361cb0ef41Sopenharmony_ci leftPair >>= 16; 3371cb0ef41Sopenharmony_ci rightPair >>= 16; 3381cb0ef41Sopenharmony_ci } 3391cb0ef41Sopenharmony_ci } 3401cb0ef41Sopenharmony_ci if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; } 3411cb0ef41Sopenharmony_ci 3421cb0ef41Sopenharmony_ci // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off. 3431cb0ef41Sopenharmony_ci UBool withCaseBits = CollationSettings::isTertiaryWithCaseBits(options); 3441cb0ef41Sopenharmony_ci 3451cb0ef41Sopenharmony_ci leftIndex = rightIndex = 0; 3461cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 3471cb0ef41Sopenharmony_ci for(;;) { 3481cb0ef41Sopenharmony_ci while(leftPair == 0) { 3491cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 3501cb0ef41Sopenharmony_ci leftPair = EOS; 3511cb0ef41Sopenharmony_ci break; 3521cb0ef41Sopenharmony_ci } 3531cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 3541cb0ef41Sopenharmony_ci leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c); 3551cb0ef41Sopenharmony_ci if(leftPair < MIN_LONG) { 3561cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, left, nullptr, leftIndex, leftLength); 3571cb0ef41Sopenharmony_ci } 3581cb0ef41Sopenharmony_ci leftPair = getTertiaries(variableTop, withCaseBits, leftPair); 3591cb0ef41Sopenharmony_ci } 3601cb0ef41Sopenharmony_ci 3611cb0ef41Sopenharmony_ci while(rightPair == 0) { 3621cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 3631cb0ef41Sopenharmony_ci rightPair = EOS; 3641cb0ef41Sopenharmony_ci break; 3651cb0ef41Sopenharmony_ci } 3661cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 3671cb0ef41Sopenharmony_ci rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c); 3681cb0ef41Sopenharmony_ci if(rightPair < MIN_LONG) { 3691cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, right, nullptr, rightIndex, rightLength); 3701cb0ef41Sopenharmony_ci } 3711cb0ef41Sopenharmony_ci rightPair = getTertiaries(variableTop, withCaseBits, rightPair); 3721cb0ef41Sopenharmony_ci } 3731cb0ef41Sopenharmony_ci 3741cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 3751cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 3761cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 3771cb0ef41Sopenharmony_ci continue; 3781cb0ef41Sopenharmony_ci } 3791cb0ef41Sopenharmony_ci uint32_t leftTertiary = leftPair & 0xffff; 3801cb0ef41Sopenharmony_ci uint32_t rightTertiary = rightPair & 0xffff; 3811cb0ef41Sopenharmony_ci if(leftTertiary != rightTertiary) { 3821cb0ef41Sopenharmony_ci if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) { 3831cb0ef41Sopenharmony_ci // Pass through EOS and MERGE_WEIGHT 3841cb0ef41Sopenharmony_ci // and keep real tertiary weights larger than the MERGE_WEIGHT. 3851cb0ef41Sopenharmony_ci // Tertiary CEs (secondary ignorables) are not supported in fast Latin. 3861cb0ef41Sopenharmony_ci if(leftTertiary > MERGE_WEIGHT) { 3871cb0ef41Sopenharmony_ci leftTertiary ^= CASE_MASK; 3881cb0ef41Sopenharmony_ci } 3891cb0ef41Sopenharmony_ci if(rightTertiary > MERGE_WEIGHT) { 3901cb0ef41Sopenharmony_ci rightTertiary ^= CASE_MASK; 3911cb0ef41Sopenharmony_ci } 3921cb0ef41Sopenharmony_ci } 3931cb0ef41Sopenharmony_ci return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER; 3941cb0ef41Sopenharmony_ci } 3951cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 3961cb0ef41Sopenharmony_ci leftPair >>= 16; 3971cb0ef41Sopenharmony_ci rightPair >>= 16; 3981cb0ef41Sopenharmony_ci } 3991cb0ef41Sopenharmony_ci if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; } 4001cb0ef41Sopenharmony_ci 4011cb0ef41Sopenharmony_ci leftIndex = rightIndex = 0; 4021cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 4031cb0ef41Sopenharmony_ci for(;;) { 4041cb0ef41Sopenharmony_ci while(leftPair == 0) { 4051cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 4061cb0ef41Sopenharmony_ci leftPair = EOS; 4071cb0ef41Sopenharmony_ci break; 4081cb0ef41Sopenharmony_ci } 4091cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 4101cb0ef41Sopenharmony_ci leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c); 4111cb0ef41Sopenharmony_ci if(leftPair < MIN_LONG) { 4121cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, left, nullptr, leftIndex, leftLength); 4131cb0ef41Sopenharmony_ci } 4141cb0ef41Sopenharmony_ci leftPair = getQuaternaries(variableTop, leftPair); 4151cb0ef41Sopenharmony_ci } 4161cb0ef41Sopenharmony_ci 4171cb0ef41Sopenharmony_ci while(rightPair == 0) { 4181cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 4191cb0ef41Sopenharmony_ci rightPair = EOS; 4201cb0ef41Sopenharmony_ci break; 4211cb0ef41Sopenharmony_ci } 4221cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 4231cb0ef41Sopenharmony_ci rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c); 4241cb0ef41Sopenharmony_ci if(rightPair < MIN_LONG) { 4251cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, right, nullptr, rightIndex, rightLength); 4261cb0ef41Sopenharmony_ci } 4271cb0ef41Sopenharmony_ci rightPair = getQuaternaries(variableTop, rightPair); 4281cb0ef41Sopenharmony_ci } 4291cb0ef41Sopenharmony_ci 4301cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 4311cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 4321cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 4331cb0ef41Sopenharmony_ci continue; 4341cb0ef41Sopenharmony_ci } 4351cb0ef41Sopenharmony_ci uint32_t leftQuaternary = leftPair & 0xffff; 4361cb0ef41Sopenharmony_ci uint32_t rightQuaternary = rightPair & 0xffff; 4371cb0ef41Sopenharmony_ci if(leftQuaternary != rightQuaternary) { 4381cb0ef41Sopenharmony_ci return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER; 4391cb0ef41Sopenharmony_ci } 4401cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 4411cb0ef41Sopenharmony_ci leftPair >>= 16; 4421cb0ef41Sopenharmony_ci rightPair >>= 16; 4431cb0ef41Sopenharmony_ci } 4441cb0ef41Sopenharmony_ci return UCOL_EQUAL; 4451cb0ef41Sopenharmony_ci} 4461cb0ef41Sopenharmony_ci 4471cb0ef41Sopenharmony_ciint32_t 4481cb0ef41Sopenharmony_ciCollationFastLatin::compareUTF8(const uint16_t *table, const uint16_t *primaries, int32_t options, 4491cb0ef41Sopenharmony_ci const uint8_t *left, int32_t leftLength, 4501cb0ef41Sopenharmony_ci const uint8_t *right, int32_t rightLength) { 4511cb0ef41Sopenharmony_ci // Keep compareUTF16() and compareUTF8() in sync very closely! 4521cb0ef41Sopenharmony_ci 4531cb0ef41Sopenharmony_ci U_ASSERT((table[0] >> 8) == VERSION); 4541cb0ef41Sopenharmony_ci table += (table[0] & 0xff); // skip the header 4551cb0ef41Sopenharmony_ci uint32_t variableTop = (uint32_t)options >> 16; // see RuleBasedCollator::getFastLatinOptions() 4561cb0ef41Sopenharmony_ci options &= 0xffff; // needed for CollationSettings::getStrength() to work 4571cb0ef41Sopenharmony_ci 4581cb0ef41Sopenharmony_ci // Check for supported characters, fetch mini CEs, and compare primaries. 4591cb0ef41Sopenharmony_ci int32_t leftIndex = 0, rightIndex = 0; 4601cb0ef41Sopenharmony_ci /** 4611cb0ef41Sopenharmony_ci * Single mini CE or a pair. 4621cb0ef41Sopenharmony_ci * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits. 4631cb0ef41Sopenharmony_ci * If there is only one, then it is in the lower bits, and the upper bits are 0. 4641cb0ef41Sopenharmony_ci */ 4651cb0ef41Sopenharmony_ci uint32_t leftPair = 0, rightPair = 0; 4661cb0ef41Sopenharmony_ci // Note: There is no need to assemble the code point. 4671cb0ef41Sopenharmony_ci // We only need to look up the table entry for the character, 4681cb0ef41Sopenharmony_ci // and nextPair() looks for whether c==0. 4691cb0ef41Sopenharmony_ci for(;;) { 4701cb0ef41Sopenharmony_ci // We fetch CEs until we get a non-ignorable primary or reach the end. 4711cb0ef41Sopenharmony_ci while(leftPair == 0) { 4721cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 4731cb0ef41Sopenharmony_ci leftPair = EOS; 4741cb0ef41Sopenharmony_ci break; 4751cb0ef41Sopenharmony_ci } 4761cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 4771cb0ef41Sopenharmony_ci uint8_t t; 4781cb0ef41Sopenharmony_ci if(c <= 0x7f) { 4791cb0ef41Sopenharmony_ci leftPair = primaries[c]; 4801cb0ef41Sopenharmony_ci if(leftPair != 0) { break; } 4811cb0ef41Sopenharmony_ci if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) { 4821cb0ef41Sopenharmony_ci return BAIL_OUT_RESULT; 4831cb0ef41Sopenharmony_ci } 4841cb0ef41Sopenharmony_ci leftPair = table[c]; 4851cb0ef41Sopenharmony_ci } else if(c <= LATIN_MAX_UTF8_LEAD && 0xc2 <= c && leftIndex != leftLength && 4861cb0ef41Sopenharmony_ci 0x80 <= (t = left[leftIndex]) && t <= 0xbf) { 4871cb0ef41Sopenharmony_ci ++leftIndex; 4881cb0ef41Sopenharmony_ci c = ((c - 0xc2) << 6) + t; 4891cb0ef41Sopenharmony_ci leftPair = primaries[c]; 4901cb0ef41Sopenharmony_ci if(leftPair != 0) { break; } 4911cb0ef41Sopenharmony_ci leftPair = table[c]; 4921cb0ef41Sopenharmony_ci } else { 4931cb0ef41Sopenharmony_ci leftPair = lookupUTF8(table, c, left, leftIndex, leftLength); 4941cb0ef41Sopenharmony_ci } 4951cb0ef41Sopenharmony_ci if(leftPair >= MIN_SHORT) { 4961cb0ef41Sopenharmony_ci leftPair &= SHORT_PRIMARY_MASK; 4971cb0ef41Sopenharmony_ci break; 4981cb0ef41Sopenharmony_ci } else if(leftPair > variableTop) { 4991cb0ef41Sopenharmony_ci leftPair &= LONG_PRIMARY_MASK; 5001cb0ef41Sopenharmony_ci break; 5011cb0ef41Sopenharmony_ci } else { 5021cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, nullptr, left, leftIndex, leftLength); 5031cb0ef41Sopenharmony_ci if(leftPair == BAIL_OUT) { return BAIL_OUT_RESULT; } 5041cb0ef41Sopenharmony_ci leftPair = getPrimaries(variableTop, leftPair); 5051cb0ef41Sopenharmony_ci } 5061cb0ef41Sopenharmony_ci } 5071cb0ef41Sopenharmony_ci 5081cb0ef41Sopenharmony_ci while(rightPair == 0) { 5091cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 5101cb0ef41Sopenharmony_ci rightPair = EOS; 5111cb0ef41Sopenharmony_ci break; 5121cb0ef41Sopenharmony_ci } 5131cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 5141cb0ef41Sopenharmony_ci uint8_t t; 5151cb0ef41Sopenharmony_ci if(c <= 0x7f) { 5161cb0ef41Sopenharmony_ci rightPair = primaries[c]; 5171cb0ef41Sopenharmony_ci if(rightPair != 0) { break; } 5181cb0ef41Sopenharmony_ci if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) { 5191cb0ef41Sopenharmony_ci return BAIL_OUT_RESULT; 5201cb0ef41Sopenharmony_ci } 5211cb0ef41Sopenharmony_ci rightPair = table[c]; 5221cb0ef41Sopenharmony_ci } else if(c <= LATIN_MAX_UTF8_LEAD && 0xc2 <= c && rightIndex != rightLength && 5231cb0ef41Sopenharmony_ci 0x80 <= (t = right[rightIndex]) && t <= 0xbf) { 5241cb0ef41Sopenharmony_ci ++rightIndex; 5251cb0ef41Sopenharmony_ci c = ((c - 0xc2) << 6) + t; 5261cb0ef41Sopenharmony_ci rightPair = primaries[c]; 5271cb0ef41Sopenharmony_ci if(rightPair != 0) { break; } 5281cb0ef41Sopenharmony_ci rightPair = table[c]; 5291cb0ef41Sopenharmony_ci } else { 5301cb0ef41Sopenharmony_ci rightPair = lookupUTF8(table, c, right, rightIndex, rightLength); 5311cb0ef41Sopenharmony_ci } 5321cb0ef41Sopenharmony_ci if(rightPair >= MIN_SHORT) { 5331cb0ef41Sopenharmony_ci rightPair &= SHORT_PRIMARY_MASK; 5341cb0ef41Sopenharmony_ci break; 5351cb0ef41Sopenharmony_ci } else if(rightPair > variableTop) { 5361cb0ef41Sopenharmony_ci rightPair &= LONG_PRIMARY_MASK; 5371cb0ef41Sopenharmony_ci break; 5381cb0ef41Sopenharmony_ci } else { 5391cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, nullptr, right, rightIndex, rightLength); 5401cb0ef41Sopenharmony_ci if(rightPair == BAIL_OUT) { return BAIL_OUT_RESULT; } 5411cb0ef41Sopenharmony_ci rightPair = getPrimaries(variableTop, rightPair); 5421cb0ef41Sopenharmony_ci } 5431cb0ef41Sopenharmony_ci } 5441cb0ef41Sopenharmony_ci 5451cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 5461cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 5471cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 5481cb0ef41Sopenharmony_ci continue; 5491cb0ef41Sopenharmony_ci } 5501cb0ef41Sopenharmony_ci uint32_t leftPrimary = leftPair & 0xffff; 5511cb0ef41Sopenharmony_ci uint32_t rightPrimary = rightPair & 0xffff; 5521cb0ef41Sopenharmony_ci if(leftPrimary != rightPrimary) { 5531cb0ef41Sopenharmony_ci // Return the primary difference. 5541cb0ef41Sopenharmony_ci return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER; 5551cb0ef41Sopenharmony_ci } 5561cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 5571cb0ef41Sopenharmony_ci leftPair >>= 16; 5581cb0ef41Sopenharmony_ci rightPair >>= 16; 5591cb0ef41Sopenharmony_ci } 5601cb0ef41Sopenharmony_ci // In the following, we need to re-fetch each character because we did not buffer the CEs, 5611cb0ef41Sopenharmony_ci // but we know that the string is well-formed and 5621cb0ef41Sopenharmony_ci // only contains supported characters and mappings. 5631cb0ef41Sopenharmony_ci 5641cb0ef41Sopenharmony_ci // We might skip the secondary level but continue with the case level 5651cb0ef41Sopenharmony_ci // which is turned on separately. 5661cb0ef41Sopenharmony_ci if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) { 5671cb0ef41Sopenharmony_ci leftIndex = rightIndex = 0; 5681cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 5691cb0ef41Sopenharmony_ci for(;;) { 5701cb0ef41Sopenharmony_ci while(leftPair == 0) { 5711cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 5721cb0ef41Sopenharmony_ci leftPair = EOS; 5731cb0ef41Sopenharmony_ci break; 5741cb0ef41Sopenharmony_ci } 5751cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 5761cb0ef41Sopenharmony_ci if(c <= 0x7f) { 5771cb0ef41Sopenharmony_ci leftPair = table[c]; 5781cb0ef41Sopenharmony_ci } else if(c <= LATIN_MAX_UTF8_LEAD) { 5791cb0ef41Sopenharmony_ci leftPair = table[((c - 0xc2) << 6) + left[leftIndex++]]; 5801cb0ef41Sopenharmony_ci } else { 5811cb0ef41Sopenharmony_ci leftPair = lookupUTF8Unsafe(table, c, left, leftIndex); 5821cb0ef41Sopenharmony_ci } 5831cb0ef41Sopenharmony_ci if(leftPair >= MIN_SHORT) { 5841cb0ef41Sopenharmony_ci leftPair = getSecondariesFromOneShortCE(leftPair); 5851cb0ef41Sopenharmony_ci break; 5861cb0ef41Sopenharmony_ci } else if(leftPair > variableTop) { 5871cb0ef41Sopenharmony_ci leftPair = COMMON_SEC_PLUS_OFFSET; 5881cb0ef41Sopenharmony_ci break; 5891cb0ef41Sopenharmony_ci } else { 5901cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, nullptr, left, leftIndex, leftLength); 5911cb0ef41Sopenharmony_ci leftPair = getSecondaries(variableTop, leftPair); 5921cb0ef41Sopenharmony_ci } 5931cb0ef41Sopenharmony_ci } 5941cb0ef41Sopenharmony_ci 5951cb0ef41Sopenharmony_ci while(rightPair == 0) { 5961cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 5971cb0ef41Sopenharmony_ci rightPair = EOS; 5981cb0ef41Sopenharmony_ci break; 5991cb0ef41Sopenharmony_ci } 6001cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 6011cb0ef41Sopenharmony_ci if(c <= 0x7f) { 6021cb0ef41Sopenharmony_ci rightPair = table[c]; 6031cb0ef41Sopenharmony_ci } else if(c <= LATIN_MAX_UTF8_LEAD) { 6041cb0ef41Sopenharmony_ci rightPair = table[((c - 0xc2) << 6) + right[rightIndex++]]; 6051cb0ef41Sopenharmony_ci } else { 6061cb0ef41Sopenharmony_ci rightPair = lookupUTF8Unsafe(table, c, right, rightIndex); 6071cb0ef41Sopenharmony_ci } 6081cb0ef41Sopenharmony_ci if(rightPair >= MIN_SHORT) { 6091cb0ef41Sopenharmony_ci rightPair = getSecondariesFromOneShortCE(rightPair); 6101cb0ef41Sopenharmony_ci break; 6111cb0ef41Sopenharmony_ci } else if(rightPair > variableTop) { 6121cb0ef41Sopenharmony_ci rightPair = COMMON_SEC_PLUS_OFFSET; 6131cb0ef41Sopenharmony_ci break; 6141cb0ef41Sopenharmony_ci } else { 6151cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, nullptr, right, rightIndex, rightLength); 6161cb0ef41Sopenharmony_ci rightPair = getSecondaries(variableTop, rightPair); 6171cb0ef41Sopenharmony_ci } 6181cb0ef41Sopenharmony_ci } 6191cb0ef41Sopenharmony_ci 6201cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 6211cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 6221cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 6231cb0ef41Sopenharmony_ci continue; 6241cb0ef41Sopenharmony_ci } 6251cb0ef41Sopenharmony_ci uint32_t leftSecondary = leftPair & 0xffff; 6261cb0ef41Sopenharmony_ci uint32_t rightSecondary = rightPair & 0xffff; 6271cb0ef41Sopenharmony_ci if(leftSecondary != rightSecondary) { 6281cb0ef41Sopenharmony_ci if((options & CollationSettings::BACKWARD_SECONDARY) != 0) { 6291cb0ef41Sopenharmony_ci // Full support for backwards secondary requires backwards contraction matching 6301cb0ef41Sopenharmony_ci // and moving backwards between merge separators. 6311cb0ef41Sopenharmony_ci return BAIL_OUT_RESULT; 6321cb0ef41Sopenharmony_ci } 6331cb0ef41Sopenharmony_ci return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER; 6341cb0ef41Sopenharmony_ci } 6351cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 6361cb0ef41Sopenharmony_ci leftPair >>= 16; 6371cb0ef41Sopenharmony_ci rightPair >>= 16; 6381cb0ef41Sopenharmony_ci } 6391cb0ef41Sopenharmony_ci } 6401cb0ef41Sopenharmony_ci 6411cb0ef41Sopenharmony_ci if((options & CollationSettings::CASE_LEVEL) != 0) { 6421cb0ef41Sopenharmony_ci UBool strengthIsPrimary = CollationSettings::getStrength(options) == UCOL_PRIMARY; 6431cb0ef41Sopenharmony_ci leftIndex = rightIndex = 0; 6441cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 6451cb0ef41Sopenharmony_ci for(;;) { 6461cb0ef41Sopenharmony_ci while(leftPair == 0) { 6471cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 6481cb0ef41Sopenharmony_ci leftPair = EOS; 6491cb0ef41Sopenharmony_ci break; 6501cb0ef41Sopenharmony_ci } 6511cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 6521cb0ef41Sopenharmony_ci leftPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, left, leftIndex); 6531cb0ef41Sopenharmony_ci if(leftPair < MIN_LONG) { 6541cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, nullptr, left, leftIndex, leftLength); 6551cb0ef41Sopenharmony_ci } 6561cb0ef41Sopenharmony_ci leftPair = getCases(variableTop, strengthIsPrimary, leftPair); 6571cb0ef41Sopenharmony_ci } 6581cb0ef41Sopenharmony_ci 6591cb0ef41Sopenharmony_ci while(rightPair == 0) { 6601cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 6611cb0ef41Sopenharmony_ci rightPair = EOS; 6621cb0ef41Sopenharmony_ci break; 6631cb0ef41Sopenharmony_ci } 6641cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 6651cb0ef41Sopenharmony_ci rightPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, right, rightIndex); 6661cb0ef41Sopenharmony_ci if(rightPair < MIN_LONG) { 6671cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, nullptr, right, rightIndex, rightLength); 6681cb0ef41Sopenharmony_ci } 6691cb0ef41Sopenharmony_ci rightPair = getCases(variableTop, strengthIsPrimary, rightPair); 6701cb0ef41Sopenharmony_ci } 6711cb0ef41Sopenharmony_ci 6721cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 6731cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 6741cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 6751cb0ef41Sopenharmony_ci continue; 6761cb0ef41Sopenharmony_ci } 6771cb0ef41Sopenharmony_ci uint32_t leftCase = leftPair & 0xffff; 6781cb0ef41Sopenharmony_ci uint32_t rightCase = rightPair & 0xffff; 6791cb0ef41Sopenharmony_ci if(leftCase != rightCase) { 6801cb0ef41Sopenharmony_ci if((options & CollationSettings::UPPER_FIRST) == 0) { 6811cb0ef41Sopenharmony_ci return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER; 6821cb0ef41Sopenharmony_ci } else { 6831cb0ef41Sopenharmony_ci return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS; 6841cb0ef41Sopenharmony_ci } 6851cb0ef41Sopenharmony_ci } 6861cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 6871cb0ef41Sopenharmony_ci leftPair >>= 16; 6881cb0ef41Sopenharmony_ci rightPair >>= 16; 6891cb0ef41Sopenharmony_ci } 6901cb0ef41Sopenharmony_ci } 6911cb0ef41Sopenharmony_ci if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; } 6921cb0ef41Sopenharmony_ci 6931cb0ef41Sopenharmony_ci // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off. 6941cb0ef41Sopenharmony_ci UBool withCaseBits = CollationSettings::isTertiaryWithCaseBits(options); 6951cb0ef41Sopenharmony_ci 6961cb0ef41Sopenharmony_ci leftIndex = rightIndex = 0; 6971cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 6981cb0ef41Sopenharmony_ci for(;;) { 6991cb0ef41Sopenharmony_ci while(leftPair == 0) { 7001cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 7011cb0ef41Sopenharmony_ci leftPair = EOS; 7021cb0ef41Sopenharmony_ci break; 7031cb0ef41Sopenharmony_ci } 7041cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 7051cb0ef41Sopenharmony_ci leftPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, left, leftIndex); 7061cb0ef41Sopenharmony_ci if(leftPair < MIN_LONG) { 7071cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, nullptr, left, leftIndex, leftLength); 7081cb0ef41Sopenharmony_ci } 7091cb0ef41Sopenharmony_ci leftPair = getTertiaries(variableTop, withCaseBits, leftPair); 7101cb0ef41Sopenharmony_ci } 7111cb0ef41Sopenharmony_ci 7121cb0ef41Sopenharmony_ci while(rightPair == 0) { 7131cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 7141cb0ef41Sopenharmony_ci rightPair = EOS; 7151cb0ef41Sopenharmony_ci break; 7161cb0ef41Sopenharmony_ci } 7171cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 7181cb0ef41Sopenharmony_ci rightPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, right, rightIndex); 7191cb0ef41Sopenharmony_ci if(rightPair < MIN_LONG) { 7201cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, nullptr, right, rightIndex, rightLength); 7211cb0ef41Sopenharmony_ci } 7221cb0ef41Sopenharmony_ci rightPair = getTertiaries(variableTop, withCaseBits, rightPair); 7231cb0ef41Sopenharmony_ci } 7241cb0ef41Sopenharmony_ci 7251cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 7261cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 7271cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 7281cb0ef41Sopenharmony_ci continue; 7291cb0ef41Sopenharmony_ci } 7301cb0ef41Sopenharmony_ci uint32_t leftTertiary = leftPair & 0xffff; 7311cb0ef41Sopenharmony_ci uint32_t rightTertiary = rightPair & 0xffff; 7321cb0ef41Sopenharmony_ci if(leftTertiary != rightTertiary) { 7331cb0ef41Sopenharmony_ci if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) { 7341cb0ef41Sopenharmony_ci // Pass through EOS and MERGE_WEIGHT 7351cb0ef41Sopenharmony_ci // and keep real tertiary weights larger than the MERGE_WEIGHT. 7361cb0ef41Sopenharmony_ci // Tertiary CEs (secondary ignorables) are not supported in fast Latin. 7371cb0ef41Sopenharmony_ci if(leftTertiary > MERGE_WEIGHT) { 7381cb0ef41Sopenharmony_ci leftTertiary ^= CASE_MASK; 7391cb0ef41Sopenharmony_ci } 7401cb0ef41Sopenharmony_ci if(rightTertiary > MERGE_WEIGHT) { 7411cb0ef41Sopenharmony_ci rightTertiary ^= CASE_MASK; 7421cb0ef41Sopenharmony_ci } 7431cb0ef41Sopenharmony_ci } 7441cb0ef41Sopenharmony_ci return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER; 7451cb0ef41Sopenharmony_ci } 7461cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 7471cb0ef41Sopenharmony_ci leftPair >>= 16; 7481cb0ef41Sopenharmony_ci rightPair >>= 16; 7491cb0ef41Sopenharmony_ci } 7501cb0ef41Sopenharmony_ci if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; } 7511cb0ef41Sopenharmony_ci 7521cb0ef41Sopenharmony_ci leftIndex = rightIndex = 0; 7531cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 7541cb0ef41Sopenharmony_ci for(;;) { 7551cb0ef41Sopenharmony_ci while(leftPair == 0) { 7561cb0ef41Sopenharmony_ci if(leftIndex == leftLength) { 7571cb0ef41Sopenharmony_ci leftPair = EOS; 7581cb0ef41Sopenharmony_ci break; 7591cb0ef41Sopenharmony_ci } 7601cb0ef41Sopenharmony_ci UChar32 c = left[leftIndex++]; 7611cb0ef41Sopenharmony_ci leftPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, left, leftIndex); 7621cb0ef41Sopenharmony_ci if(leftPair < MIN_LONG) { 7631cb0ef41Sopenharmony_ci leftPair = nextPair(table, c, leftPair, nullptr, left, leftIndex, leftLength); 7641cb0ef41Sopenharmony_ci } 7651cb0ef41Sopenharmony_ci leftPair = getQuaternaries(variableTop, leftPair); 7661cb0ef41Sopenharmony_ci } 7671cb0ef41Sopenharmony_ci 7681cb0ef41Sopenharmony_ci while(rightPair == 0) { 7691cb0ef41Sopenharmony_ci if(rightIndex == rightLength) { 7701cb0ef41Sopenharmony_ci rightPair = EOS; 7711cb0ef41Sopenharmony_ci break; 7721cb0ef41Sopenharmony_ci } 7731cb0ef41Sopenharmony_ci UChar32 c = right[rightIndex++]; 7741cb0ef41Sopenharmony_ci rightPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, right, rightIndex); 7751cb0ef41Sopenharmony_ci if(rightPair < MIN_LONG) { 7761cb0ef41Sopenharmony_ci rightPair = nextPair(table, c, rightPair, nullptr, right, rightIndex, rightLength); 7771cb0ef41Sopenharmony_ci } 7781cb0ef41Sopenharmony_ci rightPair = getQuaternaries(variableTop, rightPair); 7791cb0ef41Sopenharmony_ci } 7801cb0ef41Sopenharmony_ci 7811cb0ef41Sopenharmony_ci if(leftPair == rightPair) { 7821cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 7831cb0ef41Sopenharmony_ci leftPair = rightPair = 0; 7841cb0ef41Sopenharmony_ci continue; 7851cb0ef41Sopenharmony_ci } 7861cb0ef41Sopenharmony_ci uint32_t leftQuaternary = leftPair & 0xffff; 7871cb0ef41Sopenharmony_ci uint32_t rightQuaternary = rightPair & 0xffff; 7881cb0ef41Sopenharmony_ci if(leftQuaternary != rightQuaternary) { 7891cb0ef41Sopenharmony_ci return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER; 7901cb0ef41Sopenharmony_ci } 7911cb0ef41Sopenharmony_ci if(leftPair == EOS) { break; } 7921cb0ef41Sopenharmony_ci leftPair >>= 16; 7931cb0ef41Sopenharmony_ci rightPair >>= 16; 7941cb0ef41Sopenharmony_ci } 7951cb0ef41Sopenharmony_ci return UCOL_EQUAL; 7961cb0ef41Sopenharmony_ci} 7971cb0ef41Sopenharmony_ci 7981cb0ef41Sopenharmony_ciuint32_t 7991cb0ef41Sopenharmony_ciCollationFastLatin::lookup(const uint16_t *table, UChar32 c) { 8001cb0ef41Sopenharmony_ci U_ASSERT(c > LATIN_MAX); 8011cb0ef41Sopenharmony_ci if(PUNCT_START <= c && c < PUNCT_LIMIT) { 8021cb0ef41Sopenharmony_ci return table[c - PUNCT_START + LATIN_LIMIT]; 8031cb0ef41Sopenharmony_ci } else if(c == 0xfffe) { 8041cb0ef41Sopenharmony_ci return MERGE_WEIGHT; 8051cb0ef41Sopenharmony_ci } else if(c == 0xffff) { 8061cb0ef41Sopenharmony_ci return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER; 8071cb0ef41Sopenharmony_ci } else { 8081cb0ef41Sopenharmony_ci return BAIL_OUT; 8091cb0ef41Sopenharmony_ci } 8101cb0ef41Sopenharmony_ci} 8111cb0ef41Sopenharmony_ci 8121cb0ef41Sopenharmony_ciuint32_t 8131cb0ef41Sopenharmony_ciCollationFastLatin::lookupUTF8(const uint16_t *table, UChar32 c, 8141cb0ef41Sopenharmony_ci const uint8_t *s8, int32_t &sIndex, int32_t sLength) { 8151cb0ef41Sopenharmony_ci // The caller handled ASCII and valid/supported Latin. 8161cb0ef41Sopenharmony_ci U_ASSERT(c > 0x7f); 8171cb0ef41Sopenharmony_ci int32_t i2 = sIndex + 1; 8181cb0ef41Sopenharmony_ci if(i2 < sLength || sLength < 0) { 8191cb0ef41Sopenharmony_ci uint8_t t1 = s8[sIndex]; 8201cb0ef41Sopenharmony_ci uint8_t t2 = s8[i2]; 8211cb0ef41Sopenharmony_ci sIndex += 2; 8221cb0ef41Sopenharmony_ci if(c == 0xe2 && t1 == 0x80 && 0x80 <= t2 && t2 <= 0xbf) { 8231cb0ef41Sopenharmony_ci return table[(LATIN_LIMIT - 0x80) + t2]; // 2000..203F -> 0180..01BF 8241cb0ef41Sopenharmony_ci } else if(c == 0xef && t1 == 0xbf) { 8251cb0ef41Sopenharmony_ci if(t2 == 0xbe) { 8261cb0ef41Sopenharmony_ci return MERGE_WEIGHT; // U+FFFE 8271cb0ef41Sopenharmony_ci } else if(t2 == 0xbf) { 8281cb0ef41Sopenharmony_ci return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER; // U+FFFF 8291cb0ef41Sopenharmony_ci } 8301cb0ef41Sopenharmony_ci } 8311cb0ef41Sopenharmony_ci } 8321cb0ef41Sopenharmony_ci return BAIL_OUT; 8331cb0ef41Sopenharmony_ci} 8341cb0ef41Sopenharmony_ci 8351cb0ef41Sopenharmony_ciuint32_t 8361cb0ef41Sopenharmony_ciCollationFastLatin::lookupUTF8Unsafe(const uint16_t *table, UChar32 c, 8371cb0ef41Sopenharmony_ci const uint8_t *s8, int32_t &sIndex) { 8381cb0ef41Sopenharmony_ci // The caller handled ASCII. 8391cb0ef41Sopenharmony_ci // The string is well-formed and contains only supported characters. 8401cb0ef41Sopenharmony_ci U_ASSERT(c > 0x7f); 8411cb0ef41Sopenharmony_ci if(c <= LATIN_MAX_UTF8_LEAD) { 8421cb0ef41Sopenharmony_ci return table[((c - 0xc2) << 6) + s8[sIndex++]]; // 0080..017F 8431cb0ef41Sopenharmony_ci } 8441cb0ef41Sopenharmony_ci uint8_t t2 = s8[sIndex + 1]; 8451cb0ef41Sopenharmony_ci sIndex += 2; 8461cb0ef41Sopenharmony_ci if(c == 0xe2) { 8471cb0ef41Sopenharmony_ci return table[(LATIN_LIMIT - 0x80) + t2]; // 2000..203F -> 0180..01BF 8481cb0ef41Sopenharmony_ci } else if(t2 == 0xbe) { 8491cb0ef41Sopenharmony_ci return MERGE_WEIGHT; // U+FFFE 8501cb0ef41Sopenharmony_ci } else { 8511cb0ef41Sopenharmony_ci return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER; // U+FFFF 8521cb0ef41Sopenharmony_ci } 8531cb0ef41Sopenharmony_ci} 8541cb0ef41Sopenharmony_ci 8551cb0ef41Sopenharmony_ciuint32_t 8561cb0ef41Sopenharmony_ciCollationFastLatin::nextPair(const uint16_t *table, UChar32 c, uint32_t ce, 8571cb0ef41Sopenharmony_ci const char16_t *s16, const uint8_t *s8, int32_t &sIndex, int32_t &sLength) { 8581cb0ef41Sopenharmony_ci if(ce >= MIN_LONG || ce < CONTRACTION) { 8591cb0ef41Sopenharmony_ci return ce; // simple or special mini CE 8601cb0ef41Sopenharmony_ci } else if(ce >= EXPANSION) { 8611cb0ef41Sopenharmony_ci int32_t index = NUM_FAST_CHARS + (ce & INDEX_MASK); 8621cb0ef41Sopenharmony_ci return ((uint32_t)table[index + 1] << 16) | table[index]; 8631cb0ef41Sopenharmony_ci } else /* ce >= CONTRACTION */ { 8641cb0ef41Sopenharmony_ci if(c == 0 && sLength < 0) { 8651cb0ef41Sopenharmony_ci sLength = sIndex - 1; 8661cb0ef41Sopenharmony_ci return EOS; 8671cb0ef41Sopenharmony_ci } 8681cb0ef41Sopenharmony_ci // Contraction list: Default mapping followed by 8691cb0ef41Sopenharmony_ci // 0 or more single-character contraction suffix mappings. 8701cb0ef41Sopenharmony_ci int32_t index = NUM_FAST_CHARS + (ce & INDEX_MASK); 8711cb0ef41Sopenharmony_ci if(sIndex != sLength) { 8721cb0ef41Sopenharmony_ci // Read the next character. 8731cb0ef41Sopenharmony_ci int32_t c2; 8741cb0ef41Sopenharmony_ci int32_t nextIndex = sIndex; 8751cb0ef41Sopenharmony_ci if(s16 != nullptr) { 8761cb0ef41Sopenharmony_ci c2 = s16[nextIndex++]; 8771cb0ef41Sopenharmony_ci if(c2 > LATIN_MAX) { 8781cb0ef41Sopenharmony_ci if(PUNCT_START <= c2 && c2 < PUNCT_LIMIT) { 8791cb0ef41Sopenharmony_ci c2 = c2 - PUNCT_START + LATIN_LIMIT; // 2000..203F -> 0180..01BF 8801cb0ef41Sopenharmony_ci } else if(c2 == 0xfffe || c2 == 0xffff) { 8811cb0ef41Sopenharmony_ci c2 = -1; // U+FFFE & U+FFFF cannot occur in contractions. 8821cb0ef41Sopenharmony_ci } else { 8831cb0ef41Sopenharmony_ci return BAIL_OUT; 8841cb0ef41Sopenharmony_ci } 8851cb0ef41Sopenharmony_ci } 8861cb0ef41Sopenharmony_ci } else { 8871cb0ef41Sopenharmony_ci c2 = s8[nextIndex++]; 8881cb0ef41Sopenharmony_ci if(c2 > 0x7f) { 8891cb0ef41Sopenharmony_ci uint8_t t; 8901cb0ef41Sopenharmony_ci if(c2 <= 0xc5 && 0xc2 <= c2 && nextIndex != sLength && 8911cb0ef41Sopenharmony_ci 0x80 <= (t = s8[nextIndex]) && t <= 0xbf) { 8921cb0ef41Sopenharmony_ci c2 = ((c2 - 0xc2) << 6) + t; // 0080..017F 8931cb0ef41Sopenharmony_ci ++nextIndex; 8941cb0ef41Sopenharmony_ci } else { 8951cb0ef41Sopenharmony_ci int32_t i2 = nextIndex + 1; 8961cb0ef41Sopenharmony_ci if(i2 < sLength || sLength < 0) { 8971cb0ef41Sopenharmony_ci if(c2 == 0xe2 && s8[nextIndex] == 0x80 && 8981cb0ef41Sopenharmony_ci 0x80 <= (t = s8[i2]) && t <= 0xbf) { 8991cb0ef41Sopenharmony_ci c2 = (LATIN_LIMIT - 0x80) + t; // 2000..203F -> 0180..01BF 9001cb0ef41Sopenharmony_ci } else if(c2 == 0xef && s8[nextIndex] == 0xbf && 9011cb0ef41Sopenharmony_ci ((t = s8[i2]) == 0xbe || t == 0xbf)) { 9021cb0ef41Sopenharmony_ci c2 = -1; // U+FFFE & U+FFFF cannot occur in contractions. 9031cb0ef41Sopenharmony_ci } else { 9041cb0ef41Sopenharmony_ci return BAIL_OUT; 9051cb0ef41Sopenharmony_ci } 9061cb0ef41Sopenharmony_ci } else { 9071cb0ef41Sopenharmony_ci return BAIL_OUT; 9081cb0ef41Sopenharmony_ci } 9091cb0ef41Sopenharmony_ci nextIndex += 2; 9101cb0ef41Sopenharmony_ci } 9111cb0ef41Sopenharmony_ci } 9121cb0ef41Sopenharmony_ci } 9131cb0ef41Sopenharmony_ci if(c2 == 0 && sLength < 0) { 9141cb0ef41Sopenharmony_ci sLength = sIndex; 9151cb0ef41Sopenharmony_ci c2 = -1; 9161cb0ef41Sopenharmony_ci } 9171cb0ef41Sopenharmony_ci // Look for the next character in the contraction suffix list, 9181cb0ef41Sopenharmony_ci // which is in ascending order of single suffix characters. 9191cb0ef41Sopenharmony_ci int32_t i = index; 9201cb0ef41Sopenharmony_ci int32_t head = table[i]; // first skip the default mapping 9211cb0ef41Sopenharmony_ci int32_t x; 9221cb0ef41Sopenharmony_ci do { 9231cb0ef41Sopenharmony_ci i += head >> CONTR_LENGTH_SHIFT; 9241cb0ef41Sopenharmony_ci head = table[i]; 9251cb0ef41Sopenharmony_ci x = head & CONTR_CHAR_MASK; 9261cb0ef41Sopenharmony_ci } while(x < c2); 9271cb0ef41Sopenharmony_ci if(x == c2) { 9281cb0ef41Sopenharmony_ci index = i; 9291cb0ef41Sopenharmony_ci sIndex = nextIndex; 9301cb0ef41Sopenharmony_ci } 9311cb0ef41Sopenharmony_ci } 9321cb0ef41Sopenharmony_ci // Return the CE or CEs for the default or contraction mapping. 9331cb0ef41Sopenharmony_ci int32_t length = table[index] >> CONTR_LENGTH_SHIFT; 9341cb0ef41Sopenharmony_ci if(length == 1) { 9351cb0ef41Sopenharmony_ci return BAIL_OUT; 9361cb0ef41Sopenharmony_ci } 9371cb0ef41Sopenharmony_ci ce = table[index + 1]; 9381cb0ef41Sopenharmony_ci if(length == 2) { 9391cb0ef41Sopenharmony_ci return ce; 9401cb0ef41Sopenharmony_ci } else { 9411cb0ef41Sopenharmony_ci return ((uint32_t)table[index + 2] << 16) | ce; 9421cb0ef41Sopenharmony_ci } 9431cb0ef41Sopenharmony_ci } 9441cb0ef41Sopenharmony_ci} 9451cb0ef41Sopenharmony_ci 9461cb0ef41Sopenharmony_ciuint32_t 9471cb0ef41Sopenharmony_ciCollationFastLatin::getSecondaries(uint32_t variableTop, uint32_t pair) { 9481cb0ef41Sopenharmony_ci if(pair <= 0xffff) { 9491cb0ef41Sopenharmony_ci // one mini CE 9501cb0ef41Sopenharmony_ci if(pair >= MIN_SHORT) { 9511cb0ef41Sopenharmony_ci pair = getSecondariesFromOneShortCE(pair); 9521cb0ef41Sopenharmony_ci } else if(pair > variableTop) { 9531cb0ef41Sopenharmony_ci pair = COMMON_SEC_PLUS_OFFSET; 9541cb0ef41Sopenharmony_ci } else if(pair >= MIN_LONG) { 9551cb0ef41Sopenharmony_ci pair = 0; // variable 9561cb0ef41Sopenharmony_ci } 9571cb0ef41Sopenharmony_ci // else special mini CE 9581cb0ef41Sopenharmony_ci } else { 9591cb0ef41Sopenharmony_ci uint32_t ce = pair & 0xffff; 9601cb0ef41Sopenharmony_ci if(ce >= MIN_SHORT) { 9611cb0ef41Sopenharmony_ci pair = (pair & TWO_SECONDARIES_MASK) + TWO_SEC_OFFSETS; 9621cb0ef41Sopenharmony_ci } else if(ce > variableTop) { 9631cb0ef41Sopenharmony_ci pair = TWO_COMMON_SEC_PLUS_OFFSET; 9641cb0ef41Sopenharmony_ci } else { 9651cb0ef41Sopenharmony_ci U_ASSERT(ce >= MIN_LONG); 9661cb0ef41Sopenharmony_ci pair = 0; // variable 9671cb0ef41Sopenharmony_ci } 9681cb0ef41Sopenharmony_ci } 9691cb0ef41Sopenharmony_ci return pair; 9701cb0ef41Sopenharmony_ci} 9711cb0ef41Sopenharmony_ci 9721cb0ef41Sopenharmony_ciuint32_t 9731cb0ef41Sopenharmony_ciCollationFastLatin::getCases(uint32_t variableTop, UBool strengthIsPrimary, uint32_t pair) { 9741cb0ef41Sopenharmony_ci // Primary+caseLevel: Ignore case level weights of primary ignorables. 9751cb0ef41Sopenharmony_ci // Otherwise: Ignore case level weights of secondary ignorables. 9761cb0ef41Sopenharmony_ci // For details see the comments in the CollationCompare class. 9771cb0ef41Sopenharmony_ci // Tertiary CEs (secondary ignorables) are not supported in fast Latin. 9781cb0ef41Sopenharmony_ci if(pair <= 0xffff) { 9791cb0ef41Sopenharmony_ci // one mini CE 9801cb0ef41Sopenharmony_ci if(pair >= MIN_SHORT) { 9811cb0ef41Sopenharmony_ci // A high secondary weight means we really have two CEs, 9821cb0ef41Sopenharmony_ci // a primary CE and a secondary CE. 9831cb0ef41Sopenharmony_ci uint32_t ce = pair; 9841cb0ef41Sopenharmony_ci pair &= CASE_MASK; // explicit weight of primary CE 9851cb0ef41Sopenharmony_ci if(!strengthIsPrimary && (ce & SECONDARY_MASK) >= MIN_SEC_HIGH) { 9861cb0ef41Sopenharmony_ci pair |= LOWER_CASE << 16; // implied weight of secondary CE 9871cb0ef41Sopenharmony_ci } 9881cb0ef41Sopenharmony_ci } else if(pair > variableTop) { 9891cb0ef41Sopenharmony_ci pair = LOWER_CASE; 9901cb0ef41Sopenharmony_ci } else if(pair >= MIN_LONG) { 9911cb0ef41Sopenharmony_ci pair = 0; // variable 9921cb0ef41Sopenharmony_ci } 9931cb0ef41Sopenharmony_ci // else special mini CE 9941cb0ef41Sopenharmony_ci } else { 9951cb0ef41Sopenharmony_ci // two mini CEs, same primary groups, neither expands like above 9961cb0ef41Sopenharmony_ci uint32_t ce = pair & 0xffff; 9971cb0ef41Sopenharmony_ci if(ce >= MIN_SHORT) { 9981cb0ef41Sopenharmony_ci if(strengthIsPrimary && (pair & (SHORT_PRIMARY_MASK << 16)) == 0) { 9991cb0ef41Sopenharmony_ci pair &= CASE_MASK; 10001cb0ef41Sopenharmony_ci } else { 10011cb0ef41Sopenharmony_ci pair &= TWO_CASES_MASK; 10021cb0ef41Sopenharmony_ci } 10031cb0ef41Sopenharmony_ci } else if(ce > variableTop) { 10041cb0ef41Sopenharmony_ci pair = TWO_LOWER_CASES; 10051cb0ef41Sopenharmony_ci } else { 10061cb0ef41Sopenharmony_ci U_ASSERT(ce >= MIN_LONG); 10071cb0ef41Sopenharmony_ci pair = 0; // variable 10081cb0ef41Sopenharmony_ci } 10091cb0ef41Sopenharmony_ci } 10101cb0ef41Sopenharmony_ci return pair; 10111cb0ef41Sopenharmony_ci} 10121cb0ef41Sopenharmony_ci 10131cb0ef41Sopenharmony_ciuint32_t 10141cb0ef41Sopenharmony_ciCollationFastLatin::getTertiaries(uint32_t variableTop, UBool withCaseBits, uint32_t pair) { 10151cb0ef41Sopenharmony_ci if(pair <= 0xffff) { 10161cb0ef41Sopenharmony_ci // one mini CE 10171cb0ef41Sopenharmony_ci if(pair >= MIN_SHORT) { 10181cb0ef41Sopenharmony_ci // A high secondary weight means we really have two CEs, 10191cb0ef41Sopenharmony_ci // a primary CE and a secondary CE. 10201cb0ef41Sopenharmony_ci uint32_t ce = pair; 10211cb0ef41Sopenharmony_ci if(withCaseBits) { 10221cb0ef41Sopenharmony_ci pair = (pair & CASE_AND_TERTIARY_MASK) + TER_OFFSET; 10231cb0ef41Sopenharmony_ci if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) { 10241cb0ef41Sopenharmony_ci pair |= (LOWER_CASE | COMMON_TER_PLUS_OFFSET) << 16; 10251cb0ef41Sopenharmony_ci } 10261cb0ef41Sopenharmony_ci } else { 10271cb0ef41Sopenharmony_ci pair = (pair & TERTIARY_MASK) + TER_OFFSET; 10281cb0ef41Sopenharmony_ci if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) { 10291cb0ef41Sopenharmony_ci pair |= COMMON_TER_PLUS_OFFSET << 16; 10301cb0ef41Sopenharmony_ci } 10311cb0ef41Sopenharmony_ci } 10321cb0ef41Sopenharmony_ci } else if(pair > variableTop) { 10331cb0ef41Sopenharmony_ci pair = (pair & TERTIARY_MASK) + TER_OFFSET; 10341cb0ef41Sopenharmony_ci if(withCaseBits) { 10351cb0ef41Sopenharmony_ci pair |= LOWER_CASE; 10361cb0ef41Sopenharmony_ci } 10371cb0ef41Sopenharmony_ci } else if(pair >= MIN_LONG) { 10381cb0ef41Sopenharmony_ci pair = 0; // variable 10391cb0ef41Sopenharmony_ci } 10401cb0ef41Sopenharmony_ci // else special mini CE 10411cb0ef41Sopenharmony_ci } else { 10421cb0ef41Sopenharmony_ci // two mini CEs, same primary groups, neither expands like above 10431cb0ef41Sopenharmony_ci uint32_t ce = pair & 0xffff; 10441cb0ef41Sopenharmony_ci if(ce >= MIN_SHORT) { 10451cb0ef41Sopenharmony_ci if(withCaseBits) { 10461cb0ef41Sopenharmony_ci pair &= TWO_CASES_MASK | TWO_TERTIARIES_MASK; 10471cb0ef41Sopenharmony_ci } else { 10481cb0ef41Sopenharmony_ci pair &= TWO_TERTIARIES_MASK; 10491cb0ef41Sopenharmony_ci } 10501cb0ef41Sopenharmony_ci pair += TWO_TER_OFFSETS; 10511cb0ef41Sopenharmony_ci } else if(ce > variableTop) { 10521cb0ef41Sopenharmony_ci pair = (pair & TWO_TERTIARIES_MASK) + TWO_TER_OFFSETS; 10531cb0ef41Sopenharmony_ci if(withCaseBits) { 10541cb0ef41Sopenharmony_ci pair |= TWO_LOWER_CASES; 10551cb0ef41Sopenharmony_ci } 10561cb0ef41Sopenharmony_ci } else { 10571cb0ef41Sopenharmony_ci U_ASSERT(ce >= MIN_LONG); 10581cb0ef41Sopenharmony_ci pair = 0; // variable 10591cb0ef41Sopenharmony_ci } 10601cb0ef41Sopenharmony_ci } 10611cb0ef41Sopenharmony_ci return pair; 10621cb0ef41Sopenharmony_ci} 10631cb0ef41Sopenharmony_ci 10641cb0ef41Sopenharmony_ciuint32_t 10651cb0ef41Sopenharmony_ciCollationFastLatin::getQuaternaries(uint32_t variableTop, uint32_t pair) { 10661cb0ef41Sopenharmony_ci // Return the primary weight of a variable CE, 10671cb0ef41Sopenharmony_ci // or the maximum primary weight for a non-variable, not-completely-ignorable CE. 10681cb0ef41Sopenharmony_ci if(pair <= 0xffff) { 10691cb0ef41Sopenharmony_ci // one mini CE 10701cb0ef41Sopenharmony_ci if(pair >= MIN_SHORT) { 10711cb0ef41Sopenharmony_ci // A high secondary weight means we really have two CEs, 10721cb0ef41Sopenharmony_ci // a primary CE and a secondary CE. 10731cb0ef41Sopenharmony_ci if((pair & SECONDARY_MASK) >= MIN_SEC_HIGH) { 10741cb0ef41Sopenharmony_ci pair = TWO_SHORT_PRIMARIES_MASK; 10751cb0ef41Sopenharmony_ci } else { 10761cb0ef41Sopenharmony_ci pair = SHORT_PRIMARY_MASK; 10771cb0ef41Sopenharmony_ci } 10781cb0ef41Sopenharmony_ci } else if(pair > variableTop) { 10791cb0ef41Sopenharmony_ci pair = SHORT_PRIMARY_MASK; 10801cb0ef41Sopenharmony_ci } else if(pair >= MIN_LONG) { 10811cb0ef41Sopenharmony_ci pair &= LONG_PRIMARY_MASK; // variable 10821cb0ef41Sopenharmony_ci } 10831cb0ef41Sopenharmony_ci // else special mini CE 10841cb0ef41Sopenharmony_ci } else { 10851cb0ef41Sopenharmony_ci // two mini CEs, same primary groups, neither expands like above 10861cb0ef41Sopenharmony_ci uint32_t ce = pair & 0xffff; 10871cb0ef41Sopenharmony_ci if(ce > variableTop) { 10881cb0ef41Sopenharmony_ci pair = TWO_SHORT_PRIMARIES_MASK; 10891cb0ef41Sopenharmony_ci } else { 10901cb0ef41Sopenharmony_ci U_ASSERT(ce >= MIN_LONG); 10911cb0ef41Sopenharmony_ci pair &= TWO_LONG_PRIMARIES_MASK; // variable 10921cb0ef41Sopenharmony_ci } 10931cb0ef41Sopenharmony_ci } 10941cb0ef41Sopenharmony_ci return pair; 10951cb0ef41Sopenharmony_ci} 10961cb0ef41Sopenharmony_ci 10971cb0ef41Sopenharmony_ciU_NAMESPACE_END 10981cb0ef41Sopenharmony_ci 10991cb0ef41Sopenharmony_ci#endif // !UCONFIG_NO_COLLATION 1100