11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
31cb0ef41Sopenharmony_ci/*
41cb0ef41Sopenharmony_ci*******************************************************************************
51cb0ef41Sopenharmony_ci* Copyright (C) 2010-2014, International Business Machines
61cb0ef41Sopenharmony_ci* Corporation and others.  All Rights Reserved.
71cb0ef41Sopenharmony_ci*******************************************************************************
81cb0ef41Sopenharmony_ci* collation.cpp
91cb0ef41Sopenharmony_ci*
101cb0ef41Sopenharmony_ci* created on: 2010oct27
111cb0ef41Sopenharmony_ci* created by: Markus W. Scherer
121cb0ef41Sopenharmony_ci*/
131cb0ef41Sopenharmony_ci
141cb0ef41Sopenharmony_ci#include "unicode/utypes.h"
151cb0ef41Sopenharmony_ci
161cb0ef41Sopenharmony_ci#if !UCONFIG_NO_COLLATION
171cb0ef41Sopenharmony_ci
181cb0ef41Sopenharmony_ci#include "collation.h"
191cb0ef41Sopenharmony_ci#include "uassert.h"
201cb0ef41Sopenharmony_ci
211cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN
221cb0ef41Sopenharmony_ci
231cb0ef41Sopenharmony_ciuint32_t
241cb0ef41Sopenharmony_ciCollation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
251cb0ef41Sopenharmony_ci    // Extract the second byte, minus the minimum byte value,
261cb0ef41Sopenharmony_ci    // plus the offset, modulo the number of usable byte values, plus the minimum.
271cb0ef41Sopenharmony_ci    // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
281cb0ef41Sopenharmony_ci    uint32_t primary;
291cb0ef41Sopenharmony_ci    if(isCompressible) {
301cb0ef41Sopenharmony_ci        offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
311cb0ef41Sopenharmony_ci        primary = (uint32_t)((offset % 251) + 4) << 16;
321cb0ef41Sopenharmony_ci        offset /= 251;
331cb0ef41Sopenharmony_ci    } else {
341cb0ef41Sopenharmony_ci        offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
351cb0ef41Sopenharmony_ci        primary = (uint32_t)((offset % 254) + 2) << 16;
361cb0ef41Sopenharmony_ci        offset /= 254;
371cb0ef41Sopenharmony_ci    }
381cb0ef41Sopenharmony_ci    // First byte, assume no further overflow.
391cb0ef41Sopenharmony_ci    return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
401cb0ef41Sopenharmony_ci}
411cb0ef41Sopenharmony_ci
421cb0ef41Sopenharmony_ciuint32_t
431cb0ef41Sopenharmony_ciCollation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
441cb0ef41Sopenharmony_ci    // Extract the third byte, minus the minimum byte value,
451cb0ef41Sopenharmony_ci    // plus the offset, modulo the number of usable byte values, plus the minimum.
461cb0ef41Sopenharmony_ci    offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2;
471cb0ef41Sopenharmony_ci    uint32_t primary = (uint32_t)((offset % 254) + 2) << 8;
481cb0ef41Sopenharmony_ci    offset /= 254;
491cb0ef41Sopenharmony_ci    // Same with the second byte,
501cb0ef41Sopenharmony_ci    // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
511cb0ef41Sopenharmony_ci    if(isCompressible) {
521cb0ef41Sopenharmony_ci        offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
531cb0ef41Sopenharmony_ci        primary |= (uint32_t)((offset % 251) + 4) << 16;
541cb0ef41Sopenharmony_ci        offset /= 251;
551cb0ef41Sopenharmony_ci    } else {
561cb0ef41Sopenharmony_ci        offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
571cb0ef41Sopenharmony_ci        primary |= (uint32_t)((offset % 254) + 2) << 16;
581cb0ef41Sopenharmony_ci        offset /= 254;
591cb0ef41Sopenharmony_ci    }
601cb0ef41Sopenharmony_ci    // First byte, assume no further overflow.
611cb0ef41Sopenharmony_ci    return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
621cb0ef41Sopenharmony_ci}
631cb0ef41Sopenharmony_ci
641cb0ef41Sopenharmony_ciuint32_t
651cb0ef41Sopenharmony_ciCollation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
661cb0ef41Sopenharmony_ci    // Extract the second byte, minus the minimum byte value,
671cb0ef41Sopenharmony_ci    // minus the step, modulo the number of usable byte values, plus the minimum.
681cb0ef41Sopenharmony_ci    // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
691cb0ef41Sopenharmony_ci    // Assume no further underflow for the first byte.
701cb0ef41Sopenharmony_ci    U_ASSERT(0 < step && step <= 0x7f);
711cb0ef41Sopenharmony_ci    int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step;
721cb0ef41Sopenharmony_ci    if(isCompressible) {
731cb0ef41Sopenharmony_ci        if(byte2 < 4) {
741cb0ef41Sopenharmony_ci            byte2 += 251;
751cb0ef41Sopenharmony_ci            basePrimary -= 0x1000000;
761cb0ef41Sopenharmony_ci        }
771cb0ef41Sopenharmony_ci    } else {
781cb0ef41Sopenharmony_ci        if(byte2 < 2) {
791cb0ef41Sopenharmony_ci            byte2 += 254;
801cb0ef41Sopenharmony_ci            basePrimary -= 0x1000000;
811cb0ef41Sopenharmony_ci        }
821cb0ef41Sopenharmony_ci    }
831cb0ef41Sopenharmony_ci    return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16);
841cb0ef41Sopenharmony_ci}
851cb0ef41Sopenharmony_ci
861cb0ef41Sopenharmony_ciuint32_t
871cb0ef41Sopenharmony_ciCollation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
881cb0ef41Sopenharmony_ci    // Extract the third byte, minus the minimum byte value,
891cb0ef41Sopenharmony_ci    // minus the step, modulo the number of usable byte values, plus the minimum.
901cb0ef41Sopenharmony_ci    U_ASSERT(0 < step && step <= 0x7f);
911cb0ef41Sopenharmony_ci    int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step;
921cb0ef41Sopenharmony_ci    if(byte3 >= 2) {
931cb0ef41Sopenharmony_ci        return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8);
941cb0ef41Sopenharmony_ci    }
951cb0ef41Sopenharmony_ci    byte3 += 254;
961cb0ef41Sopenharmony_ci    // Same with the second byte,
971cb0ef41Sopenharmony_ci    // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
981cb0ef41Sopenharmony_ci    int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1;
991cb0ef41Sopenharmony_ci    if(isCompressible) {
1001cb0ef41Sopenharmony_ci        if(byte2 < 4) {
1011cb0ef41Sopenharmony_ci            byte2 = 0xfe;
1021cb0ef41Sopenharmony_ci            basePrimary -= 0x1000000;
1031cb0ef41Sopenharmony_ci        }
1041cb0ef41Sopenharmony_ci    } else {
1051cb0ef41Sopenharmony_ci        if(byte2 < 2) {
1061cb0ef41Sopenharmony_ci            byte2 = 0xff;
1071cb0ef41Sopenharmony_ci            basePrimary -= 0x1000000;
1081cb0ef41Sopenharmony_ci        }
1091cb0ef41Sopenharmony_ci    }
1101cb0ef41Sopenharmony_ci    // First byte, assume no further underflow.
1111cb0ef41Sopenharmony_ci    return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8);
1121cb0ef41Sopenharmony_ci}
1131cb0ef41Sopenharmony_ci
1141cb0ef41Sopenharmony_ciuint32_t
1151cb0ef41Sopenharmony_ciCollation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) {
1161cb0ef41Sopenharmony_ci    uint32_t p = (uint32_t)(dataCE >> 32);  // three-byte primary pppppp00
1171cb0ef41Sopenharmony_ci    int32_t lower32 = (int32_t)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)
1181cb0ef41Sopenharmony_ci    int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
1191cb0ef41Sopenharmony_ci    UBool isCompressible = (lower32 & 0x80) != 0;
1201cb0ef41Sopenharmony_ci    return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset);
1211cb0ef41Sopenharmony_ci}
1221cb0ef41Sopenharmony_ci
1231cb0ef41Sopenharmony_ciuint32_t
1241cb0ef41Sopenharmony_ciCollation::unassignedPrimaryFromCodePoint(UChar32 c) {
1251cb0ef41Sopenharmony_ci    // Create a gap before U+0000. Use c=-1 for [first unassigned].
1261cb0ef41Sopenharmony_ci    ++c;
1271cb0ef41Sopenharmony_ci    // Fourth byte: 18 values, every 14th byte value (gap of 13).
1281cb0ef41Sopenharmony_ci    uint32_t primary = 2 + (c % 18) * 14;
1291cb0ef41Sopenharmony_ci    c /= 18;
1301cb0ef41Sopenharmony_ci    // Third byte: 254 values.
1311cb0ef41Sopenharmony_ci    primary |= (2 + (c % 254)) << 8;
1321cb0ef41Sopenharmony_ci    c /= 254;
1331cb0ef41Sopenharmony_ci    // Second byte: 251 values 04..FE excluding the primary compression bytes.
1341cb0ef41Sopenharmony_ci    primary |= (4 + (c % 251)) << 16;
1351cb0ef41Sopenharmony_ci    // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
1361cb0ef41Sopenharmony_ci    return primary | (UNASSIGNED_IMPLICIT_BYTE << 24);
1371cb0ef41Sopenharmony_ci}
1381cb0ef41Sopenharmony_ci
1391cb0ef41Sopenharmony_ciU_NAMESPACE_END
1401cb0ef41Sopenharmony_ci
1411cb0ef41Sopenharmony_ci#endif  // !UCONFIG_NO_COLLATION
142