11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci/* 41cb0ef41Sopenharmony_ci******************************************************************************* 51cb0ef41Sopenharmony_ci* Copyright (C) 2010-2014, International Business Machines 61cb0ef41Sopenharmony_ci* Corporation and others. All Rights Reserved. 71cb0ef41Sopenharmony_ci******************************************************************************* 81cb0ef41Sopenharmony_ci* collation.cpp 91cb0ef41Sopenharmony_ci* 101cb0ef41Sopenharmony_ci* created on: 2010oct27 111cb0ef41Sopenharmony_ci* created by: Markus W. Scherer 121cb0ef41Sopenharmony_ci*/ 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 151cb0ef41Sopenharmony_ci 161cb0ef41Sopenharmony_ci#if !UCONFIG_NO_COLLATION 171cb0ef41Sopenharmony_ci 181cb0ef41Sopenharmony_ci#include "collation.h" 191cb0ef41Sopenharmony_ci#include "uassert.h" 201cb0ef41Sopenharmony_ci 211cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN 221cb0ef41Sopenharmony_ci 231cb0ef41Sopenharmony_ciuint32_t 241cb0ef41Sopenharmony_ciCollation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 251cb0ef41Sopenharmony_ci // Extract the second byte, minus the minimum byte value, 261cb0ef41Sopenharmony_ci // plus the offset, modulo the number of usable byte values, plus the minimum. 271cb0ef41Sopenharmony_ci // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 281cb0ef41Sopenharmony_ci uint32_t primary; 291cb0ef41Sopenharmony_ci if(isCompressible) { 301cb0ef41Sopenharmony_ci offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4; 311cb0ef41Sopenharmony_ci primary = (uint32_t)((offset % 251) + 4) << 16; 321cb0ef41Sopenharmony_ci offset /= 251; 331cb0ef41Sopenharmony_ci } else { 341cb0ef41Sopenharmony_ci offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2; 351cb0ef41Sopenharmony_ci primary = (uint32_t)((offset % 254) + 2) << 16; 361cb0ef41Sopenharmony_ci offset /= 254; 371cb0ef41Sopenharmony_ci } 381cb0ef41Sopenharmony_ci // First byte, assume no further overflow. 391cb0ef41Sopenharmony_ci return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24)); 401cb0ef41Sopenharmony_ci} 411cb0ef41Sopenharmony_ci 421cb0ef41Sopenharmony_ciuint32_t 431cb0ef41Sopenharmony_ciCollation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 441cb0ef41Sopenharmony_ci // Extract the third byte, minus the minimum byte value, 451cb0ef41Sopenharmony_ci // plus the offset, modulo the number of usable byte values, plus the minimum. 461cb0ef41Sopenharmony_ci offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2; 471cb0ef41Sopenharmony_ci uint32_t primary = (uint32_t)((offset % 254) + 2) << 8; 481cb0ef41Sopenharmony_ci offset /= 254; 491cb0ef41Sopenharmony_ci // Same with the second byte, 501cb0ef41Sopenharmony_ci // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 511cb0ef41Sopenharmony_ci if(isCompressible) { 521cb0ef41Sopenharmony_ci offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4; 531cb0ef41Sopenharmony_ci primary |= (uint32_t)((offset % 251) + 4) << 16; 541cb0ef41Sopenharmony_ci offset /= 251; 551cb0ef41Sopenharmony_ci } else { 561cb0ef41Sopenharmony_ci offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2; 571cb0ef41Sopenharmony_ci primary |= (uint32_t)((offset % 254) + 2) << 16; 581cb0ef41Sopenharmony_ci offset /= 254; 591cb0ef41Sopenharmony_ci } 601cb0ef41Sopenharmony_ci // First byte, assume no further overflow. 611cb0ef41Sopenharmony_ci return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24)); 621cb0ef41Sopenharmony_ci} 631cb0ef41Sopenharmony_ci 641cb0ef41Sopenharmony_ciuint32_t 651cb0ef41Sopenharmony_ciCollation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 661cb0ef41Sopenharmony_ci // Extract the second byte, minus the minimum byte value, 671cb0ef41Sopenharmony_ci // minus the step, modulo the number of usable byte values, plus the minimum. 681cb0ef41Sopenharmony_ci // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 691cb0ef41Sopenharmony_ci // Assume no further underflow for the first byte. 701cb0ef41Sopenharmony_ci U_ASSERT(0 < step && step <= 0x7f); 711cb0ef41Sopenharmony_ci int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step; 721cb0ef41Sopenharmony_ci if(isCompressible) { 731cb0ef41Sopenharmony_ci if(byte2 < 4) { 741cb0ef41Sopenharmony_ci byte2 += 251; 751cb0ef41Sopenharmony_ci basePrimary -= 0x1000000; 761cb0ef41Sopenharmony_ci } 771cb0ef41Sopenharmony_ci } else { 781cb0ef41Sopenharmony_ci if(byte2 < 2) { 791cb0ef41Sopenharmony_ci byte2 += 254; 801cb0ef41Sopenharmony_ci basePrimary -= 0x1000000; 811cb0ef41Sopenharmony_ci } 821cb0ef41Sopenharmony_ci } 831cb0ef41Sopenharmony_ci return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16); 841cb0ef41Sopenharmony_ci} 851cb0ef41Sopenharmony_ci 861cb0ef41Sopenharmony_ciuint32_t 871cb0ef41Sopenharmony_ciCollation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 881cb0ef41Sopenharmony_ci // Extract the third byte, minus the minimum byte value, 891cb0ef41Sopenharmony_ci // minus the step, modulo the number of usable byte values, plus the minimum. 901cb0ef41Sopenharmony_ci U_ASSERT(0 < step && step <= 0x7f); 911cb0ef41Sopenharmony_ci int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step; 921cb0ef41Sopenharmony_ci if(byte3 >= 2) { 931cb0ef41Sopenharmony_ci return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8); 941cb0ef41Sopenharmony_ci } 951cb0ef41Sopenharmony_ci byte3 += 254; 961cb0ef41Sopenharmony_ci // Same with the second byte, 971cb0ef41Sopenharmony_ci // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 981cb0ef41Sopenharmony_ci int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1; 991cb0ef41Sopenharmony_ci if(isCompressible) { 1001cb0ef41Sopenharmony_ci if(byte2 < 4) { 1011cb0ef41Sopenharmony_ci byte2 = 0xfe; 1021cb0ef41Sopenharmony_ci basePrimary -= 0x1000000; 1031cb0ef41Sopenharmony_ci } 1041cb0ef41Sopenharmony_ci } else { 1051cb0ef41Sopenharmony_ci if(byte2 < 2) { 1061cb0ef41Sopenharmony_ci byte2 = 0xff; 1071cb0ef41Sopenharmony_ci basePrimary -= 0x1000000; 1081cb0ef41Sopenharmony_ci } 1091cb0ef41Sopenharmony_ci } 1101cb0ef41Sopenharmony_ci // First byte, assume no further underflow. 1111cb0ef41Sopenharmony_ci return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8); 1121cb0ef41Sopenharmony_ci} 1131cb0ef41Sopenharmony_ci 1141cb0ef41Sopenharmony_ciuint32_t 1151cb0ef41Sopenharmony_ciCollation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) { 1161cb0ef41Sopenharmony_ci uint32_t p = (uint32_t)(dataCE >> 32); // three-byte primary pppppp00 1171cb0ef41Sopenharmony_ci int32_t lower32 = (int32_t)dataCE; // base code point b & step s: bbbbbbss (bit 7: isCompressible) 1181cb0ef41Sopenharmony_ci int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f); // delta * increment 1191cb0ef41Sopenharmony_ci UBool isCompressible = (lower32 & 0x80) != 0; 1201cb0ef41Sopenharmony_ci return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset); 1211cb0ef41Sopenharmony_ci} 1221cb0ef41Sopenharmony_ci 1231cb0ef41Sopenharmony_ciuint32_t 1241cb0ef41Sopenharmony_ciCollation::unassignedPrimaryFromCodePoint(UChar32 c) { 1251cb0ef41Sopenharmony_ci // Create a gap before U+0000. Use c=-1 for [first unassigned]. 1261cb0ef41Sopenharmony_ci ++c; 1271cb0ef41Sopenharmony_ci // Fourth byte: 18 values, every 14th byte value (gap of 13). 1281cb0ef41Sopenharmony_ci uint32_t primary = 2 + (c % 18) * 14; 1291cb0ef41Sopenharmony_ci c /= 18; 1301cb0ef41Sopenharmony_ci // Third byte: 254 values. 1311cb0ef41Sopenharmony_ci primary |= (2 + (c % 254)) << 8; 1321cb0ef41Sopenharmony_ci c /= 254; 1331cb0ef41Sopenharmony_ci // Second byte: 251 values 04..FE excluding the primary compression bytes. 1341cb0ef41Sopenharmony_ci primary |= (4 + (c % 251)) << 16; 1351cb0ef41Sopenharmony_ci // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18). 1361cb0ef41Sopenharmony_ci return primary | (UNASSIGNED_IMPLICIT_BYTE << 24); 1371cb0ef41Sopenharmony_ci} 1381cb0ef41Sopenharmony_ci 1391cb0ef41Sopenharmony_ciU_NAMESPACE_END 1401cb0ef41Sopenharmony_ci 1411cb0ef41Sopenharmony_ci#endif // !UCONFIG_NO_COLLATION 142