12e5b6d6dSopenharmony_ci// © 2017 and later: Unicode, Inc. and others. 22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 32e5b6d6dSopenharmony_ci/* 42e5b6d6dSopenharmony_ci******************************************************************************* 52e5b6d6dSopenharmony_ci* Copyright (C) 2012-2015, International Business Machines 62e5b6d6dSopenharmony_ci* Corporation and others. All Rights Reserved. 72e5b6d6dSopenharmony_ci******************************************************************************* 82e5b6d6dSopenharmony_ci* collationbasedatabuilder.h 92e5b6d6dSopenharmony_ci* 102e5b6d6dSopenharmony_ci* created on: 2012aug11 112e5b6d6dSopenharmony_ci* created by: Markus W. Scherer 122e5b6d6dSopenharmony_ci*/ 132e5b6d6dSopenharmony_ci 142e5b6d6dSopenharmony_ci#ifndef __COLLATIONBASEDATABUILDER_H__ 152e5b6d6dSopenharmony_ci#define __COLLATIONBASEDATABUILDER_H__ 162e5b6d6dSopenharmony_ci 172e5b6d6dSopenharmony_ci#include "unicode/utypes.h" 182e5b6d6dSopenharmony_ci 192e5b6d6dSopenharmony_ci#if !UCONFIG_NO_COLLATION 202e5b6d6dSopenharmony_ci 212e5b6d6dSopenharmony_ci#include "unicode/uniset.h" 222e5b6d6dSopenharmony_ci#include "unicode/unistr.h" 232e5b6d6dSopenharmony_ci#include "unicode/uscript.h" 242e5b6d6dSopenharmony_ci#include "collation.h" 252e5b6d6dSopenharmony_ci#include "collationdata.h" 262e5b6d6dSopenharmony_ci#include "collationdatabuilder.h" 272e5b6d6dSopenharmony_ci#include "normalizer2impl.h" 282e5b6d6dSopenharmony_ci#include "utrie2.h" 292e5b6d6dSopenharmony_ci#include "uvectr32.h" 302e5b6d6dSopenharmony_ci#include "uvectr64.h" 312e5b6d6dSopenharmony_ci#include "uvector.h" 322e5b6d6dSopenharmony_ci 332e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN 342e5b6d6dSopenharmony_ci 352e5b6d6dSopenharmony_ci/** 362e5b6d6dSopenharmony_ci * Low-level base CollationData builder. 372e5b6d6dSopenharmony_ci */ 382e5b6d6dSopenharmony_ciclass U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder { 392e5b6d6dSopenharmony_cipublic: 402e5b6d6dSopenharmony_ci CollationBaseDataBuilder(UBool icu4xMode, UErrorCode &errorCode); 412e5b6d6dSopenharmony_ci 422e5b6d6dSopenharmony_ci virtual ~CollationBaseDataBuilder(); 432e5b6d6dSopenharmony_ci 442e5b6d6dSopenharmony_ci void init(UErrorCode &errorCode); 452e5b6d6dSopenharmony_ci 462e5b6d6dSopenharmony_ci /** 472e5b6d6dSopenharmony_ci * Sets the Han ranges as ranges of offset CE32s. 482e5b6d6dSopenharmony_ci * Note: Unihan extension A sorts after the other BMP ranges. 492e5b6d6dSopenharmony_ci * See http://www.unicode.org/reports/tr10/#Implicit_Weights 502e5b6d6dSopenharmony_ci * 512e5b6d6dSopenharmony_ci * @param ranges array of ranges of [:Unified_Ideograph:] in collation order, 522e5b6d6dSopenharmony_ci * as (start, end) code point pairs 532e5b6d6dSopenharmony_ci * @param length number of code points (not pairs) 542e5b6d6dSopenharmony_ci * @param errorCode in/out error code 552e5b6d6dSopenharmony_ci */ 562e5b6d6dSopenharmony_ci void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode); 572e5b6d6dSopenharmony_ci 582e5b6d6dSopenharmony_ci void setNumericPrimary(uint32_t np) { numericPrimary = np; } 592e5b6d6dSopenharmony_ci 602e5b6d6dSopenharmony_ci virtual UBool isCompressibleLeadByte(uint32_t b) const; 612e5b6d6dSopenharmony_ci 622e5b6d6dSopenharmony_ci void setCompressibleLeadByte(uint32_t b); 632e5b6d6dSopenharmony_ci 642e5b6d6dSopenharmony_ci static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); 652e5b6d6dSopenharmony_ci static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); 662e5b6d6dSopenharmony_ci 672e5b6d6dSopenharmony_ci virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); 682e5b6d6dSopenharmony_ci 692e5b6d6dSopenharmony_ci void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); 702e5b6d6dSopenharmony_ci void addRootElement(int64_t ce, UErrorCode &errorCode); 712e5b6d6dSopenharmony_ci 722e5b6d6dSopenharmony_ci void addScriptStart(int32_t script, uint32_t p); 732e5b6d6dSopenharmony_ci 742e5b6d6dSopenharmony_ci virtual void build(CollationData &data, UErrorCode &errorCode); 752e5b6d6dSopenharmony_ci 762e5b6d6dSopenharmony_ci void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode); 772e5b6d6dSopenharmony_ci 782e5b6d6dSopenharmony_ciprivate: 792e5b6d6dSopenharmony_ci int32_t writeRootElementsRange( 802e5b6d6dSopenharmony_ci uint32_t prevPrimary, uint32_t p, int32_t i, 812e5b6d6dSopenharmony_ci UVector32 &table, UErrorCode &errorCode); 822e5b6d6dSopenharmony_ci 832e5b6d6dSopenharmony_ci // Flags for which primary-weight lead bytes are compressible. 842e5b6d6dSopenharmony_ci UBool compressibleBytes[256]; 852e5b6d6dSopenharmony_ci uint32_t numericPrimary; 862e5b6d6dSopenharmony_ci uint32_t firstHanPrimary; 872e5b6d6dSopenharmony_ci uint32_t lastHanPrimary; 882e5b6d6dSopenharmony_ci int32_t hanStep; 892e5b6d6dSopenharmony_ci UVector64 rootElements; 902e5b6d6dSopenharmony_ci uint16_t scriptsIndex[USCRIPT_CODE_LIMIT + 16]; // need exactly this many 912e5b6d6dSopenharmony_ci uint16_t scriptStarts[USCRIPT_CODE_LIMIT + 16]; // should be safely more than needed 922e5b6d6dSopenharmony_ci int32_t scriptStartsLength; 932e5b6d6dSopenharmony_ci}; 942e5b6d6dSopenharmony_ci 952e5b6d6dSopenharmony_ciU_NAMESPACE_END 962e5b6d6dSopenharmony_ci 972e5b6d6dSopenharmony_ci#endif // !UCONFIG_NO_COLLATION 982e5b6d6dSopenharmony_ci#endif // __COLLATIONBASEDATABUILDER_H__ 99