12e5b6d6dSopenharmony_ci// © 2017 and later: Unicode, Inc. and others.
22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
32e5b6d6dSopenharmony_ci/*
42e5b6d6dSopenharmony_ci*******************************************************************************
52e5b6d6dSopenharmony_ci* Copyright (C) 2012-2015, International Business Machines
62e5b6d6dSopenharmony_ci* Corporation and others.  All Rights Reserved.
72e5b6d6dSopenharmony_ci*******************************************************************************
82e5b6d6dSopenharmony_ci* collationbasedatabuilder.h
92e5b6d6dSopenharmony_ci*
102e5b6d6dSopenharmony_ci* created on: 2012aug11
112e5b6d6dSopenharmony_ci* created by: Markus W. Scherer
122e5b6d6dSopenharmony_ci*/
132e5b6d6dSopenharmony_ci
142e5b6d6dSopenharmony_ci#ifndef __COLLATIONBASEDATABUILDER_H__
152e5b6d6dSopenharmony_ci#define __COLLATIONBASEDATABUILDER_H__
162e5b6d6dSopenharmony_ci
172e5b6d6dSopenharmony_ci#include "unicode/utypes.h"
182e5b6d6dSopenharmony_ci
192e5b6d6dSopenharmony_ci#if !UCONFIG_NO_COLLATION
202e5b6d6dSopenharmony_ci
212e5b6d6dSopenharmony_ci#include "unicode/uniset.h"
222e5b6d6dSopenharmony_ci#include "unicode/unistr.h"
232e5b6d6dSopenharmony_ci#include "unicode/uscript.h"
242e5b6d6dSopenharmony_ci#include "collation.h"
252e5b6d6dSopenharmony_ci#include "collationdata.h"
262e5b6d6dSopenharmony_ci#include "collationdatabuilder.h"
272e5b6d6dSopenharmony_ci#include "normalizer2impl.h"
282e5b6d6dSopenharmony_ci#include "utrie2.h"
292e5b6d6dSopenharmony_ci#include "uvectr32.h"
302e5b6d6dSopenharmony_ci#include "uvectr64.h"
312e5b6d6dSopenharmony_ci#include "uvector.h"
322e5b6d6dSopenharmony_ci
332e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN
342e5b6d6dSopenharmony_ci
352e5b6d6dSopenharmony_ci/**
362e5b6d6dSopenharmony_ci * Low-level base CollationData builder.
372e5b6d6dSopenharmony_ci */
382e5b6d6dSopenharmony_ciclass U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder {
392e5b6d6dSopenharmony_cipublic:
402e5b6d6dSopenharmony_ci    CollationBaseDataBuilder(UBool icu4xMode, UErrorCode &errorCode);
412e5b6d6dSopenharmony_ci
422e5b6d6dSopenharmony_ci    virtual ~CollationBaseDataBuilder();
432e5b6d6dSopenharmony_ci
442e5b6d6dSopenharmony_ci    void init(UErrorCode &errorCode);
452e5b6d6dSopenharmony_ci
462e5b6d6dSopenharmony_ci    /**
472e5b6d6dSopenharmony_ci     * Sets the Han ranges as ranges of offset CE32s.
482e5b6d6dSopenharmony_ci     * Note: Unihan extension A sorts after the other BMP ranges.
492e5b6d6dSopenharmony_ci     * See http://www.unicode.org/reports/tr10/#Implicit_Weights
502e5b6d6dSopenharmony_ci     *
512e5b6d6dSopenharmony_ci     * @param ranges array of ranges of [:Unified_Ideograph:] in collation order,
522e5b6d6dSopenharmony_ci     *               as (start, end) code point pairs
532e5b6d6dSopenharmony_ci     * @param length number of code points (not pairs)
542e5b6d6dSopenharmony_ci     * @param errorCode in/out error code
552e5b6d6dSopenharmony_ci     */
562e5b6d6dSopenharmony_ci    void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode);
572e5b6d6dSopenharmony_ci
582e5b6d6dSopenharmony_ci    void setNumericPrimary(uint32_t np) { numericPrimary = np; }
592e5b6d6dSopenharmony_ci
602e5b6d6dSopenharmony_ci    virtual UBool isCompressibleLeadByte(uint32_t b) const;
612e5b6d6dSopenharmony_ci
622e5b6d6dSopenharmony_ci    void setCompressibleLeadByte(uint32_t b);
632e5b6d6dSopenharmony_ci
642e5b6d6dSopenharmony_ci    static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
652e5b6d6dSopenharmony_ci    static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible);
662e5b6d6dSopenharmony_ci
672e5b6d6dSopenharmony_ci    virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
682e5b6d6dSopenharmony_ci
692e5b6d6dSopenharmony_ci    void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
702e5b6d6dSopenharmony_ci    void addRootElement(int64_t ce, UErrorCode &errorCode);
712e5b6d6dSopenharmony_ci
722e5b6d6dSopenharmony_ci    void addScriptStart(int32_t script, uint32_t p);
732e5b6d6dSopenharmony_ci
742e5b6d6dSopenharmony_ci    virtual void build(CollationData &data, UErrorCode &errorCode);
752e5b6d6dSopenharmony_ci
762e5b6d6dSopenharmony_ci    void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode);
772e5b6d6dSopenharmony_ci
782e5b6d6dSopenharmony_ciprivate:
792e5b6d6dSopenharmony_ci    int32_t writeRootElementsRange(
802e5b6d6dSopenharmony_ci            uint32_t prevPrimary, uint32_t p, int32_t i,
812e5b6d6dSopenharmony_ci            UVector32 &table, UErrorCode &errorCode);
822e5b6d6dSopenharmony_ci
832e5b6d6dSopenharmony_ci    // Flags for which primary-weight lead bytes are compressible.
842e5b6d6dSopenharmony_ci    UBool compressibleBytes[256];
852e5b6d6dSopenharmony_ci    uint32_t numericPrimary;
862e5b6d6dSopenharmony_ci    uint32_t firstHanPrimary;
872e5b6d6dSopenharmony_ci    uint32_t lastHanPrimary;
882e5b6d6dSopenharmony_ci    int32_t hanStep;
892e5b6d6dSopenharmony_ci    UVector64 rootElements;
902e5b6d6dSopenharmony_ci    uint16_t scriptsIndex[USCRIPT_CODE_LIMIT + 16];  // need exactly this many
912e5b6d6dSopenharmony_ci    uint16_t scriptStarts[USCRIPT_CODE_LIMIT + 16];  // should be safely more than needed
922e5b6d6dSopenharmony_ci    int32_t scriptStartsLength;
932e5b6d6dSopenharmony_ci};
942e5b6d6dSopenharmony_ci
952e5b6d6dSopenharmony_ciU_NAMESPACE_END
962e5b6d6dSopenharmony_ci
972e5b6d6dSopenharmony_ci#endif  // !UCONFIG_NO_COLLATION
982e5b6d6dSopenharmony_ci#endif  // __COLLATIONBASEDATABUILDER_H__
99