1cb93a386Sopenharmony_ci// © 2019 and later: Unicode, Inc. and others.
2cb93a386Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
3cb93a386Sopenharmony_ci
4cb93a386Sopenharmony_ci// locdistance.h
5cb93a386Sopenharmony_ci// created: 2019may08 Markus W. Scherer
6cb93a386Sopenharmony_ci
7cb93a386Sopenharmony_ci#ifndef __LOCDISTANCE_H__
8cb93a386Sopenharmony_ci#define __LOCDISTANCE_H__
9cb93a386Sopenharmony_ci
10cb93a386Sopenharmony_ci#include "unicode/utypes.h"
11cb93a386Sopenharmony_ci#include "unicode/bytestrie.h"
12cb93a386Sopenharmony_ci#include "unicode/localematcher.h"
13cb93a386Sopenharmony_ci#include "unicode/locid.h"
14cb93a386Sopenharmony_ci#include "unicode/uobject.h"
15cb93a386Sopenharmony_ci#include "lsr.h"
16cb93a386Sopenharmony_ci
17cb93a386Sopenharmony_ciU_NAMESPACE_BEGIN
18cb93a386Sopenharmony_ci
19cb93a386Sopenharmony_cistruct LocaleDistanceData;
20cb93a386Sopenharmony_ci
21cb93a386Sopenharmony_ci/**
22cb93a386Sopenharmony_ci * Offline-built data for LocaleMatcher.
23cb93a386Sopenharmony_ci * Mostly but not only the data for mapping locales to their maximized forms.
24cb93a386Sopenharmony_ci */
25cb93a386Sopenharmony_ciclass LocaleDistance final : public UMemory {
26cb93a386Sopenharmony_cipublic:
27cb93a386Sopenharmony_ci    static const LocaleDistance *getSingleton(UErrorCode &errorCode);
28cb93a386Sopenharmony_ci
29cb93a386Sopenharmony_ci    static int32_t shiftDistance(int32_t distance) {
30cb93a386Sopenharmony_ci        return distance << DISTANCE_SHIFT;
31cb93a386Sopenharmony_ci    }
32cb93a386Sopenharmony_ci
33cb93a386Sopenharmony_ci    static int32_t getShiftedDistance(int32_t indexAndDistance) {
34cb93a386Sopenharmony_ci        return indexAndDistance & DISTANCE_MASK;
35cb93a386Sopenharmony_ci    }
36cb93a386Sopenharmony_ci
37cb93a386Sopenharmony_ci    static double getDistanceDouble(int32_t indexAndDistance) {
38cb93a386Sopenharmony_ci        double shiftedDistance = getShiftedDistance(indexAndDistance);
39cb93a386Sopenharmony_ci        return shiftedDistance / (1 << DISTANCE_SHIFT);
40cb93a386Sopenharmony_ci    }
41cb93a386Sopenharmony_ci
42cb93a386Sopenharmony_ci    static int32_t getDistanceFloor(int32_t indexAndDistance) {
43cb93a386Sopenharmony_ci        return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
44cb93a386Sopenharmony_ci    }
45cb93a386Sopenharmony_ci
46cb93a386Sopenharmony_ci    static int32_t getIndex(int32_t indexAndDistance) {
47cb93a386Sopenharmony_ci        // assert indexAndDistance >= 0;
48cb93a386Sopenharmony_ci        return indexAndDistance >> INDEX_SHIFT;
49cb93a386Sopenharmony_ci    }
50cb93a386Sopenharmony_ci
51cb93a386Sopenharmony_ci    /**
52cb93a386Sopenharmony_ci     * Finds the supported LSR with the smallest distance from the desired one.
53cb93a386Sopenharmony_ci     * Equivalent LSR subtags must be normalized into a canonical form.
54cb93a386Sopenharmony_ci     *
55cb93a386Sopenharmony_ci     * <p>Returns the index of the lowest-distance supported LSR in the high bits
56cb93a386Sopenharmony_ci     * (negative if none has a distance below the threshold),
57cb93a386Sopenharmony_ci     * and its distance (0..ABOVE_THRESHOLD) in the low bits.
58cb93a386Sopenharmony_ci     */
59cb93a386Sopenharmony_ci    int32_t getBestIndexAndDistance(const LSR &desired,
60cb93a386Sopenharmony_ci                                    const LSR **supportedLSRs, int32_t supportedLSRsLength,
61cb93a386Sopenharmony_ci                                    int32_t shiftedThreshold,
62cb93a386Sopenharmony_ci                                    ULocMatchFavorSubtag favorSubtag,
63cb93a386Sopenharmony_ci                                    ULocMatchDirection direction) const;
64cb93a386Sopenharmony_ci
65cb93a386Sopenharmony_ci    UBool isParadigmLSR(const LSR &lsr) const;
66cb93a386Sopenharmony_ci
67cb93a386Sopenharmony_ci    int32_t getDefaultScriptDistance() const {
68cb93a386Sopenharmony_ci        return defaultScriptDistance;
69cb93a386Sopenharmony_ci    }
70cb93a386Sopenharmony_ci
71cb93a386Sopenharmony_ci    int32_t getDefaultDemotionPerDesiredLocale() const {
72cb93a386Sopenharmony_ci        return defaultDemotionPerDesiredLocale;
73cb93a386Sopenharmony_ci    }
74cb93a386Sopenharmony_ci
75cb93a386Sopenharmony_ciprivate:
76cb93a386Sopenharmony_ci    // The distance is shifted left to gain some fraction bits.
77cb93a386Sopenharmony_ci    static constexpr int32_t DISTANCE_SHIFT = 3;
78cb93a386Sopenharmony_ci    static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
79cb93a386Sopenharmony_ci    // 7 bits for 0..100
80cb93a386Sopenharmony_ci    static constexpr int32_t DISTANCE_INT_SHIFT = 7;
81cb93a386Sopenharmony_ci    static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
82cb93a386Sopenharmony_ci    static constexpr int32_t DISTANCE_MASK = 0x3ff;
83cb93a386Sopenharmony_ci    // tic constexpr int32_t MAX_INDEX = 0x1fffff;  // avoids sign bit
84cb93a386Sopenharmony_ci    static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
85cb93a386Sopenharmony_ci
86cb93a386Sopenharmony_ci    LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
87cb93a386Sopenharmony_ci    LocaleDistance(const LocaleDistance &other) = delete;
88cb93a386Sopenharmony_ci    LocaleDistance &operator=(const LocaleDistance &other) = delete;
89cb93a386Sopenharmony_ci
90cb93a386Sopenharmony_ci    static void initLocaleDistance(UErrorCode &errorCode);
91cb93a386Sopenharmony_ci
92cb93a386Sopenharmony_ci    UBool isMatch(const LSR &desired, const LSR &supported,
93cb93a386Sopenharmony_ci                  int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
94cb93a386Sopenharmony_ci        const LSR *pSupp = &supported;
95cb93a386Sopenharmony_ci        return getBestIndexAndDistance(
96cb93a386Sopenharmony_ci            desired, &pSupp, 1,
97cb93a386Sopenharmony_ci            shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0;
98cb93a386Sopenharmony_ci    }
99cb93a386Sopenharmony_ci
100cb93a386Sopenharmony_ci    static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
101cb93a386Sopenharmony_ci                                            const char *desired, const char *supported);
102cb93a386Sopenharmony_ci
103cb93a386Sopenharmony_ci    static int32_t getRegionPartitionsDistance(
104cb93a386Sopenharmony_ci        BytesTrie &iter, uint64_t startState,
105cb93a386Sopenharmony_ci        const char *desiredPartitions, const char *supportedPartitions,
106cb93a386Sopenharmony_ci        int32_t threshold);
107cb93a386Sopenharmony_ci
108cb93a386Sopenharmony_ci    static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
109cb93a386Sopenharmony_ci
110cb93a386Sopenharmony_ci    static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
111cb93a386Sopenharmony_ci
112cb93a386Sopenharmony_ci    const char *partitionsForRegion(const LSR &lsr) const {
113cb93a386Sopenharmony_ci        // ill-formed region -> one non-matching string
114cb93a386Sopenharmony_ci        int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
115cb93a386Sopenharmony_ci        return partitionArrays[pIndex];
116cb93a386Sopenharmony_ci    }
117cb93a386Sopenharmony_ci
118cb93a386Sopenharmony_ci    int32_t getDefaultRegionDistance() const {
119cb93a386Sopenharmony_ci        return defaultRegionDistance;
120cb93a386Sopenharmony_ci    }
121cb93a386Sopenharmony_ci
122cb93a386Sopenharmony_ci    const XLikelySubtags &likelySubtags;
123cb93a386Sopenharmony_ci
124cb93a386Sopenharmony_ci    // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
125cb93a386Sopenharmony_ci    // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
126cb93a386Sopenharmony_ci    // There is also a trie value for each subsequence of whole subtags.
127cb93a386Sopenharmony_ci    // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
128cb93a386Sopenharmony_ci    BytesTrie trie;
129cb93a386Sopenharmony_ci
130cb93a386Sopenharmony_ci    /**
131cb93a386Sopenharmony_ci     * Maps each region to zero or more single-character partitions.
132cb93a386Sopenharmony_ci     */
133cb93a386Sopenharmony_ci    const uint8_t *regionToPartitionsIndex;
134cb93a386Sopenharmony_ci    const char **partitionArrays;
135cb93a386Sopenharmony_ci
136cb93a386Sopenharmony_ci    /**
137cb93a386Sopenharmony_ci     * Used to get the paradigm region for a cluster, if there is one.
138cb93a386Sopenharmony_ci     */
139cb93a386Sopenharmony_ci    const LSR *paradigmLSRs;
140cb93a386Sopenharmony_ci    int32_t paradigmLSRsLength;
141cb93a386Sopenharmony_ci
142cb93a386Sopenharmony_ci    int32_t defaultLanguageDistance;
143cb93a386Sopenharmony_ci    int32_t defaultScriptDistance;
144cb93a386Sopenharmony_ci    int32_t defaultRegionDistance;
145cb93a386Sopenharmony_ci    int32_t minRegionDistance;
146cb93a386Sopenharmony_ci    int32_t defaultDemotionPerDesiredLocale;
147cb93a386Sopenharmony_ci};
148cb93a386Sopenharmony_ci
149cb93a386Sopenharmony_ciU_NAMESPACE_END
150cb93a386Sopenharmony_ci
151cb93a386Sopenharmony_ci#endif  // __LOCDISTANCE_H__
152