1cb93a386Sopenharmony_ci// © 2019 and later: Unicode, Inc. and others. 2cb93a386Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 3cb93a386Sopenharmony_ci 4cb93a386Sopenharmony_ci// locdistance.h 5cb93a386Sopenharmony_ci// created: 2019may08 Markus W. Scherer 6cb93a386Sopenharmony_ci 7cb93a386Sopenharmony_ci#ifndef __LOCDISTANCE_H__ 8cb93a386Sopenharmony_ci#define __LOCDISTANCE_H__ 9cb93a386Sopenharmony_ci 10cb93a386Sopenharmony_ci#include "unicode/utypes.h" 11cb93a386Sopenharmony_ci#include "unicode/bytestrie.h" 12cb93a386Sopenharmony_ci#include "unicode/localematcher.h" 13cb93a386Sopenharmony_ci#include "unicode/locid.h" 14cb93a386Sopenharmony_ci#include "unicode/uobject.h" 15cb93a386Sopenharmony_ci#include "lsr.h" 16cb93a386Sopenharmony_ci 17cb93a386Sopenharmony_ciU_NAMESPACE_BEGIN 18cb93a386Sopenharmony_ci 19cb93a386Sopenharmony_cistruct LocaleDistanceData; 20cb93a386Sopenharmony_ci 21cb93a386Sopenharmony_ci/** 22cb93a386Sopenharmony_ci * Offline-built data for LocaleMatcher. 23cb93a386Sopenharmony_ci * Mostly but not only the data for mapping locales to their maximized forms. 24cb93a386Sopenharmony_ci */ 25cb93a386Sopenharmony_ciclass LocaleDistance final : public UMemory { 26cb93a386Sopenharmony_cipublic: 27cb93a386Sopenharmony_ci static const LocaleDistance *getSingleton(UErrorCode &errorCode); 28cb93a386Sopenharmony_ci 29cb93a386Sopenharmony_ci static int32_t shiftDistance(int32_t distance) { 30cb93a386Sopenharmony_ci return distance << DISTANCE_SHIFT; 31cb93a386Sopenharmony_ci } 32cb93a386Sopenharmony_ci 33cb93a386Sopenharmony_ci static int32_t getShiftedDistance(int32_t indexAndDistance) { 34cb93a386Sopenharmony_ci return indexAndDistance & DISTANCE_MASK; 35cb93a386Sopenharmony_ci } 36cb93a386Sopenharmony_ci 37cb93a386Sopenharmony_ci static double getDistanceDouble(int32_t indexAndDistance) { 38cb93a386Sopenharmony_ci double shiftedDistance = getShiftedDistance(indexAndDistance); 39cb93a386Sopenharmony_ci return shiftedDistance / (1 << DISTANCE_SHIFT); 40cb93a386Sopenharmony_ci } 41cb93a386Sopenharmony_ci 42cb93a386Sopenharmony_ci static int32_t getDistanceFloor(int32_t indexAndDistance) { 43cb93a386Sopenharmony_ci return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT; 44cb93a386Sopenharmony_ci } 45cb93a386Sopenharmony_ci 46cb93a386Sopenharmony_ci static int32_t getIndex(int32_t indexAndDistance) { 47cb93a386Sopenharmony_ci // assert indexAndDistance >= 0; 48cb93a386Sopenharmony_ci return indexAndDistance >> INDEX_SHIFT; 49cb93a386Sopenharmony_ci } 50cb93a386Sopenharmony_ci 51cb93a386Sopenharmony_ci /** 52cb93a386Sopenharmony_ci * Finds the supported LSR with the smallest distance from the desired one. 53cb93a386Sopenharmony_ci * Equivalent LSR subtags must be normalized into a canonical form. 54cb93a386Sopenharmony_ci * 55cb93a386Sopenharmony_ci * <p>Returns the index of the lowest-distance supported LSR in the high bits 56cb93a386Sopenharmony_ci * (negative if none has a distance below the threshold), 57cb93a386Sopenharmony_ci * and its distance (0..ABOVE_THRESHOLD) in the low bits. 58cb93a386Sopenharmony_ci */ 59cb93a386Sopenharmony_ci int32_t getBestIndexAndDistance(const LSR &desired, 60cb93a386Sopenharmony_ci const LSR **supportedLSRs, int32_t supportedLSRsLength, 61cb93a386Sopenharmony_ci int32_t shiftedThreshold, 62cb93a386Sopenharmony_ci ULocMatchFavorSubtag favorSubtag, 63cb93a386Sopenharmony_ci ULocMatchDirection direction) const; 64cb93a386Sopenharmony_ci 65cb93a386Sopenharmony_ci UBool isParadigmLSR(const LSR &lsr) const; 66cb93a386Sopenharmony_ci 67cb93a386Sopenharmony_ci int32_t getDefaultScriptDistance() const { 68cb93a386Sopenharmony_ci return defaultScriptDistance; 69cb93a386Sopenharmony_ci } 70cb93a386Sopenharmony_ci 71cb93a386Sopenharmony_ci int32_t getDefaultDemotionPerDesiredLocale() const { 72cb93a386Sopenharmony_ci return defaultDemotionPerDesiredLocale; 73cb93a386Sopenharmony_ci } 74cb93a386Sopenharmony_ci 75cb93a386Sopenharmony_ciprivate: 76cb93a386Sopenharmony_ci // The distance is shifted left to gain some fraction bits. 77cb93a386Sopenharmony_ci static constexpr int32_t DISTANCE_SHIFT = 3; 78cb93a386Sopenharmony_ci static constexpr int32_t DISTANCE_FRACTION_MASK = 7; 79cb93a386Sopenharmony_ci // 7 bits for 0..100 80cb93a386Sopenharmony_ci static constexpr int32_t DISTANCE_INT_SHIFT = 7; 81cb93a386Sopenharmony_ci static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT; 82cb93a386Sopenharmony_ci static constexpr int32_t DISTANCE_MASK = 0x3ff; 83cb93a386Sopenharmony_ci // tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit 84cb93a386Sopenharmony_ci static constexpr int32_t INDEX_NEG_1 = 0xfffffc00; 85cb93a386Sopenharmony_ci 86cb93a386Sopenharmony_ci LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely); 87cb93a386Sopenharmony_ci LocaleDistance(const LocaleDistance &other) = delete; 88cb93a386Sopenharmony_ci LocaleDistance &operator=(const LocaleDistance &other) = delete; 89cb93a386Sopenharmony_ci 90cb93a386Sopenharmony_ci static void initLocaleDistance(UErrorCode &errorCode); 91cb93a386Sopenharmony_ci 92cb93a386Sopenharmony_ci UBool isMatch(const LSR &desired, const LSR &supported, 93cb93a386Sopenharmony_ci int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const { 94cb93a386Sopenharmony_ci const LSR *pSupp = &supported; 95cb93a386Sopenharmony_ci return getBestIndexAndDistance( 96cb93a386Sopenharmony_ci desired, &pSupp, 1, 97cb93a386Sopenharmony_ci shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0; 98cb93a386Sopenharmony_ci } 99cb93a386Sopenharmony_ci 100cb93a386Sopenharmony_ci static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState, 101cb93a386Sopenharmony_ci const char *desired, const char *supported); 102cb93a386Sopenharmony_ci 103cb93a386Sopenharmony_ci static int32_t getRegionPartitionsDistance( 104cb93a386Sopenharmony_ci BytesTrie &iter, uint64_t startState, 105cb93a386Sopenharmony_ci const char *desiredPartitions, const char *supportedPartitions, 106cb93a386Sopenharmony_ci int32_t threshold); 107cb93a386Sopenharmony_ci 108cb93a386Sopenharmony_ci static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState); 109cb93a386Sopenharmony_ci 110cb93a386Sopenharmony_ci static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue); 111cb93a386Sopenharmony_ci 112cb93a386Sopenharmony_ci const char *partitionsForRegion(const LSR &lsr) const { 113cb93a386Sopenharmony_ci // ill-formed region -> one non-matching string 114cb93a386Sopenharmony_ci int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex]; 115cb93a386Sopenharmony_ci return partitionArrays[pIndex]; 116cb93a386Sopenharmony_ci } 117cb93a386Sopenharmony_ci 118cb93a386Sopenharmony_ci int32_t getDefaultRegionDistance() const { 119cb93a386Sopenharmony_ci return defaultRegionDistance; 120cb93a386Sopenharmony_ci } 121cb93a386Sopenharmony_ci 122cb93a386Sopenharmony_ci const XLikelySubtags &likelySubtags; 123cb93a386Sopenharmony_ci 124cb93a386Sopenharmony_ci // The trie maps each dlang+slang+dscript+sscript+dregion+sregion 125cb93a386Sopenharmony_ci // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance. 126cb93a386Sopenharmony_ci // There is also a trie value for each subsequence of whole subtags. 127cb93a386Sopenharmony_ci // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"". 128cb93a386Sopenharmony_ci BytesTrie trie; 129cb93a386Sopenharmony_ci 130cb93a386Sopenharmony_ci /** 131cb93a386Sopenharmony_ci * Maps each region to zero or more single-character partitions. 132cb93a386Sopenharmony_ci */ 133cb93a386Sopenharmony_ci const uint8_t *regionToPartitionsIndex; 134cb93a386Sopenharmony_ci const char **partitionArrays; 135cb93a386Sopenharmony_ci 136cb93a386Sopenharmony_ci /** 137cb93a386Sopenharmony_ci * Used to get the paradigm region for a cluster, if there is one. 138cb93a386Sopenharmony_ci */ 139cb93a386Sopenharmony_ci const LSR *paradigmLSRs; 140cb93a386Sopenharmony_ci int32_t paradigmLSRsLength; 141cb93a386Sopenharmony_ci 142cb93a386Sopenharmony_ci int32_t defaultLanguageDistance; 143cb93a386Sopenharmony_ci int32_t defaultScriptDistance; 144cb93a386Sopenharmony_ci int32_t defaultRegionDistance; 145cb93a386Sopenharmony_ci int32_t minRegionDistance; 146cb93a386Sopenharmony_ci int32_t defaultDemotionPerDesiredLocale; 147cb93a386Sopenharmony_ci}; 148cb93a386Sopenharmony_ci 149cb93a386Sopenharmony_ciU_NAMESPACE_END 150cb93a386Sopenharmony_ci 151cb93a386Sopenharmony_ci#endif // __LOCDISTANCE_H__ 152