12e5b6d6dSopenharmony_ci// © 2017 and later: Unicode, Inc. and others. 22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 32e5b6d6dSopenharmony_ci 42e5b6d6dSopenharmony_ci// ucasemap_imp.h 52e5b6d6dSopenharmony_ci// created: 2017feb08 Markus W. Scherer 62e5b6d6dSopenharmony_ci 72e5b6d6dSopenharmony_ci#ifndef __UCASEMAP_IMP_H__ 82e5b6d6dSopenharmony_ci#define __UCASEMAP_IMP_H__ 92e5b6d6dSopenharmony_ci 102e5b6d6dSopenharmony_ci#include "unicode/utypes.h" 112e5b6d6dSopenharmony_ci#include "unicode/ucasemap.h" 122e5b6d6dSopenharmony_ci#include "unicode/uchar.h" 132e5b6d6dSopenharmony_ci#include "ucase.h" 142e5b6d6dSopenharmony_ci 152e5b6d6dSopenharmony_ci/** 162e5b6d6dSopenharmony_ci * Bit mask for the titlecasing iterator options bit field. 172e5b6d6dSopenharmony_ci * Currently only 3 out of 8 values are used: 182e5b6d6dSopenharmony_ci * 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. 192e5b6d6dSopenharmony_ci * See stringoptions.h. 202e5b6d6dSopenharmony_ci * @internal 212e5b6d6dSopenharmony_ci */ 222e5b6d6dSopenharmony_ci#define U_TITLECASE_ITERATOR_MASK 0xe0 232e5b6d6dSopenharmony_ci 242e5b6d6dSopenharmony_ci/** 252e5b6d6dSopenharmony_ci * Bit mask for the titlecasing index adjustment options bit set. 262e5b6d6dSopenharmony_ci * Currently two bits are defined: 272e5b6d6dSopenharmony_ci * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED. 282e5b6d6dSopenharmony_ci * See stringoptions.h. 292e5b6d6dSopenharmony_ci * @internal 302e5b6d6dSopenharmony_ci */ 312e5b6d6dSopenharmony_ci#define U_TITLECASE_ADJUSTMENT_MASK 0x600 322e5b6d6dSopenharmony_ci 332e5b6d6dSopenharmony_ci/** 342e5b6d6dSopenharmony_ci * Internal API, used by u_strcasecmp() etc. 352e5b6d6dSopenharmony_ci * Compare strings case-insensitively, 362e5b6d6dSopenharmony_ci * in code point order or code unit order. 372e5b6d6dSopenharmony_ci */ 382e5b6d6dSopenharmony_ciU_CFUNC int32_t 392e5b6d6dSopenharmony_ciu_strcmpFold(const UChar *s1, int32_t length1, 402e5b6d6dSopenharmony_ci const UChar *s2, int32_t length2, 412e5b6d6dSopenharmony_ci uint32_t options, 422e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 432e5b6d6dSopenharmony_ci 442e5b6d6dSopenharmony_ci/** 452e5b6d6dSopenharmony_ci * Internal API, used for detecting length of 462e5b6d6dSopenharmony_ci * shared prefix case-insensitively. 472e5b6d6dSopenharmony_ci * @param s1 input string 1 482e5b6d6dSopenharmony_ci * @param length1 length of string 1, or -1 (NULL terminated) 492e5b6d6dSopenharmony_ci * @param s2 input string 2 502e5b6d6dSopenharmony_ci * @param length2 length of string 2, or -1 (NULL terminated) 512e5b6d6dSopenharmony_ci * @param options compare options 522e5b6d6dSopenharmony_ci * @param matchLen1 (output) length of partial prefix match in s1 532e5b6d6dSopenharmony_ci * @param matchLen2 (output) length of partial prefix match in s2 542e5b6d6dSopenharmony_ci * @param pErrorCode receives error status 552e5b6d6dSopenharmony_ci */ 562e5b6d6dSopenharmony_ciU_CAPI void 572e5b6d6dSopenharmony_ciu_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, 582e5b6d6dSopenharmony_ci const UChar *s2, int32_t length2, 592e5b6d6dSopenharmony_ci uint32_t options, 602e5b6d6dSopenharmony_ci int32_t *matchLen1, int32_t *matchLen2, 612e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 622e5b6d6dSopenharmony_ci 632e5b6d6dSopenharmony_ci#ifdef __cplusplus 642e5b6d6dSopenharmony_ci 652e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN 662e5b6d6dSopenharmony_ci 672e5b6d6dSopenharmony_ciclass BreakIterator; // unicode/brkiter.h 682e5b6d6dSopenharmony_ciclass ByteSink; 692e5b6d6dSopenharmony_ciclass Locale; // unicode/locid.h 702e5b6d6dSopenharmony_ci 712e5b6d6dSopenharmony_ci/** Returns true if the options are valid. Otherwise false, and sets an error. */ 722e5b6d6dSopenharmony_ciinline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) { 732e5b6d6dSopenharmony_ci if (U_FAILURE(errorCode)) { return false; } 742e5b6d6dSopenharmony_ci if ((options & U_TITLECASE_ADJUSTMENT_MASK) == U_TITLECASE_ADJUSTMENT_MASK) { 752e5b6d6dSopenharmony_ci // Both options together. 762e5b6d6dSopenharmony_ci errorCode = U_ILLEGAL_ARGUMENT_ERROR; 772e5b6d6dSopenharmony_ci return false; 782e5b6d6dSopenharmony_ci } 792e5b6d6dSopenharmony_ci return true; 802e5b6d6dSopenharmony_ci} 812e5b6d6dSopenharmony_ci 822e5b6d6dSopenharmony_ciinline UBool ustrcase_isLNS(UChar32 c) { 832e5b6d6dSopenharmony_ci // Letter, number, symbol, 842e5b6d6dSopenharmony_ci // or a private use code point because those are typically used as letters or numbers. 852e5b6d6dSopenharmony_ci // Consider modifier letters only if they are cased. 862e5b6d6dSopenharmony_ci const uint32_t LNS = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK; 872e5b6d6dSopenharmony_ci int gc = u_charType(c); 882e5b6d6dSopenharmony_ci return (U_MASK(gc) & LNS) != 0 || (gc == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE); 892e5b6d6dSopenharmony_ci} 902e5b6d6dSopenharmony_ci 912e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION 922e5b6d6dSopenharmony_ci 932e5b6d6dSopenharmony_ci/** Returns nullptr if error. Pass in either locale or locID, not both. */ 942e5b6d6dSopenharmony_ciU_CFUNC 952e5b6d6dSopenharmony_ciBreakIterator *ustrcase_getTitleBreakIterator( 962e5b6d6dSopenharmony_ci const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter, 972e5b6d6dSopenharmony_ci LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode); 982e5b6d6dSopenharmony_ci 992e5b6d6dSopenharmony_ci#endif 1002e5b6d6dSopenharmony_ci 1012e5b6d6dSopenharmony_ciU_NAMESPACE_END 1022e5b6d6dSopenharmony_ci 1032e5b6d6dSopenharmony_ci#include "unicode/unistr.h" // for UStringCaseMapper 1042e5b6d6dSopenharmony_ci 1052e5b6d6dSopenharmony_ci/* 1062e5b6d6dSopenharmony_ci * Internal string casing functions implementing 1072e5b6d6dSopenharmony_ci * ustring.h/ustrcase.cpp and UnicodeString case mapping functions. 1082e5b6d6dSopenharmony_ci */ 1092e5b6d6dSopenharmony_ci 1102e5b6d6dSopenharmony_cistruct UCaseMap : public icu::UMemory { 1112e5b6d6dSopenharmony_ci /** Implements most of ucasemap_open(). */ 1122e5b6d6dSopenharmony_ci UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode); 1132e5b6d6dSopenharmony_ci ~UCaseMap(); 1142e5b6d6dSopenharmony_ci 1152e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION 1162e5b6d6dSopenharmony_ci icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */ 1172e5b6d6dSopenharmony_ci#endif 1182e5b6d6dSopenharmony_ci char locale[32]; 1192e5b6d6dSopenharmony_ci int32_t caseLocale; 1202e5b6d6dSopenharmony_ci uint32_t options; 1212e5b6d6dSopenharmony_ci}; 1222e5b6d6dSopenharmony_ci 1232e5b6d6dSopenharmony_ci#if UCONFIG_NO_BREAK_ITERATION 1242e5b6d6dSopenharmony_ci# define UCASEMAP_BREAK_ITERATOR_PARAM 1252e5b6d6dSopenharmony_ci# define UCASEMAP_BREAK_ITERATOR_UNUSED 1262e5b6d6dSopenharmony_ci# define UCASEMAP_BREAK_ITERATOR 1272e5b6d6dSopenharmony_ci# define UCASEMAP_BREAK_ITERATOR_NULL 1282e5b6d6dSopenharmony_ci#else 1292e5b6d6dSopenharmony_ci# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter, 1302e5b6d6dSopenharmony_ci# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *, 1312e5b6d6dSopenharmony_ci# define UCASEMAP_BREAK_ITERATOR iter, 1322e5b6d6dSopenharmony_ci# define UCASEMAP_BREAK_ITERATOR_NULL NULL, 1332e5b6d6dSopenharmony_ci#endif 1342e5b6d6dSopenharmony_ci 1352e5b6d6dSopenharmony_ciU_CFUNC int32_t 1362e5b6d6dSopenharmony_ciustrcase_getCaseLocale(const char *locale); 1372e5b6d6dSopenharmony_ci 1382e5b6d6dSopenharmony_ci// TODO: swap src / dest if approved for new public api 1392e5b6d6dSopenharmony_ci/** Implements UStringCaseMapper. */ 1402e5b6d6dSopenharmony_ciU_CFUNC int32_t U_CALLCONV 1412e5b6d6dSopenharmony_ciustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM 1422e5b6d6dSopenharmony_ci UChar *dest, int32_t destCapacity, 1432e5b6d6dSopenharmony_ci const UChar *src, int32_t srcLength, 1442e5b6d6dSopenharmony_ci icu::Edits *edits, 1452e5b6d6dSopenharmony_ci UErrorCode &errorCode); 1462e5b6d6dSopenharmony_ci 1472e5b6d6dSopenharmony_ci/** Implements UStringCaseMapper. */ 1482e5b6d6dSopenharmony_ciU_CFUNC int32_t U_CALLCONV 1492e5b6d6dSopenharmony_ciustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM 1502e5b6d6dSopenharmony_ci UChar *dest, int32_t destCapacity, 1512e5b6d6dSopenharmony_ci const UChar *src, int32_t srcLength, 1522e5b6d6dSopenharmony_ci icu::Edits *edits, 1532e5b6d6dSopenharmony_ci UErrorCode &errorCode); 1542e5b6d6dSopenharmony_ci 1552e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION 1562e5b6d6dSopenharmony_ci 1572e5b6d6dSopenharmony_ci/** Implements UStringCaseMapper. */ 1582e5b6d6dSopenharmony_ciU_CFUNC int32_t U_CALLCONV 1592e5b6d6dSopenharmony_ciustrcase_internalToTitle(int32_t caseLocale, uint32_t options, 1602e5b6d6dSopenharmony_ci icu::BreakIterator *iter, 1612e5b6d6dSopenharmony_ci UChar *dest, int32_t destCapacity, 1622e5b6d6dSopenharmony_ci const UChar *src, int32_t srcLength, 1632e5b6d6dSopenharmony_ci icu::Edits *edits, 1642e5b6d6dSopenharmony_ci UErrorCode &errorCode); 1652e5b6d6dSopenharmony_ci 1662e5b6d6dSopenharmony_ci#endif 1672e5b6d6dSopenharmony_ci 1682e5b6d6dSopenharmony_ci/** Implements UStringCaseMapper. */ 1692e5b6d6dSopenharmony_ciU_CFUNC int32_t U_CALLCONV 1702e5b6d6dSopenharmony_ciustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM 1712e5b6d6dSopenharmony_ci UChar *dest, int32_t destCapacity, 1722e5b6d6dSopenharmony_ci const UChar *src, int32_t srcLength, 1732e5b6d6dSopenharmony_ci icu::Edits *edits, 1742e5b6d6dSopenharmony_ci UErrorCode &errorCode); 1752e5b6d6dSopenharmony_ci 1762e5b6d6dSopenharmony_ci/** 1772e5b6d6dSopenharmony_ci * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz(). 1782e5b6d6dSopenharmony_ci * Implements argument checking. 1792e5b6d6dSopenharmony_ci */ 1802e5b6d6dSopenharmony_ciU_CFUNC int32_t 1812e5b6d6dSopenharmony_ciustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM 1822e5b6d6dSopenharmony_ci UChar *dest, int32_t destCapacity, 1832e5b6d6dSopenharmony_ci const UChar *src, int32_t srcLength, 1842e5b6d6dSopenharmony_ci UStringCaseMapper *stringCaseMapper, 1852e5b6d6dSopenharmony_ci icu::Edits *edits, 1862e5b6d6dSopenharmony_ci UErrorCode &errorCode); 1872e5b6d6dSopenharmony_ci 1882e5b6d6dSopenharmony_ci/** 1892e5b6d6dSopenharmony_ci * Common string case mapping implementation for old-fashioned u_strToXyz() functions 1902e5b6d6dSopenharmony_ci * that allow the source string to overlap the destination buffer. 1912e5b6d6dSopenharmony_ci * Implements argument checking and internally works with an intermediate buffer if necessary. 1922e5b6d6dSopenharmony_ci */ 1932e5b6d6dSopenharmony_ciU_CFUNC int32_t 1942e5b6d6dSopenharmony_ciustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM 1952e5b6d6dSopenharmony_ci UChar *dest, int32_t destCapacity, 1962e5b6d6dSopenharmony_ci const UChar *src, int32_t srcLength, 1972e5b6d6dSopenharmony_ci UStringCaseMapper *stringCaseMapper, 1982e5b6d6dSopenharmony_ci UErrorCode &errorCode); 1992e5b6d6dSopenharmony_ci 2002e5b6d6dSopenharmony_ci/** 2012e5b6d6dSopenharmony_ci * UTF-8 string case mapping function type, used by ucasemap_mapUTF8(). 2022e5b6d6dSopenharmony_ci * UTF-8 version of UStringCaseMapper. 2032e5b6d6dSopenharmony_ci * All error checking must be done. 2042e5b6d6dSopenharmony_ci * The UCaseMap must be fully initialized, with locale and/or iter set as needed. 2052e5b6d6dSopenharmony_ci */ 2062e5b6d6dSopenharmony_citypedef void U_CALLCONV 2072e5b6d6dSopenharmony_ciUTF8CaseMapper(int32_t caseLocale, uint32_t options, 2082e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION 2092e5b6d6dSopenharmony_ci icu::BreakIterator *iter, 2102e5b6d6dSopenharmony_ci#endif 2112e5b6d6dSopenharmony_ci const uint8_t *src, int32_t srcLength, 2122e5b6d6dSopenharmony_ci icu::ByteSink &sink, icu::Edits *edits, 2132e5b6d6dSopenharmony_ci UErrorCode &errorCode); 2142e5b6d6dSopenharmony_ci 2152e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION 2162e5b6d6dSopenharmony_ci 2172e5b6d6dSopenharmony_ci/** Implements UTF8CaseMapper. */ 2182e5b6d6dSopenharmony_ciU_CFUNC void U_CALLCONV 2192e5b6d6dSopenharmony_ciucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options, 2202e5b6d6dSopenharmony_ci icu::BreakIterator *iter, 2212e5b6d6dSopenharmony_ci const uint8_t *src, int32_t srcLength, 2222e5b6d6dSopenharmony_ci icu::ByteSink &sink, icu::Edits *edits, 2232e5b6d6dSopenharmony_ci UErrorCode &errorCode); 2242e5b6d6dSopenharmony_ci 2252e5b6d6dSopenharmony_ci#endif 2262e5b6d6dSopenharmony_ci 2272e5b6d6dSopenharmony_civoid 2282e5b6d6dSopenharmony_ciucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM 2292e5b6d6dSopenharmony_ci const char *src, int32_t srcLength, 2302e5b6d6dSopenharmony_ci UTF8CaseMapper *stringCaseMapper, 2312e5b6d6dSopenharmony_ci icu::ByteSink &sink, icu::Edits *edits, 2322e5b6d6dSopenharmony_ci UErrorCode &errorCode); 2332e5b6d6dSopenharmony_ci 2342e5b6d6dSopenharmony_ci/** 2352e5b6d6dSopenharmony_ci * Implements argument checking and buffer handling 2362e5b6d6dSopenharmony_ci * for UTF-8 string case mapping as a common function. 2372e5b6d6dSopenharmony_ci */ 2382e5b6d6dSopenharmony_ciint32_t 2392e5b6d6dSopenharmony_ciucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM 2402e5b6d6dSopenharmony_ci char *dest, int32_t destCapacity, 2412e5b6d6dSopenharmony_ci const char *src, int32_t srcLength, 2422e5b6d6dSopenharmony_ci UTF8CaseMapper *stringCaseMapper, 2432e5b6d6dSopenharmony_ci icu::Edits *edits, 2442e5b6d6dSopenharmony_ci UErrorCode &errorCode); 2452e5b6d6dSopenharmony_ci 2462e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN 2472e5b6d6dSopenharmony_cinamespace GreekUpper { 2482e5b6d6dSopenharmony_ci 2492e5b6d6dSopenharmony_ci// Data bits. 2502e5b6d6dSopenharmony_cistatic const uint32_t UPPER_MASK = 0x3ff; 2512e5b6d6dSopenharmony_cistatic const uint32_t HAS_VOWEL = 0x1000; 2522e5b6d6dSopenharmony_cistatic const uint32_t HAS_YPOGEGRAMMENI = 0x2000; 2532e5b6d6dSopenharmony_cistatic const uint32_t HAS_ACCENT = 0x4000; 2542e5b6d6dSopenharmony_cistatic const uint32_t HAS_DIALYTIKA = 0x8000; 2552e5b6d6dSopenharmony_ci// Further bits during data building and processing, not stored in the data map. 2562e5b6d6dSopenharmony_cistatic const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000; 2572e5b6d6dSopenharmony_cistatic const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000; 2582e5b6d6dSopenharmony_ci 2592e5b6d6dSopenharmony_cistatic const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; 2602e5b6d6dSopenharmony_cistatic const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = 2612e5b6d6dSopenharmony_ci HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; 2622e5b6d6dSopenharmony_cistatic const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; 2632e5b6d6dSopenharmony_ci 2642e5b6d6dSopenharmony_ci// State bits. 2652e5b6d6dSopenharmony_cistatic const uint32_t AFTER_CASED = 1; 2662e5b6d6dSopenharmony_cistatic const uint32_t AFTER_VOWEL_WITH_ACCENT = 2; 2672e5b6d6dSopenharmony_ci 2682e5b6d6dSopenharmony_ciuint32_t getLetterData(UChar32 c); 2692e5b6d6dSopenharmony_ci 2702e5b6d6dSopenharmony_ci/** 2712e5b6d6dSopenharmony_ci * Returns a non-zero value for each of the Greek combining diacritics 2722e5b6d6dSopenharmony_ci * listed in The Unicode Standard, version 8, chapter 7.2 Greek, 2732e5b6d6dSopenharmony_ci * plus some perispomeni look-alikes. 2742e5b6d6dSopenharmony_ci */ 2752e5b6d6dSopenharmony_ciuint32_t getDiacriticData(UChar32 c); 2762e5b6d6dSopenharmony_ci 2772e5b6d6dSopenharmony_ci} // namespace GreekUpper 2782e5b6d6dSopenharmony_ciU_NAMESPACE_END 2792e5b6d6dSopenharmony_ci 2802e5b6d6dSopenharmony_ci#endif // __cplusplus 2812e5b6d6dSopenharmony_ci 2822e5b6d6dSopenharmony_ci#endif // __UCASEMAP_IMP_H__ 283