// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// ucasemap_imp.h
// created: 2017feb08 Markus W. Scherer
62e5b6d6dSopenharmony_ci
72e5b6d6dSopenharmony_ci#ifndef __UCASEMAP_IMP_H__
82e5b6d6dSopenharmony_ci#define __UCASEMAP_IMP_H__
92e5b6d6dSopenharmony_ci
102e5b6d6dSopenharmony_ci#include "unicode/utypes.h"
112e5b6d6dSopenharmony_ci#include "unicode/ucasemap.h"
122e5b6d6dSopenharmony_ci#include "unicode/uchar.h"
132e5b6d6dSopenharmony_ci#include "ucase.h"
142e5b6d6dSopenharmony_ci
/**
 * Bit mask for the titlecasing iterator options bit field.
 * The mask covers three bits (0xe0), i.e. eight possible values.
 * Currently only 3 out of 8 values are used:
 * 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
 * See stringoptions.h.
 * @internal
 */
#define U_TITLECASE_ITERATOR_MASK 0xe0
232e5b6d6dSopenharmony_ci
/**
 * Bit mask for the titlecasing index adjustment options bit set.
 * The mask covers two bits (0x200 and 0x400).
 * Currently two bits are defined:
 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED.
 * See stringoptions.h.
 * @internal
 */
#define U_TITLECASE_ADJUSTMENT_MASK 0x600
322e5b6d6dSopenharmony_ci
332e5b6d6dSopenharmony_ci/**
342e5b6d6dSopenharmony_ci * Internal API, used by u_strcasecmp() etc.
352e5b6d6dSopenharmony_ci * Compare strings case-insensitively,
362e5b6d6dSopenharmony_ci * in code point order or code unit order.
372e5b6d6dSopenharmony_ci */
382e5b6d6dSopenharmony_ciU_CFUNC int32_t
392e5b6d6dSopenharmony_ciu_strcmpFold(const UChar *s1, int32_t length1,
402e5b6d6dSopenharmony_ci             const UChar *s2, int32_t length2,
412e5b6d6dSopenharmony_ci             uint32_t options,
422e5b6d6dSopenharmony_ci             UErrorCode *pErrorCode);
432e5b6d6dSopenharmony_ci
442e5b6d6dSopenharmony_ci/**
452e5b6d6dSopenharmony_ci * Internal API, used for detecting length of
462e5b6d6dSopenharmony_ci * shared prefix case-insensitively.
472e5b6d6dSopenharmony_ci * @param s1            input string 1
482e5b6d6dSopenharmony_ci * @param length1       length of string 1, or -1 (NULL terminated)
492e5b6d6dSopenharmony_ci * @param s2            input string 2
502e5b6d6dSopenharmony_ci * @param length2       length of string 2, or -1 (NULL terminated)
512e5b6d6dSopenharmony_ci * @param options       compare options
522e5b6d6dSopenharmony_ci * @param matchLen1     (output) length of partial prefix match in s1
532e5b6d6dSopenharmony_ci * @param matchLen2     (output) length of partial prefix match in s2
542e5b6d6dSopenharmony_ci * @param pErrorCode    receives error status
552e5b6d6dSopenharmony_ci */
562e5b6d6dSopenharmony_ciU_CAPI void
572e5b6d6dSopenharmony_ciu_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
582e5b6d6dSopenharmony_ci                             const UChar *s2, int32_t length2,
592e5b6d6dSopenharmony_ci                             uint32_t options,
602e5b6d6dSopenharmony_ci                             int32_t *matchLen1, int32_t *matchLen2,
612e5b6d6dSopenharmony_ci                             UErrorCode *pErrorCode);
622e5b6d6dSopenharmony_ci
632e5b6d6dSopenharmony_ci#ifdef __cplusplus
642e5b6d6dSopenharmony_ci
652e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN
662e5b6d6dSopenharmony_ci
// Forward declarations, so that this header need not include the full class definitions.
class BreakIterator;        // unicode/brkiter.h
class ByteSink;             // unicode/bytestream.h
class Locale;               // unicode/locid.h
702e5b6d6dSopenharmony_ci
712e5b6d6dSopenharmony_ci/** Returns true if the options are valid. Otherwise false, and sets an error. */
722e5b6d6dSopenharmony_ciinline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) {
732e5b6d6dSopenharmony_ci    if (U_FAILURE(errorCode)) { return false; }
742e5b6d6dSopenharmony_ci    if ((options & U_TITLECASE_ADJUSTMENT_MASK) == U_TITLECASE_ADJUSTMENT_MASK) {
752e5b6d6dSopenharmony_ci        // Both options together.
762e5b6d6dSopenharmony_ci        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
772e5b6d6dSopenharmony_ci        return false;
782e5b6d6dSopenharmony_ci    }
792e5b6d6dSopenharmony_ci    return true;
802e5b6d6dSopenharmony_ci}
812e5b6d6dSopenharmony_ci
822e5b6d6dSopenharmony_ciinline UBool ustrcase_isLNS(UChar32 c) {
832e5b6d6dSopenharmony_ci    // Letter, number, symbol,
842e5b6d6dSopenharmony_ci    // or a private use code point because those are typically used as letters or numbers.
852e5b6d6dSopenharmony_ci    // Consider modifier letters only if they are cased.
862e5b6d6dSopenharmony_ci    const uint32_t LNS = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK;
872e5b6d6dSopenharmony_ci    int gc = u_charType(c);
882e5b6d6dSopenharmony_ci    return (U_MASK(gc) & LNS) != 0 || (gc == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE);
892e5b6d6dSopenharmony_ci}
902e5b6d6dSopenharmony_ci
912e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION
922e5b6d6dSopenharmony_ci
932e5b6d6dSopenharmony_ci/** Returns nullptr if error. Pass in either locale or locID, not both. */
942e5b6d6dSopenharmony_ciU_CFUNC
952e5b6d6dSopenharmony_ciBreakIterator *ustrcase_getTitleBreakIterator(
962e5b6d6dSopenharmony_ci        const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
972e5b6d6dSopenharmony_ci        LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode);
982e5b6d6dSopenharmony_ci
992e5b6d6dSopenharmony_ci#endif
1002e5b6d6dSopenharmony_ci
1012e5b6d6dSopenharmony_ciU_NAMESPACE_END
1022e5b6d6dSopenharmony_ci
1032e5b6d6dSopenharmony_ci#include "unicode/unistr.h"  // for UStringCaseMapper
1042e5b6d6dSopenharmony_ci
/*
 * Internal string casing functions implementing
 * ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
 */
1092e5b6d6dSopenharmony_ci
1102e5b6d6dSopenharmony_cistruct UCaseMap : public icu::UMemory {
1112e5b6d6dSopenharmony_ci    /** Implements most of ucasemap_open(). */
1122e5b6d6dSopenharmony_ci    UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
1132e5b6d6dSopenharmony_ci    ~UCaseMap();
1142e5b6d6dSopenharmony_ci
1152e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION
1162e5b6d6dSopenharmony_ci    icu::BreakIterator *iter;  /* We adopt the iterator, so we own it. */
1172e5b6d6dSopenharmony_ci#endif
1182e5b6d6dSopenharmony_ci    char locale[32];
1192e5b6d6dSopenharmony_ci    int32_t caseLocale;
1202e5b6d6dSopenharmony_ci    uint32_t options;
1212e5b6d6dSopenharmony_ci};
1222e5b6d6dSopenharmony_ci
// Helper macros for the optional break iterator argument of the case mapping functions.
// Each non-empty expansion ends with a comma, so the macro is written directly in front
// of the next parameter or argument without a separating comma.
// When break iteration is configured out, all of them expand to nothing and the
// iterator parameter disappears from the signatures.
//
//   UCASEMAP_BREAK_ITERATOR_PARAM   declares the named parameter `iter`
//   UCASEMAP_BREAK_ITERATOR_UNUSED  declares the same parameter without a name
//   UCASEMAP_BREAK_ITERATOR         forwards the parameter `iter` in a call
//   UCASEMAP_BREAK_ITERATOR_NULL    passes a null iterator in a call
#if UCONFIG_NO_BREAK_ITERATION
#   define UCASEMAP_BREAK_ITERATOR_PARAM
#   define UCASEMAP_BREAK_ITERATOR_UNUSED
#   define UCASEMAP_BREAK_ITERATOR
#   define UCASEMAP_BREAK_ITERATOR_NULL
#else
#   define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
#   define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
#   define UCASEMAP_BREAK_ITERATOR iter,
#   define UCASEMAP_BREAK_ITERATOR_NULL nullptr,
#endif
1342e5b6d6dSopenharmony_ci
1352e5b6d6dSopenharmony_ciU_CFUNC int32_t
1362e5b6d6dSopenharmony_ciustrcase_getCaseLocale(const char *locale);
1372e5b6d6dSopenharmony_ci
1382e5b6d6dSopenharmony_ci// TODO: swap src / dest if approved for new public api
1392e5b6d6dSopenharmony_ci/** Implements UStringCaseMapper. */
1402e5b6d6dSopenharmony_ciU_CFUNC int32_t U_CALLCONV
1412e5b6d6dSopenharmony_ciustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
1422e5b6d6dSopenharmony_ci                         UChar *dest, int32_t destCapacity,
1432e5b6d6dSopenharmony_ci                         const UChar *src, int32_t srcLength,
1442e5b6d6dSopenharmony_ci                         icu::Edits *edits,
1452e5b6d6dSopenharmony_ci                         UErrorCode &errorCode);
1462e5b6d6dSopenharmony_ci
1472e5b6d6dSopenharmony_ci/** Implements UStringCaseMapper. */
1482e5b6d6dSopenharmony_ciU_CFUNC int32_t U_CALLCONV
1492e5b6d6dSopenharmony_ciustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
1502e5b6d6dSopenharmony_ci                         UChar *dest, int32_t destCapacity,
1512e5b6d6dSopenharmony_ci                         const UChar *src, int32_t srcLength,
1522e5b6d6dSopenharmony_ci                         icu::Edits *edits,
1532e5b6d6dSopenharmony_ci                         UErrorCode &errorCode);
1542e5b6d6dSopenharmony_ci
1552e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION
1562e5b6d6dSopenharmony_ci
1572e5b6d6dSopenharmony_ci/** Implements UStringCaseMapper. */
1582e5b6d6dSopenharmony_ciU_CFUNC int32_t U_CALLCONV
1592e5b6d6dSopenharmony_ciustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
1602e5b6d6dSopenharmony_ci                         icu::BreakIterator *iter,
1612e5b6d6dSopenharmony_ci                         UChar *dest, int32_t destCapacity,
1622e5b6d6dSopenharmony_ci                         const UChar *src, int32_t srcLength,
1632e5b6d6dSopenharmony_ci                         icu::Edits *edits,
1642e5b6d6dSopenharmony_ci                         UErrorCode &errorCode);
1652e5b6d6dSopenharmony_ci
1662e5b6d6dSopenharmony_ci#endif
1672e5b6d6dSopenharmony_ci
1682e5b6d6dSopenharmony_ci/** Implements UStringCaseMapper. */
1692e5b6d6dSopenharmony_ciU_CFUNC int32_t U_CALLCONV
1702e5b6d6dSopenharmony_ciustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
1712e5b6d6dSopenharmony_ci                      UChar *dest, int32_t destCapacity,
1722e5b6d6dSopenharmony_ci                      const UChar *src, int32_t srcLength,
1732e5b6d6dSopenharmony_ci                      icu::Edits *edits,
1742e5b6d6dSopenharmony_ci                      UErrorCode &errorCode);
1752e5b6d6dSopenharmony_ci
1762e5b6d6dSopenharmony_ci/**
1772e5b6d6dSopenharmony_ci * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
1782e5b6d6dSopenharmony_ci * Implements argument checking.
1792e5b6d6dSopenharmony_ci */
1802e5b6d6dSopenharmony_ciU_CFUNC int32_t
1812e5b6d6dSopenharmony_ciustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
1822e5b6d6dSopenharmony_ci             UChar *dest, int32_t destCapacity,
1832e5b6d6dSopenharmony_ci             const UChar *src, int32_t srcLength,
1842e5b6d6dSopenharmony_ci             UStringCaseMapper *stringCaseMapper,
1852e5b6d6dSopenharmony_ci             icu::Edits *edits,
1862e5b6d6dSopenharmony_ci             UErrorCode &errorCode);
1872e5b6d6dSopenharmony_ci
1882e5b6d6dSopenharmony_ci/**
1892e5b6d6dSopenharmony_ci * Common string case mapping implementation for old-fashioned u_strToXyz() functions
1902e5b6d6dSopenharmony_ci * that allow the source string to overlap the destination buffer.
1912e5b6d6dSopenharmony_ci * Implements argument checking and internally works with an intermediate buffer if necessary.
1922e5b6d6dSopenharmony_ci */
1932e5b6d6dSopenharmony_ciU_CFUNC int32_t
1942e5b6d6dSopenharmony_ciustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
1952e5b6d6dSopenharmony_ci                        UChar *dest, int32_t destCapacity,
1962e5b6d6dSopenharmony_ci                        const UChar *src, int32_t srcLength,
1972e5b6d6dSopenharmony_ci                        UStringCaseMapper *stringCaseMapper,
1982e5b6d6dSopenharmony_ci                        UErrorCode &errorCode);
1992e5b6d6dSopenharmony_ci
2002e5b6d6dSopenharmony_ci/**
2012e5b6d6dSopenharmony_ci * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
2022e5b6d6dSopenharmony_ci * UTF-8 version of UStringCaseMapper.
2032e5b6d6dSopenharmony_ci * All error checking must be done.
2042e5b6d6dSopenharmony_ci * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
2052e5b6d6dSopenharmony_ci */
2062e5b6d6dSopenharmony_citypedef void U_CALLCONV
2072e5b6d6dSopenharmony_ciUTF8CaseMapper(int32_t caseLocale, uint32_t options,
2082e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION
2092e5b6d6dSopenharmony_ci               icu::BreakIterator *iter,
2102e5b6d6dSopenharmony_ci#endif
2112e5b6d6dSopenharmony_ci               const uint8_t *src, int32_t srcLength,
2122e5b6d6dSopenharmony_ci               icu::ByteSink &sink, icu::Edits *edits,
2132e5b6d6dSopenharmony_ci               UErrorCode &errorCode);
2142e5b6d6dSopenharmony_ci
2152e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION
2162e5b6d6dSopenharmony_ci
2172e5b6d6dSopenharmony_ci/** Implements UTF8CaseMapper. */
2182e5b6d6dSopenharmony_ciU_CFUNC void U_CALLCONV
2192e5b6d6dSopenharmony_ciucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
2202e5b6d6dSopenharmony_ci        icu::BreakIterator *iter,
2212e5b6d6dSopenharmony_ci        const uint8_t *src, int32_t srcLength,
2222e5b6d6dSopenharmony_ci        icu::ByteSink &sink, icu::Edits *edits,
2232e5b6d6dSopenharmony_ci        UErrorCode &errorCode);
2242e5b6d6dSopenharmony_ci
2252e5b6d6dSopenharmony_ci#endif
2262e5b6d6dSopenharmony_ci
2272e5b6d6dSopenharmony_civoid
2282e5b6d6dSopenharmony_ciucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
2292e5b6d6dSopenharmony_ci                 const char *src, int32_t srcLength,
2302e5b6d6dSopenharmony_ci                 UTF8CaseMapper *stringCaseMapper,
2312e5b6d6dSopenharmony_ci                 icu::ByteSink &sink, icu::Edits *edits,
2322e5b6d6dSopenharmony_ci                 UErrorCode &errorCode);
2332e5b6d6dSopenharmony_ci
2342e5b6d6dSopenharmony_ci/**
2352e5b6d6dSopenharmony_ci * Implements argument checking and buffer handling
2362e5b6d6dSopenharmony_ci * for UTF-8 string case mapping as a common function.
2372e5b6d6dSopenharmony_ci */
2382e5b6d6dSopenharmony_ciint32_t
2392e5b6d6dSopenharmony_ciucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
2402e5b6d6dSopenharmony_ci                 char *dest, int32_t destCapacity,
2412e5b6d6dSopenharmony_ci                 const char *src, int32_t srcLength,
2422e5b6d6dSopenharmony_ci                 UTF8CaseMapper *stringCaseMapper,
2432e5b6d6dSopenharmony_ci                 icu::Edits *edits,
2442e5b6d6dSopenharmony_ci                 UErrorCode &errorCode);
2452e5b6d6dSopenharmony_ci
2462e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN
2472e5b6d6dSopenharmony_cinamespace GreekUpper {
2482e5b6d6dSopenharmony_ci
2492e5b6d6dSopenharmony_ci// Data bits.
2502e5b6d6dSopenharmony_cistatic const uint32_t UPPER_MASK = 0x3ff;
2512e5b6d6dSopenharmony_cistatic const uint32_t HAS_VOWEL = 0x1000;
2522e5b6d6dSopenharmony_cistatic const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
2532e5b6d6dSopenharmony_cistatic const uint32_t HAS_ACCENT = 0x4000;
2542e5b6d6dSopenharmony_cistatic const uint32_t HAS_DIALYTIKA = 0x8000;
2552e5b6d6dSopenharmony_ci// Further bits during data building and processing, not stored in the data map.
2562e5b6d6dSopenharmony_cistatic const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
2572e5b6d6dSopenharmony_cistatic const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
2582e5b6d6dSopenharmony_ci
2592e5b6d6dSopenharmony_cistatic const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
2602e5b6d6dSopenharmony_cistatic const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
2612e5b6d6dSopenharmony_ci        HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
2622e5b6d6dSopenharmony_cistatic const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
2632e5b6d6dSopenharmony_ci
2642e5b6d6dSopenharmony_ci// State bits.
2652e5b6d6dSopenharmony_cistatic const uint32_t AFTER_CASED = 1;
2662e5b6d6dSopenharmony_cistatic const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
2672e5b6d6dSopenharmony_ci
2682e5b6d6dSopenharmony_ciuint32_t getLetterData(UChar32 c);
2692e5b6d6dSopenharmony_ci
2702e5b6d6dSopenharmony_ci/**
2712e5b6d6dSopenharmony_ci * Returns a non-zero value for each of the Greek combining diacritics
2722e5b6d6dSopenharmony_ci * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
2732e5b6d6dSopenharmony_ci * plus some perispomeni look-alikes.
2742e5b6d6dSopenharmony_ci */
2752e5b6d6dSopenharmony_ciuint32_t getDiacriticData(UChar32 c);
2762e5b6d6dSopenharmony_ci
2772e5b6d6dSopenharmony_ci}  // namespace GreekUpper
2782e5b6d6dSopenharmony_ciU_NAMESPACE_END
2792e5b6d6dSopenharmony_ci
2802e5b6d6dSopenharmony_ci#endif  // __cplusplus
2812e5b6d6dSopenharmony_ci
2822e5b6d6dSopenharmony_ci#endif  // __UCASEMAP_IMP_H__
283