11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
31cb0ef41Sopenharmony_ci/*
41cb0ef41Sopenharmony_ci*******************************************************************************
51cb0ef41Sopenharmony_ci*
61cb0ef41Sopenharmony_ci*   Copyright (C) 2009-2014, International Business Machines
71cb0ef41Sopenharmony_ci*   Corporation and others.  All Rights Reserved.
81cb0ef41Sopenharmony_ci*
91cb0ef41Sopenharmony_ci*******************************************************************************
101cb0ef41Sopenharmony_ci*   file name:  normalizer2impl.cpp
111cb0ef41Sopenharmony_ci*   encoding:   UTF-8
121cb0ef41Sopenharmony_ci*   tab size:   8 (not used)
131cb0ef41Sopenharmony_ci*   indentation:4
141cb0ef41Sopenharmony_ci*
151cb0ef41Sopenharmony_ci*   created on: 2009nov22
161cb0ef41Sopenharmony_ci*   created by: Markus W. Scherer
171cb0ef41Sopenharmony_ci*/
181cb0ef41Sopenharmony_ci
191cb0ef41Sopenharmony_ci// #define UCPTRIE_DEBUG
201cb0ef41Sopenharmony_ci
211cb0ef41Sopenharmony_ci#include "unicode/utypes.h"
221cb0ef41Sopenharmony_ci
231cb0ef41Sopenharmony_ci#if !UCONFIG_NO_NORMALIZATION
241cb0ef41Sopenharmony_ci
251cb0ef41Sopenharmony_ci#include "unicode/bytestream.h"
261cb0ef41Sopenharmony_ci#include "unicode/edits.h"
271cb0ef41Sopenharmony_ci#include "unicode/normalizer2.h"
281cb0ef41Sopenharmony_ci#include "unicode/stringoptions.h"
291cb0ef41Sopenharmony_ci#include "unicode/ucptrie.h"
301cb0ef41Sopenharmony_ci#include "unicode/udata.h"
311cb0ef41Sopenharmony_ci#include "unicode/umutablecptrie.h"
321cb0ef41Sopenharmony_ci#include "unicode/ustring.h"
331cb0ef41Sopenharmony_ci#include "unicode/utf16.h"
341cb0ef41Sopenharmony_ci#include "unicode/utf8.h"
351cb0ef41Sopenharmony_ci#include "bytesinkutil.h"
361cb0ef41Sopenharmony_ci#include "cmemory.h"
371cb0ef41Sopenharmony_ci#include "mutex.h"
381cb0ef41Sopenharmony_ci#include "normalizer2impl.h"
391cb0ef41Sopenharmony_ci#include "putilimp.h"
401cb0ef41Sopenharmony_ci#include "uassert.h"
411cb0ef41Sopenharmony_ci#include "ucptrie_impl.h"
421cb0ef41Sopenharmony_ci#include "uset_imp.h"
431cb0ef41Sopenharmony_ci#include "uvector.h"
441cb0ef41Sopenharmony_ci
451cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN
461cb0ef41Sopenharmony_ci
471cb0ef41Sopenharmony_cinamespace {
481cb0ef41Sopenharmony_ci
491cb0ef41Sopenharmony_ci/**
501cb0ef41Sopenharmony_ci * UTF-8 lead byte for minNoMaybeCP.
511cb0ef41Sopenharmony_ci * Can be lower than the actual lead byte for c.
521cb0ef41Sopenharmony_ci * Typically U+0300 for NFC/NFD, U+00A0 for NFKC/NFKD, U+0041 for NFKC_Casefold.
531cb0ef41Sopenharmony_ci */
541cb0ef41Sopenharmony_ciinline uint8_t leadByteForCP(UChar32 c) {
551cb0ef41Sopenharmony_ci    if (c <= 0x7f) {
561cb0ef41Sopenharmony_ci        return (uint8_t)c;
571cb0ef41Sopenharmony_ci    } else if (c <= 0x7ff) {
581cb0ef41Sopenharmony_ci        return (uint8_t)(0xc0+(c>>6));
591cb0ef41Sopenharmony_ci    } else {
601cb0ef41Sopenharmony_ci        // Should not occur because ccc(U+0300)!=0.
611cb0ef41Sopenharmony_ci        return 0xe0;
621cb0ef41Sopenharmony_ci    }
631cb0ef41Sopenharmony_ci}
641cb0ef41Sopenharmony_ci
651cb0ef41Sopenharmony_ci/**
661cb0ef41Sopenharmony_ci * Returns the code point from one single well-formed UTF-8 byte sequence
671cb0ef41Sopenharmony_ci * between cpStart and cpLimit.
681cb0ef41Sopenharmony_ci *
691cb0ef41Sopenharmony_ci * Trie UTF-8 macros do not assemble whole code points (for efficiency).
701cb0ef41Sopenharmony_ci * When we do need the code point, we call this function.
711cb0ef41Sopenharmony_ci * We should not need it for normalization-inert data (norm16==0).
721cb0ef41Sopenharmony_ci * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points.
731cb0ef41Sopenharmony_ci */
741cb0ef41Sopenharmony_ciUChar32 codePointFromValidUTF8(const uint8_t *cpStart, const uint8_t *cpLimit) {
751cb0ef41Sopenharmony_ci    // Similar to U8_NEXT_UNSAFE(s, i, c).
761cb0ef41Sopenharmony_ci    U_ASSERT(cpStart < cpLimit);
771cb0ef41Sopenharmony_ci    uint8_t c = *cpStart;
781cb0ef41Sopenharmony_ci    switch(cpLimit-cpStart) {
791cb0ef41Sopenharmony_ci    case 1:
801cb0ef41Sopenharmony_ci        return c;
811cb0ef41Sopenharmony_ci    case 2:
821cb0ef41Sopenharmony_ci        return ((c&0x1f)<<6) | (cpStart[1]&0x3f);
831cb0ef41Sopenharmony_ci    case 3:
841cb0ef41Sopenharmony_ci        // no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (char16_t)
851cb0ef41Sopenharmony_ci        return (char16_t)((c<<12) | ((cpStart[1]&0x3f)<<6) | (cpStart[2]&0x3f));
861cb0ef41Sopenharmony_ci    case 4:
871cb0ef41Sopenharmony_ci        return ((c&7)<<18) | ((cpStart[1]&0x3f)<<12) | ((cpStart[2]&0x3f)<<6) | (cpStart[3]&0x3f);
881cb0ef41Sopenharmony_ci    default:
891cb0ef41Sopenharmony_ci        UPRV_UNREACHABLE_EXIT;  // Should not occur.
901cb0ef41Sopenharmony_ci    }
911cb0ef41Sopenharmony_ci}
921cb0ef41Sopenharmony_ci
931cb0ef41Sopenharmony_ci/**
941cb0ef41Sopenharmony_ci * Returns the last code point in [start, p[ if it is valid and in U+1000..U+D7FF.
951cb0ef41Sopenharmony_ci * Otherwise returns a negative value.
961cb0ef41Sopenharmony_ci */
971cb0ef41Sopenharmony_ciUChar32 previousHangulOrJamo(const uint8_t *start, const uint8_t *p) {
981cb0ef41Sopenharmony_ci    if ((p - start) >= 3) {
991cb0ef41Sopenharmony_ci        p -= 3;
1001cb0ef41Sopenharmony_ci        uint8_t l = *p;
1011cb0ef41Sopenharmony_ci        uint8_t t1, t2;
1021cb0ef41Sopenharmony_ci        if (0xe1 <= l && l <= 0xed &&
1031cb0ef41Sopenharmony_ci                (t1 = (uint8_t)(p[1] - 0x80)) <= 0x3f &&
1041cb0ef41Sopenharmony_ci                (t2 = (uint8_t)(p[2] - 0x80)) <= 0x3f &&
1051cb0ef41Sopenharmony_ci                (l < 0xed || t1 <= 0x1f)) {
1061cb0ef41Sopenharmony_ci            return ((l & 0xf) << 12) | (t1 << 6) | t2;
1071cb0ef41Sopenharmony_ci        }
1081cb0ef41Sopenharmony_ci    }
1091cb0ef41Sopenharmony_ci    return U_SENTINEL;
1101cb0ef41Sopenharmony_ci}
1111cb0ef41Sopenharmony_ci
/**
 * Checks whether [src, limit[ begins with one Jamo T code point and, if so,
 * returns its offset from the Jamo T base (U+11A7).
 *
 * @param src start of the UTF-8 text
 * @param limit end of the UTF-8 text
 * @return the offset (1..27), or a negative value if there is no Jamo T at src
 */
int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) {
    // Jamo T is encoded as E1 86 A8..E1 87 82.
    if ((limit - src) < 3 || src[0] != 0xe1) {
        return -1;
    }
    const uint8_t third = src[2];
    switch (src[1]) {
    case 0x86:
        // The first Jamo T is U+11A8 but JAMO_T_BASE is 11A7.
        // Offset 0 does not correspond to any conjoining Jamo.
        if (0xa8 <= third && third <= 0xbf) {
            return third - 0xa7;
        }
        break;
    case 0x87:
        // Trail bytes 80..82 continue the range; 0x40 accounts for the
        // second byte having advanced by one.
        if (0x80 <= third && third <= 0x82) {
            return third - (0xa7 - 0x40);
        }
        break;
    default:
        break;
    }
    return -1;
}
1351cb0ef41Sopenharmony_ci
1361cb0ef41Sopenharmony_civoid
1371cb0ef41Sopenharmony_ciappendCodePointDelta(const uint8_t *cpStart, const uint8_t *cpLimit, int32_t delta,
1381cb0ef41Sopenharmony_ci                     ByteSink &sink, Edits *edits) {
1391cb0ef41Sopenharmony_ci    char buffer[U8_MAX_LENGTH];
1401cb0ef41Sopenharmony_ci    int32_t length;
1411cb0ef41Sopenharmony_ci    int32_t cpLength = (int32_t)(cpLimit - cpStart);
1421cb0ef41Sopenharmony_ci    if (cpLength == 1) {
1431cb0ef41Sopenharmony_ci        // The builder makes ASCII map to ASCII.
1441cb0ef41Sopenharmony_ci        buffer[0] = (uint8_t)(*cpStart + delta);
1451cb0ef41Sopenharmony_ci        length = 1;
1461cb0ef41Sopenharmony_ci    } else {
1471cb0ef41Sopenharmony_ci        int32_t trail = *(cpLimit-1) + delta;
1481cb0ef41Sopenharmony_ci        if (0x80 <= trail && trail <= 0xbf) {
1491cb0ef41Sopenharmony_ci            // The delta only changes the last trail byte.
1501cb0ef41Sopenharmony_ci            --cpLimit;
1511cb0ef41Sopenharmony_ci            length = 0;
1521cb0ef41Sopenharmony_ci            do { buffer[length++] = *cpStart++; } while (cpStart < cpLimit);
1531cb0ef41Sopenharmony_ci            buffer[length++] = (uint8_t)trail;
1541cb0ef41Sopenharmony_ci        } else {
1551cb0ef41Sopenharmony_ci            // Decode the code point, add the delta, re-encode.
1561cb0ef41Sopenharmony_ci            UChar32 c = codePointFromValidUTF8(cpStart, cpLimit) + delta;
1571cb0ef41Sopenharmony_ci            length = 0;
1581cb0ef41Sopenharmony_ci            U8_APPEND_UNSAFE(buffer, length, c);
1591cb0ef41Sopenharmony_ci        }
1601cb0ef41Sopenharmony_ci    }
1611cb0ef41Sopenharmony_ci    if (edits != nullptr) {
1621cb0ef41Sopenharmony_ci        edits->addReplace(cpLength, length);
1631cb0ef41Sopenharmony_ci    }
1641cb0ef41Sopenharmony_ci    sink.Append(buffer, length);
1651cb0ef41Sopenharmony_ci}
1661cb0ef41Sopenharmony_ci
1671cb0ef41Sopenharmony_ci}  // namespace
1681cb0ef41Sopenharmony_ci
1691cb0ef41Sopenharmony_ci// ReorderingBuffer -------------------------------------------------------- ***
1701cb0ef41Sopenharmony_ci
1711cb0ef41Sopenharmony_ciReorderingBuffer::ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest,
1721cb0ef41Sopenharmony_ci                                   UErrorCode &errorCode) :
1731cb0ef41Sopenharmony_ci        impl(ni), str(dest),
1741cb0ef41Sopenharmony_ci        start(str.getBuffer(8)), reorderStart(start), limit(start),
1751cb0ef41Sopenharmony_ci        remainingCapacity(str.getCapacity()), lastCC(0) {
1761cb0ef41Sopenharmony_ci    if (start == nullptr && U_SUCCESS(errorCode)) {
1771cb0ef41Sopenharmony_ci        // getBuffer() already did str.setToBogus()
1781cb0ef41Sopenharmony_ci        errorCode = U_MEMORY_ALLOCATION_ERROR;
1791cb0ef41Sopenharmony_ci    }
1801cb0ef41Sopenharmony_ci}
1811cb0ef41Sopenharmony_ci
1821cb0ef41Sopenharmony_ciUBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) {
1831cb0ef41Sopenharmony_ci    int32_t length=str.length();
1841cb0ef41Sopenharmony_ci    start=str.getBuffer(destCapacity);
1851cb0ef41Sopenharmony_ci    if(start==nullptr) {
1861cb0ef41Sopenharmony_ci        // getBuffer() already did str.setToBogus()
1871cb0ef41Sopenharmony_ci        errorCode=U_MEMORY_ALLOCATION_ERROR;
1881cb0ef41Sopenharmony_ci        return false;
1891cb0ef41Sopenharmony_ci    }
1901cb0ef41Sopenharmony_ci    limit=start+length;
1911cb0ef41Sopenharmony_ci    remainingCapacity=str.getCapacity()-length;
1921cb0ef41Sopenharmony_ci    reorderStart=start;
1931cb0ef41Sopenharmony_ci    if(start==limit) {
1941cb0ef41Sopenharmony_ci        lastCC=0;
1951cb0ef41Sopenharmony_ci    } else {
1961cb0ef41Sopenharmony_ci        setIterator();
1971cb0ef41Sopenharmony_ci        lastCC=previousCC();
1981cb0ef41Sopenharmony_ci        // Set reorderStart after the last code point with cc<=1 if there is one.
1991cb0ef41Sopenharmony_ci        if(lastCC>1) {
2001cb0ef41Sopenharmony_ci            while(previousCC()>1) {}
2011cb0ef41Sopenharmony_ci        }
2021cb0ef41Sopenharmony_ci        reorderStart=codePointLimit;
2031cb0ef41Sopenharmony_ci    }
2041cb0ef41Sopenharmony_ci    return true;
2051cb0ef41Sopenharmony_ci}
2061cb0ef41Sopenharmony_ci
2071cb0ef41Sopenharmony_ciUBool ReorderingBuffer::equals(const char16_t *otherStart, const char16_t *otherLimit) const {
2081cb0ef41Sopenharmony_ci    int32_t length=(int32_t)(limit-start);
2091cb0ef41Sopenharmony_ci    return
2101cb0ef41Sopenharmony_ci        length==(int32_t)(otherLimit-otherStart) &&
2111cb0ef41Sopenharmony_ci        0==u_memcmp(start, otherStart, length);
2121cb0ef41Sopenharmony_ci}
2131cb0ef41Sopenharmony_ci
2141cb0ef41Sopenharmony_ciUBool ReorderingBuffer::equals(const uint8_t *otherStart, const uint8_t *otherLimit) const {
2151cb0ef41Sopenharmony_ci    U_ASSERT((otherLimit - otherStart) <= INT32_MAX);  // ensured by caller
2161cb0ef41Sopenharmony_ci    int32_t length = (int32_t)(limit - start);
2171cb0ef41Sopenharmony_ci    int32_t otherLength = (int32_t)(otherLimit - otherStart);
2181cb0ef41Sopenharmony_ci    // For equal strings, UTF-8 is at least as long as UTF-16, and at most three times as long.
2191cb0ef41Sopenharmony_ci    if (otherLength < length || (otherLength / 3) > length) {
2201cb0ef41Sopenharmony_ci        return false;
2211cb0ef41Sopenharmony_ci    }
2221cb0ef41Sopenharmony_ci    // Compare valid strings from between normalization boundaries.
2231cb0ef41Sopenharmony_ci    // (Invalid sequences are normalization-inert.)
2241cb0ef41Sopenharmony_ci    for (int32_t i = 0, j = 0;;) {
2251cb0ef41Sopenharmony_ci        if (i >= length) {
2261cb0ef41Sopenharmony_ci            return j >= otherLength;
2271cb0ef41Sopenharmony_ci        } else if (j >= otherLength) {
2281cb0ef41Sopenharmony_ci            return false;
2291cb0ef41Sopenharmony_ci        }
2301cb0ef41Sopenharmony_ci        // Not at the end of either string yet.
2311cb0ef41Sopenharmony_ci        UChar32 c, other;
2321cb0ef41Sopenharmony_ci        U16_NEXT_UNSAFE(start, i, c);
2331cb0ef41Sopenharmony_ci        U8_NEXT_UNSAFE(otherStart, j, other);
2341cb0ef41Sopenharmony_ci        if (c != other) {
2351cb0ef41Sopenharmony_ci            return false;
2361cb0ef41Sopenharmony_ci        }
2371cb0ef41Sopenharmony_ci    }
2381cb0ef41Sopenharmony_ci}
2391cb0ef41Sopenharmony_ci
2401cb0ef41Sopenharmony_ciUBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
2411cb0ef41Sopenharmony_ci    if(remainingCapacity<2 && !resize(2, errorCode)) {
2421cb0ef41Sopenharmony_ci        return false;
2431cb0ef41Sopenharmony_ci    }
2441cb0ef41Sopenharmony_ci    if(lastCC<=cc || cc==0) {
2451cb0ef41Sopenharmony_ci        limit[0]=U16_LEAD(c);
2461cb0ef41Sopenharmony_ci        limit[1]=U16_TRAIL(c);
2471cb0ef41Sopenharmony_ci        limit+=2;
2481cb0ef41Sopenharmony_ci        lastCC=cc;
2491cb0ef41Sopenharmony_ci        if(cc<=1) {
2501cb0ef41Sopenharmony_ci            reorderStart=limit;
2511cb0ef41Sopenharmony_ci        }
2521cb0ef41Sopenharmony_ci    } else {
2531cb0ef41Sopenharmony_ci        insert(c, cc);
2541cb0ef41Sopenharmony_ci    }
2551cb0ef41Sopenharmony_ci    remainingCapacity-=2;
2561cb0ef41Sopenharmony_ci    return true;
2571cb0ef41Sopenharmony_ci}
2581cb0ef41Sopenharmony_ci
2591cb0ef41Sopenharmony_ciUBool ReorderingBuffer::append(const char16_t *s, int32_t length, UBool isNFD,
2601cb0ef41Sopenharmony_ci                               uint8_t leadCC, uint8_t trailCC,
2611cb0ef41Sopenharmony_ci                               UErrorCode &errorCode) {
2621cb0ef41Sopenharmony_ci    if(length==0) {
2631cb0ef41Sopenharmony_ci        return true;
2641cb0ef41Sopenharmony_ci    }
2651cb0ef41Sopenharmony_ci    if(remainingCapacity<length && !resize(length, errorCode)) {
2661cb0ef41Sopenharmony_ci        return false;
2671cb0ef41Sopenharmony_ci    }
2681cb0ef41Sopenharmony_ci    remainingCapacity-=length;
2691cb0ef41Sopenharmony_ci    if(lastCC<=leadCC || leadCC==0) {
2701cb0ef41Sopenharmony_ci        if(trailCC<=1) {
2711cb0ef41Sopenharmony_ci            reorderStart=limit+length;
2721cb0ef41Sopenharmony_ci        } else if(leadCC<=1) {
2731cb0ef41Sopenharmony_ci            reorderStart=limit+1;  // Ok if not a code point boundary.
2741cb0ef41Sopenharmony_ci        }
2751cb0ef41Sopenharmony_ci        const char16_t *sLimit=s+length;
2761cb0ef41Sopenharmony_ci        do { *limit++=*s++; } while(s!=sLimit);
2771cb0ef41Sopenharmony_ci        lastCC=trailCC;
2781cb0ef41Sopenharmony_ci    } else {
2791cb0ef41Sopenharmony_ci        int32_t i=0;
2801cb0ef41Sopenharmony_ci        UChar32 c;
2811cb0ef41Sopenharmony_ci        U16_NEXT(s, i, length, c);
2821cb0ef41Sopenharmony_ci        insert(c, leadCC);  // insert first code point
2831cb0ef41Sopenharmony_ci        while(i<length) {
2841cb0ef41Sopenharmony_ci            U16_NEXT(s, i, length, c);
2851cb0ef41Sopenharmony_ci            if(i<length) {
2861cb0ef41Sopenharmony_ci                if (isNFD) {
2871cb0ef41Sopenharmony_ci                    leadCC = Normalizer2Impl::getCCFromYesOrMaybe(impl.getRawNorm16(c));
2881cb0ef41Sopenharmony_ci                } else {
2891cb0ef41Sopenharmony_ci                    leadCC = impl.getCC(impl.getNorm16(c));
2901cb0ef41Sopenharmony_ci                }
2911cb0ef41Sopenharmony_ci            } else {
2921cb0ef41Sopenharmony_ci                leadCC=trailCC;
2931cb0ef41Sopenharmony_ci            }
2941cb0ef41Sopenharmony_ci            append(c, leadCC, errorCode);
2951cb0ef41Sopenharmony_ci        }
2961cb0ef41Sopenharmony_ci    }
2971cb0ef41Sopenharmony_ci    return true;
2981cb0ef41Sopenharmony_ci}
2991cb0ef41Sopenharmony_ci
3001cb0ef41Sopenharmony_ciUBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) {
3011cb0ef41Sopenharmony_ci    int32_t cpLength=U16_LENGTH(c);
3021cb0ef41Sopenharmony_ci    if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) {
3031cb0ef41Sopenharmony_ci        return false;
3041cb0ef41Sopenharmony_ci    }
3051cb0ef41Sopenharmony_ci    remainingCapacity-=cpLength;
3061cb0ef41Sopenharmony_ci    if(cpLength==1) {
3071cb0ef41Sopenharmony_ci        *limit++=(char16_t)c;
3081cb0ef41Sopenharmony_ci    } else {
3091cb0ef41Sopenharmony_ci        limit[0]=U16_LEAD(c);
3101cb0ef41Sopenharmony_ci        limit[1]=U16_TRAIL(c);
3111cb0ef41Sopenharmony_ci        limit+=2;
3121cb0ef41Sopenharmony_ci    }
3131cb0ef41Sopenharmony_ci    lastCC=0;
3141cb0ef41Sopenharmony_ci    reorderStart=limit;
3151cb0ef41Sopenharmony_ci    return true;
3161cb0ef41Sopenharmony_ci}
3171cb0ef41Sopenharmony_ci
3181cb0ef41Sopenharmony_ciUBool ReorderingBuffer::appendZeroCC(const char16_t *s, const char16_t *sLimit, UErrorCode &errorCode) {
3191cb0ef41Sopenharmony_ci    if(s==sLimit) {
3201cb0ef41Sopenharmony_ci        return true;
3211cb0ef41Sopenharmony_ci    }
3221cb0ef41Sopenharmony_ci    int32_t length=(int32_t)(sLimit-s);
3231cb0ef41Sopenharmony_ci    if(remainingCapacity<length && !resize(length, errorCode)) {
3241cb0ef41Sopenharmony_ci        return false;
3251cb0ef41Sopenharmony_ci    }
3261cb0ef41Sopenharmony_ci    u_memcpy(limit, s, length);
3271cb0ef41Sopenharmony_ci    limit+=length;
3281cb0ef41Sopenharmony_ci    remainingCapacity-=length;
3291cb0ef41Sopenharmony_ci    lastCC=0;
3301cb0ef41Sopenharmony_ci    reorderStart=limit;
3311cb0ef41Sopenharmony_ci    return true;
3321cb0ef41Sopenharmony_ci}
3331cb0ef41Sopenharmony_ci
3341cb0ef41Sopenharmony_civoid ReorderingBuffer::remove() {
3351cb0ef41Sopenharmony_ci    reorderStart=limit=start;
3361cb0ef41Sopenharmony_ci    remainingCapacity=str.getCapacity();
3371cb0ef41Sopenharmony_ci    lastCC=0;
3381cb0ef41Sopenharmony_ci}
3391cb0ef41Sopenharmony_ci
3401cb0ef41Sopenharmony_civoid ReorderingBuffer::removeSuffix(int32_t suffixLength) {
3411cb0ef41Sopenharmony_ci    if(suffixLength<(limit-start)) {
3421cb0ef41Sopenharmony_ci        limit-=suffixLength;
3431cb0ef41Sopenharmony_ci        remainingCapacity+=suffixLength;
3441cb0ef41Sopenharmony_ci    } else {
3451cb0ef41Sopenharmony_ci        limit=start;
3461cb0ef41Sopenharmony_ci        remainingCapacity=str.getCapacity();
3471cb0ef41Sopenharmony_ci    }
3481cb0ef41Sopenharmony_ci    lastCC=0;
3491cb0ef41Sopenharmony_ci    reorderStart=limit;
3501cb0ef41Sopenharmony_ci}
3511cb0ef41Sopenharmony_ci
3521cb0ef41Sopenharmony_ciUBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) {
3531cb0ef41Sopenharmony_ci    int32_t reorderStartIndex=(int32_t)(reorderStart-start);
3541cb0ef41Sopenharmony_ci    int32_t length=(int32_t)(limit-start);
3551cb0ef41Sopenharmony_ci    str.releaseBuffer(length);
3561cb0ef41Sopenharmony_ci    int32_t newCapacity=length+appendLength;
3571cb0ef41Sopenharmony_ci    int32_t doubleCapacity=2*str.getCapacity();
3581cb0ef41Sopenharmony_ci    if(newCapacity<doubleCapacity) {
3591cb0ef41Sopenharmony_ci        newCapacity=doubleCapacity;
3601cb0ef41Sopenharmony_ci    }
3611cb0ef41Sopenharmony_ci    if(newCapacity<256) {
3621cb0ef41Sopenharmony_ci        newCapacity=256;
3631cb0ef41Sopenharmony_ci    }
3641cb0ef41Sopenharmony_ci    start=str.getBuffer(newCapacity);
3651cb0ef41Sopenharmony_ci    if(start==nullptr) {
3661cb0ef41Sopenharmony_ci        // getBuffer() already did str.setToBogus()
3671cb0ef41Sopenharmony_ci        errorCode=U_MEMORY_ALLOCATION_ERROR;
3681cb0ef41Sopenharmony_ci        return false;
3691cb0ef41Sopenharmony_ci    }
3701cb0ef41Sopenharmony_ci    reorderStart=start+reorderStartIndex;
3711cb0ef41Sopenharmony_ci    limit=start+length;
3721cb0ef41Sopenharmony_ci    remainingCapacity=str.getCapacity()-length;
3731cb0ef41Sopenharmony_ci    return true;
3741cb0ef41Sopenharmony_ci}
3751cb0ef41Sopenharmony_ci
3761cb0ef41Sopenharmony_civoid ReorderingBuffer::skipPrevious() {
3771cb0ef41Sopenharmony_ci    codePointLimit=codePointStart;
3781cb0ef41Sopenharmony_ci    char16_t c=*--codePointStart;
3791cb0ef41Sopenharmony_ci    if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) {
3801cb0ef41Sopenharmony_ci        --codePointStart;
3811cb0ef41Sopenharmony_ci    }
3821cb0ef41Sopenharmony_ci}
3831cb0ef41Sopenharmony_ci
3841cb0ef41Sopenharmony_ciuint8_t ReorderingBuffer::previousCC() {
3851cb0ef41Sopenharmony_ci    codePointLimit=codePointStart;
3861cb0ef41Sopenharmony_ci    if(reorderStart>=codePointStart) {
3871cb0ef41Sopenharmony_ci        return 0;
3881cb0ef41Sopenharmony_ci    }
3891cb0ef41Sopenharmony_ci    UChar32 c=*--codePointStart;
3901cb0ef41Sopenharmony_ci    char16_t c2;
3911cb0ef41Sopenharmony_ci    if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) {
3921cb0ef41Sopenharmony_ci        --codePointStart;
3931cb0ef41Sopenharmony_ci        c=U16_GET_SUPPLEMENTARY(c2, c);
3941cb0ef41Sopenharmony_ci    }
3951cb0ef41Sopenharmony_ci    return impl.getCCFromYesOrMaybeCP(c);
3961cb0ef41Sopenharmony_ci}
3971cb0ef41Sopenharmony_ci
3981cb0ef41Sopenharmony_ci// Inserts c somewhere before the last character.
3991cb0ef41Sopenharmony_ci// Requires 0<cc<lastCC which implies reorderStart<limit.
4001cb0ef41Sopenharmony_civoid ReorderingBuffer::insert(UChar32 c, uint8_t cc) {
4011cb0ef41Sopenharmony_ci    for(setIterator(), skipPrevious(); previousCC()>cc;) {}
4021cb0ef41Sopenharmony_ci    // insert c at codePointLimit, after the character with prevCC<=cc
4031cb0ef41Sopenharmony_ci    char16_t *q=limit;
4041cb0ef41Sopenharmony_ci    char16_t *r=limit+=U16_LENGTH(c);
4051cb0ef41Sopenharmony_ci    do {
4061cb0ef41Sopenharmony_ci        *--r=*--q;
4071cb0ef41Sopenharmony_ci    } while(codePointLimit!=q);
4081cb0ef41Sopenharmony_ci    writeCodePoint(q, c);
4091cb0ef41Sopenharmony_ci    if(cc<=1) {
4101cb0ef41Sopenharmony_ci        reorderStart=r;
4111cb0ef41Sopenharmony_ci    }
4121cb0ef41Sopenharmony_ci}
4131cb0ef41Sopenharmony_ci
4141cb0ef41Sopenharmony_ci// Normalizer2Impl --------------------------------------------------------- ***
4151cb0ef41Sopenharmony_ci
/**
 * Data holder for the canonical-iterator support: a mutable code point trie,
 * an immutable UCPTrie and a vector of start sets.
 * The constructor, destructor and addToStartSet() are only declared here;
 * their definitions are outside this part of the file.
 * NOTE(review): presumably this is the type behind Normalizer2Impl::fCanonIterData,
 * whose `trie` member is read in addCanonIterPropertyStarts() -- confirm in the header.
 */
struct CanonIterData : public UMemory {
    CanonIterData(UErrorCode &errorCode);
    ~CanonIterData();
    // Declared only; takes an origin code point and the lead code point of a decomposition.
    void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
    UMutableCPTrie *mutableTrie;
    UCPTrie *trie;
    UVector canonStartSets;  // contains UnicodeSet *
};
4241cb0ef41Sopenharmony_ci
// Destroys the object referenced by fCanonIterData.
// Nothing else is released here; init() only stores the pointers it is given.
Normalizer2Impl::~Normalizer2Impl() {
    delete fCanonIterData;
}
4281cb0ef41Sopenharmony_ci
4291cb0ef41Sopenharmony_civoid
4301cb0ef41Sopenharmony_ciNormalizer2Impl::init(const int32_t *inIndexes, const UCPTrie *inTrie,
4311cb0ef41Sopenharmony_ci                      const uint16_t *inExtraData, const uint8_t *inSmallFCD) {
4321cb0ef41Sopenharmony_ci    minDecompNoCP = static_cast<char16_t>(inIndexes[IX_MIN_DECOMP_NO_CP]);
4331cb0ef41Sopenharmony_ci    minCompNoMaybeCP = static_cast<char16_t>(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]);
4341cb0ef41Sopenharmony_ci    minLcccCP = static_cast<char16_t>(inIndexes[IX_MIN_LCCC_CP]);
4351cb0ef41Sopenharmony_ci
4361cb0ef41Sopenharmony_ci    minYesNo = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO]);
4371cb0ef41Sopenharmony_ci    minYesNoMappingsOnly = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]);
4381cb0ef41Sopenharmony_ci    minNoNo = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO]);
4391cb0ef41Sopenharmony_ci    minNoNoCompBoundaryBefore = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]);
4401cb0ef41Sopenharmony_ci    minNoNoCompNoMaybeCC = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]);
4411cb0ef41Sopenharmony_ci    minNoNoEmpty = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_EMPTY]);
4421cb0ef41Sopenharmony_ci    limitNoNo = static_cast<uint16_t>(inIndexes[IX_LIMIT_NO_NO]);
4431cb0ef41Sopenharmony_ci    minMaybeYes = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_YES]);
4441cb0ef41Sopenharmony_ci    U_ASSERT((minMaybeYes & 7) == 0);  // 8-aligned for noNoDelta bit fields
4451cb0ef41Sopenharmony_ci    centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1;
4461cb0ef41Sopenharmony_ci
4471cb0ef41Sopenharmony_ci    normTrie=inTrie;
4481cb0ef41Sopenharmony_ci
4491cb0ef41Sopenharmony_ci    maybeYesCompositions=inExtraData;
4501cb0ef41Sopenharmony_ci    extraData=maybeYesCompositions+((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
4511cb0ef41Sopenharmony_ci
4521cb0ef41Sopenharmony_ci    smallFCD=inSmallFCD;
4531cb0ef41Sopenharmony_ci}
4541cb0ef41Sopenharmony_ci
4551cb0ef41Sopenharmony_ciU_CDECL_BEGIN
4561cb0ef41Sopenharmony_ci
4571cb0ef41Sopenharmony_cistatic uint32_t U_CALLCONV
4581cb0ef41Sopenharmony_cisegmentStarterMapper(const void * /*context*/, uint32_t value) {
4591cb0ef41Sopenharmony_ci    return value&CANON_NOT_SEGMENT_STARTER;
4601cb0ef41Sopenharmony_ci}
4611cb0ef41Sopenharmony_ci
4621cb0ef41Sopenharmony_ciU_CDECL_END
4631cb0ef41Sopenharmony_ci
4641cb0ef41Sopenharmony_civoid
4651cb0ef41Sopenharmony_ciNormalizer2Impl::addLcccChars(UnicodeSet &set) const {
4661cb0ef41Sopenharmony_ci    UChar32 start = 0, end;
4671cb0ef41Sopenharmony_ci    uint32_t norm16;
4681cb0ef41Sopenharmony_ci    while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
4691cb0ef41Sopenharmony_ci                                   nullptr, nullptr, &norm16)) >= 0) {
4701cb0ef41Sopenharmony_ci        if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
4711cb0ef41Sopenharmony_ci                norm16 != Normalizer2Impl::JAMO_VT) {
4721cb0ef41Sopenharmony_ci            set.add(start, end);
4731cb0ef41Sopenharmony_ci        } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
4741cb0ef41Sopenharmony_ci            uint16_t fcd16 = getFCD16(start);
4751cb0ef41Sopenharmony_ci            if (fcd16 > 0xff) { set.add(start, end); }
4761cb0ef41Sopenharmony_ci        }
4771cb0ef41Sopenharmony_ci        start = end + 1;
4781cb0ef41Sopenharmony_ci    }
4791cb0ef41Sopenharmony_ci}
4801cb0ef41Sopenharmony_ci
4811cb0ef41Sopenharmony_civoid
4821cb0ef41Sopenharmony_ciNormalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
4831cb0ef41Sopenharmony_ci    // Add the start code point of each same-value range of the trie.
4841cb0ef41Sopenharmony_ci    UChar32 start = 0, end;
4851cb0ef41Sopenharmony_ci    uint32_t value;
4861cb0ef41Sopenharmony_ci    while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
4871cb0ef41Sopenharmony_ci                                   nullptr, nullptr, &value)) >= 0) {
4881cb0ef41Sopenharmony_ci        sa->add(sa->set, start);
4891cb0ef41Sopenharmony_ci        if (start != end && isAlgorithmicNoNo((uint16_t)value) &&
4901cb0ef41Sopenharmony_ci                (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
4911cb0ef41Sopenharmony_ci            // Range of code points with same-norm16-value algorithmic decompositions.
4921cb0ef41Sopenharmony_ci            // They might have different non-zero FCD16 values.
4931cb0ef41Sopenharmony_ci            uint16_t prevFCD16 = getFCD16(start);
4941cb0ef41Sopenharmony_ci            while (++start <= end) {
4951cb0ef41Sopenharmony_ci                uint16_t fcd16 = getFCD16(start);
4961cb0ef41Sopenharmony_ci                if (fcd16 != prevFCD16) {
4971cb0ef41Sopenharmony_ci                    sa->add(sa->set, start);
4981cb0ef41Sopenharmony_ci                    prevFCD16 = fcd16;
4991cb0ef41Sopenharmony_ci                }
5001cb0ef41Sopenharmony_ci            }
5011cb0ef41Sopenharmony_ci        }
5021cb0ef41Sopenharmony_ci        start = end + 1;
5031cb0ef41Sopenharmony_ci    }
5041cb0ef41Sopenharmony_ci
5051cb0ef41Sopenharmony_ci    /* add Hangul LV syllables and LV+1 because of skippables */
5061cb0ef41Sopenharmony_ci    for(char16_t c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
5071cb0ef41Sopenharmony_ci        sa->add(sa->set, c);
5081cb0ef41Sopenharmony_ci        sa->add(sa->set, c+1);
5091cb0ef41Sopenharmony_ci    }
5101cb0ef41Sopenharmony_ci    sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
5111cb0ef41Sopenharmony_ci}
5121cb0ef41Sopenharmony_ci
5131cb0ef41Sopenharmony_civoid
5141cb0ef41Sopenharmony_ciNormalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
5151cb0ef41Sopenharmony_ci    // Add the start code point of each same-value range of the canonical iterator data trie.
5161cb0ef41Sopenharmony_ci    if (!ensureCanonIterData(errorCode)) { return; }
5171cb0ef41Sopenharmony_ci    // Currently only used for the SEGMENT_STARTER property.
5181cb0ef41Sopenharmony_ci    UChar32 start = 0, end;
5191cb0ef41Sopenharmony_ci    uint32_t value;
5201cb0ef41Sopenharmony_ci    while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0,
5211cb0ef41Sopenharmony_ci                                   segmentStarterMapper, nullptr, &value)) >= 0) {
5221cb0ef41Sopenharmony_ci        sa->add(sa->set, start);
5231cb0ef41Sopenharmony_ci        start = end + 1;
5241cb0ef41Sopenharmony_ci    }
5251cb0ef41Sopenharmony_ci}
5261cb0ef41Sopenharmony_ci
5271cb0ef41Sopenharmony_ciconst char16_t *
5281cb0ef41Sopenharmony_ciNormalizer2Impl::copyLowPrefixFromNulTerminated(const char16_t *src,
5291cb0ef41Sopenharmony_ci                                                UChar32 minNeedDataCP,
5301cb0ef41Sopenharmony_ci                                                ReorderingBuffer *buffer,
5311cb0ef41Sopenharmony_ci                                                UErrorCode &errorCode) const {
5321cb0ef41Sopenharmony_ci    // Make some effort to support NUL-terminated strings reasonably.
5331cb0ef41Sopenharmony_ci    // Take the part of the fast quick check loop that does not look up
5341cb0ef41Sopenharmony_ci    // data and check the first part of the string.
5351cb0ef41Sopenharmony_ci    // After this prefix, determine the string length to simplify the rest
5361cb0ef41Sopenharmony_ci    // of the code.
5371cb0ef41Sopenharmony_ci    const char16_t *prevSrc=src;
5381cb0ef41Sopenharmony_ci    char16_t c;
5391cb0ef41Sopenharmony_ci    while((c=*src++)<minNeedDataCP && c!=0) {}
5401cb0ef41Sopenharmony_ci    // Back out the last character for full processing.
5411cb0ef41Sopenharmony_ci    // Copy this prefix.
5421cb0ef41Sopenharmony_ci    if(--src!=prevSrc) {
5431cb0ef41Sopenharmony_ci        if(buffer!=nullptr) {
5441cb0ef41Sopenharmony_ci            buffer->appendZeroCC(prevSrc, src, errorCode);
5451cb0ef41Sopenharmony_ci        }
5461cb0ef41Sopenharmony_ci    }
5471cb0ef41Sopenharmony_ci    return src;
5481cb0ef41Sopenharmony_ci}
5491cb0ef41Sopenharmony_ci
5501cb0ef41Sopenharmony_ciUnicodeString &
5511cb0ef41Sopenharmony_ciNormalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest,
5521cb0ef41Sopenharmony_ci                           UErrorCode &errorCode) const {
5531cb0ef41Sopenharmony_ci    if(U_FAILURE(errorCode)) {
5541cb0ef41Sopenharmony_ci        dest.setToBogus();
5551cb0ef41Sopenharmony_ci        return dest;
5561cb0ef41Sopenharmony_ci    }
5571cb0ef41Sopenharmony_ci    const char16_t *sArray=src.getBuffer();
5581cb0ef41Sopenharmony_ci    if(&dest==&src || sArray==nullptr) {
5591cb0ef41Sopenharmony_ci        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
5601cb0ef41Sopenharmony_ci        dest.setToBogus();
5611cb0ef41Sopenharmony_ci        return dest;
5621cb0ef41Sopenharmony_ci    }
5631cb0ef41Sopenharmony_ci    decompose(sArray, sArray+src.length(), dest, src.length(), errorCode);
5641cb0ef41Sopenharmony_ci    return dest;
5651cb0ef41Sopenharmony_ci}
5661cb0ef41Sopenharmony_ci
5671cb0ef41Sopenharmony_civoid
5681cb0ef41Sopenharmony_ciNormalizer2Impl::decompose(const char16_t *src, const char16_t *limit,
5691cb0ef41Sopenharmony_ci                           UnicodeString &dest,
5701cb0ef41Sopenharmony_ci                           int32_t destLengthEstimate,
5711cb0ef41Sopenharmony_ci                           UErrorCode &errorCode) const {
5721cb0ef41Sopenharmony_ci    if(destLengthEstimate<0 && limit!=nullptr) {
5731cb0ef41Sopenharmony_ci        destLengthEstimate=(int32_t)(limit-src);
5741cb0ef41Sopenharmony_ci    }
5751cb0ef41Sopenharmony_ci    dest.remove();
5761cb0ef41Sopenharmony_ci    ReorderingBuffer buffer(*this, dest);
5771cb0ef41Sopenharmony_ci    if(buffer.init(destLengthEstimate, errorCode)) {
5781cb0ef41Sopenharmony_ci        decompose(src, limit, &buffer, errorCode);
5791cb0ef41Sopenharmony_ci    }
5801cb0ef41Sopenharmony_ci}
5811cb0ef41Sopenharmony_ci
5821cb0ef41Sopenharmony_ci// Dual functionality:
5831cb0ef41Sopenharmony_ci// buffer!=nullptr: normalize
5841cb0ef41Sopenharmony_ci// buffer==nullptr: isNormalized/spanQuickCheckYes
5851cb0ef41Sopenharmony_ciconst char16_t *
5861cb0ef41Sopenharmony_ciNormalizer2Impl::decompose(const char16_t *src, const char16_t *limit,
5871cb0ef41Sopenharmony_ci                           ReorderingBuffer *buffer,
5881cb0ef41Sopenharmony_ci                           UErrorCode &errorCode) const {
5891cb0ef41Sopenharmony_ci    UChar32 minNoCP=minDecompNoCP;
5901cb0ef41Sopenharmony_ci    if(limit==nullptr) {
5911cb0ef41Sopenharmony_ci        src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode);
5921cb0ef41Sopenharmony_ci        if(U_FAILURE(errorCode)) {
5931cb0ef41Sopenharmony_ci            return src;
5941cb0ef41Sopenharmony_ci        }
5951cb0ef41Sopenharmony_ci        limit=u_strchr(src, 0);
5961cb0ef41Sopenharmony_ci    }
5971cb0ef41Sopenharmony_ci
5981cb0ef41Sopenharmony_ci    const char16_t *prevSrc;
5991cb0ef41Sopenharmony_ci    UChar32 c=0;
6001cb0ef41Sopenharmony_ci    uint16_t norm16=0;
6011cb0ef41Sopenharmony_ci
6021cb0ef41Sopenharmony_ci    // only for quick check
6031cb0ef41Sopenharmony_ci    const char16_t *prevBoundary=src;
6041cb0ef41Sopenharmony_ci    uint8_t prevCC=0;
6051cb0ef41Sopenharmony_ci
6061cb0ef41Sopenharmony_ci    for(;;) {
6071cb0ef41Sopenharmony_ci        // count code units below the minimum or with irrelevant data for the quick check
6081cb0ef41Sopenharmony_ci        for(prevSrc=src; src!=limit;) {
6091cb0ef41Sopenharmony_ci            if( (c=*src)<minNoCP ||
6101cb0ef41Sopenharmony_ci                isMostDecompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
6111cb0ef41Sopenharmony_ci            ) {
6121cb0ef41Sopenharmony_ci                ++src;
6131cb0ef41Sopenharmony_ci            } else if(!U16_IS_LEAD(c)) {
6141cb0ef41Sopenharmony_ci                break;
6151cb0ef41Sopenharmony_ci            } else {
6161cb0ef41Sopenharmony_ci                char16_t c2;
6171cb0ef41Sopenharmony_ci                if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
6181cb0ef41Sopenharmony_ci                    c=U16_GET_SUPPLEMENTARY(c, c2);
6191cb0ef41Sopenharmony_ci                    norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
6201cb0ef41Sopenharmony_ci                    if(isMostDecompYesAndZeroCC(norm16)) {
6211cb0ef41Sopenharmony_ci                        src+=2;
6221cb0ef41Sopenharmony_ci                    } else {
6231cb0ef41Sopenharmony_ci                        break;
6241cb0ef41Sopenharmony_ci                    }
6251cb0ef41Sopenharmony_ci                } else {
6261cb0ef41Sopenharmony_ci                    ++src;  // unpaired lead surrogate: inert
6271cb0ef41Sopenharmony_ci                }
6281cb0ef41Sopenharmony_ci            }
6291cb0ef41Sopenharmony_ci        }
6301cb0ef41Sopenharmony_ci        // copy these code units all at once
6311cb0ef41Sopenharmony_ci        if(src!=prevSrc) {
6321cb0ef41Sopenharmony_ci            if(buffer!=nullptr) {
6331cb0ef41Sopenharmony_ci                if(!buffer->appendZeroCC(prevSrc, src, errorCode)) {
6341cb0ef41Sopenharmony_ci                    break;
6351cb0ef41Sopenharmony_ci                }
6361cb0ef41Sopenharmony_ci            } else {
6371cb0ef41Sopenharmony_ci                prevCC=0;
6381cb0ef41Sopenharmony_ci                prevBoundary=src;
6391cb0ef41Sopenharmony_ci            }
6401cb0ef41Sopenharmony_ci        }
6411cb0ef41Sopenharmony_ci        if(src==limit) {
6421cb0ef41Sopenharmony_ci            break;
6431cb0ef41Sopenharmony_ci        }
6441cb0ef41Sopenharmony_ci
6451cb0ef41Sopenharmony_ci        // Check one above-minimum, relevant code point.
6461cb0ef41Sopenharmony_ci        src+=U16_LENGTH(c);
6471cb0ef41Sopenharmony_ci        if(buffer!=nullptr) {
6481cb0ef41Sopenharmony_ci            if(!decompose(c, norm16, *buffer, errorCode)) {
6491cb0ef41Sopenharmony_ci                break;
6501cb0ef41Sopenharmony_ci            }
6511cb0ef41Sopenharmony_ci        } else {
6521cb0ef41Sopenharmony_ci            if(isDecompYes(norm16)) {
6531cb0ef41Sopenharmony_ci                uint8_t cc=getCCFromYesOrMaybe(norm16);
6541cb0ef41Sopenharmony_ci                if(prevCC<=cc || cc==0) {
6551cb0ef41Sopenharmony_ci                    prevCC=cc;
6561cb0ef41Sopenharmony_ci                    if(cc<=1) {
6571cb0ef41Sopenharmony_ci                        prevBoundary=src;
6581cb0ef41Sopenharmony_ci                    }
6591cb0ef41Sopenharmony_ci                    continue;
6601cb0ef41Sopenharmony_ci                }
6611cb0ef41Sopenharmony_ci            }
6621cb0ef41Sopenharmony_ci            return prevBoundary;  // "no" or cc out of order
6631cb0ef41Sopenharmony_ci        }
6641cb0ef41Sopenharmony_ci    }
6651cb0ef41Sopenharmony_ci    return src;
6661cb0ef41Sopenharmony_ci}
6671cb0ef41Sopenharmony_ci
6681cb0ef41Sopenharmony_ci// Decompose a short piece of text which is likely to contain characters that
6691cb0ef41Sopenharmony_ci// fail the quick check loop and/or where the quick check loop's overhead
6701cb0ef41Sopenharmony_ci// is unlikely to be amortized.
6711cb0ef41Sopenharmony_ci// Called by the compose() and makeFCD() implementations.
6721cb0ef41Sopenharmony_ciconst char16_t *
6731cb0ef41Sopenharmony_ciNormalizer2Impl::decomposeShort(const char16_t *src, const char16_t *limit,
6741cb0ef41Sopenharmony_ci                                UBool stopAtCompBoundary, UBool onlyContiguous,
6751cb0ef41Sopenharmony_ci                                ReorderingBuffer &buffer, UErrorCode &errorCode) const {
6761cb0ef41Sopenharmony_ci    if (U_FAILURE(errorCode)) {
6771cb0ef41Sopenharmony_ci        return nullptr;
6781cb0ef41Sopenharmony_ci    }
6791cb0ef41Sopenharmony_ci    while(src<limit) {
6801cb0ef41Sopenharmony_ci        if (stopAtCompBoundary && *src < minCompNoMaybeCP) {
6811cb0ef41Sopenharmony_ci            return src;
6821cb0ef41Sopenharmony_ci        }
6831cb0ef41Sopenharmony_ci        const char16_t *prevSrc = src;
6841cb0ef41Sopenharmony_ci        UChar32 c;
6851cb0ef41Sopenharmony_ci        uint16_t norm16;
6861cb0ef41Sopenharmony_ci        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
6871cb0ef41Sopenharmony_ci        if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
6881cb0ef41Sopenharmony_ci            return prevSrc;
6891cb0ef41Sopenharmony_ci        }
6901cb0ef41Sopenharmony_ci        if(!decompose(c, norm16, buffer, errorCode)) {
6911cb0ef41Sopenharmony_ci            return nullptr;
6921cb0ef41Sopenharmony_ci        }
6931cb0ef41Sopenharmony_ci        if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
6941cb0ef41Sopenharmony_ci            return src;
6951cb0ef41Sopenharmony_ci        }
6961cb0ef41Sopenharmony_ci    }
6971cb0ef41Sopenharmony_ci    return src;
6981cb0ef41Sopenharmony_ci}
6991cb0ef41Sopenharmony_ci
7001cb0ef41Sopenharmony_ciUBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
7011cb0ef41Sopenharmony_ci                                 ReorderingBuffer &buffer,
7021cb0ef41Sopenharmony_ci                                 UErrorCode &errorCode) const {
7031cb0ef41Sopenharmony_ci    // get the decomposition and the lead and trail cc's
7041cb0ef41Sopenharmony_ci    if (norm16 >= limitNoNo) {
7051cb0ef41Sopenharmony_ci        if (isMaybeOrNonZeroCC(norm16)) {
7061cb0ef41Sopenharmony_ci            return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode);
7071cb0ef41Sopenharmony_ci        }
7081cb0ef41Sopenharmony_ci        // Maps to an isCompYesAndZeroCC.
7091cb0ef41Sopenharmony_ci        c=mapAlgorithmic(c, norm16);
7101cb0ef41Sopenharmony_ci        norm16=getRawNorm16(c);
7111cb0ef41Sopenharmony_ci    }
7121cb0ef41Sopenharmony_ci    if (norm16 < minYesNo) {
7131cb0ef41Sopenharmony_ci        // c does not decompose
7141cb0ef41Sopenharmony_ci        return buffer.append(c, 0, errorCode);
7151cb0ef41Sopenharmony_ci    } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
7161cb0ef41Sopenharmony_ci        // Hangul syllable: decompose algorithmically
7171cb0ef41Sopenharmony_ci        char16_t jamos[3];
7181cb0ef41Sopenharmony_ci        return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode);
7191cb0ef41Sopenharmony_ci    }
7201cb0ef41Sopenharmony_ci    // c decomposes, get everything from the variable-length extra data
7211cb0ef41Sopenharmony_ci    const uint16_t *mapping=getMapping(norm16);
7221cb0ef41Sopenharmony_ci    uint16_t firstUnit=*mapping;
7231cb0ef41Sopenharmony_ci    int32_t length=firstUnit&MAPPING_LENGTH_MASK;
7241cb0ef41Sopenharmony_ci    uint8_t leadCC, trailCC;
7251cb0ef41Sopenharmony_ci    trailCC=(uint8_t)(firstUnit>>8);
7261cb0ef41Sopenharmony_ci    if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
7271cb0ef41Sopenharmony_ci        leadCC=(uint8_t)(*(mapping-1)>>8);
7281cb0ef41Sopenharmony_ci    } else {
7291cb0ef41Sopenharmony_ci        leadCC=0;
7301cb0ef41Sopenharmony_ci    }
7311cb0ef41Sopenharmony_ci    return buffer.append((const char16_t *)mapping+1, length, true, leadCC, trailCC, errorCode);
7321cb0ef41Sopenharmony_ci}
7331cb0ef41Sopenharmony_ci
7341cb0ef41Sopenharmony_ci// Dual functionality:
7351cb0ef41Sopenharmony_ci// sink != nullptr: normalize
7361cb0ef41Sopenharmony_ci// sink == nullptr: isNormalized/spanQuickCheckYes
7371cb0ef41Sopenharmony_ciconst uint8_t *
7381cb0ef41Sopenharmony_ciNormalizer2Impl::decomposeUTF8(uint32_t options,
7391cb0ef41Sopenharmony_ci                               const uint8_t *src, const uint8_t *limit,
7401cb0ef41Sopenharmony_ci                               ByteSink *sink, Edits *edits, UErrorCode &errorCode) const {
7411cb0ef41Sopenharmony_ci    U_ASSERT(limit != nullptr);
7421cb0ef41Sopenharmony_ci    UnicodeString s16;
7431cb0ef41Sopenharmony_ci    uint8_t minNoLead = leadByteForCP(minDecompNoCP);
7441cb0ef41Sopenharmony_ci
7451cb0ef41Sopenharmony_ci    const uint8_t *prevBoundary = src;
7461cb0ef41Sopenharmony_ci    // only for quick check
7471cb0ef41Sopenharmony_ci    uint8_t prevCC = 0;
7481cb0ef41Sopenharmony_ci
7491cb0ef41Sopenharmony_ci    for (;;) {
7501cb0ef41Sopenharmony_ci        // Fast path: Scan over a sequence of characters below the minimum "no" code point,
7511cb0ef41Sopenharmony_ci        // or with (decompYes && ccc==0) properties.
7521cb0ef41Sopenharmony_ci        const uint8_t *fastStart = src;
7531cb0ef41Sopenharmony_ci        const uint8_t *prevSrc;
7541cb0ef41Sopenharmony_ci        uint16_t norm16 = 0;
7551cb0ef41Sopenharmony_ci
7561cb0ef41Sopenharmony_ci        for (;;) {
7571cb0ef41Sopenharmony_ci            if (src == limit) {
7581cb0ef41Sopenharmony_ci                if (prevBoundary != limit && sink != nullptr) {
7591cb0ef41Sopenharmony_ci                    ByteSinkUtil::appendUnchanged(prevBoundary, limit,
7601cb0ef41Sopenharmony_ci                                                  *sink, options, edits, errorCode);
7611cb0ef41Sopenharmony_ci                }
7621cb0ef41Sopenharmony_ci                return src;
7631cb0ef41Sopenharmony_ci            }
7641cb0ef41Sopenharmony_ci            if (*src < minNoLead) {
7651cb0ef41Sopenharmony_ci                ++src;
7661cb0ef41Sopenharmony_ci            } else {
7671cb0ef41Sopenharmony_ci                prevSrc = src;
7681cb0ef41Sopenharmony_ci                UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
7691cb0ef41Sopenharmony_ci                if (!isMostDecompYesAndZeroCC(norm16)) {
7701cb0ef41Sopenharmony_ci                    break;
7711cb0ef41Sopenharmony_ci                }
7721cb0ef41Sopenharmony_ci            }
7731cb0ef41Sopenharmony_ci        }
7741cb0ef41Sopenharmony_ci        // isMostDecompYesAndZeroCC(norm16) is false, that is, norm16>=minYesNo,
7751cb0ef41Sopenharmony_ci        // and the current character at [prevSrc..src[ is not a common case with cc=0
7761cb0ef41Sopenharmony_ci        // (MIN_NORMAL_MAYBE_YES or JAMO_VT).
7771cb0ef41Sopenharmony_ci        // It could still be a maybeYes with cc=0.
7781cb0ef41Sopenharmony_ci        if (prevSrc != fastStart) {
7791cb0ef41Sopenharmony_ci            // The fast path looped over yes/0 characters before the current one.
7801cb0ef41Sopenharmony_ci            if (sink != nullptr &&
7811cb0ef41Sopenharmony_ci                    !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
7821cb0ef41Sopenharmony_ci                                                   *sink, options, edits, errorCode)) {
7831cb0ef41Sopenharmony_ci                break;
7841cb0ef41Sopenharmony_ci            }
7851cb0ef41Sopenharmony_ci            prevBoundary = prevSrc;
7861cb0ef41Sopenharmony_ci            prevCC = 0;
7871cb0ef41Sopenharmony_ci        }
7881cb0ef41Sopenharmony_ci
7891cb0ef41Sopenharmony_ci        // Medium-fast path: Quick check.
7901cb0ef41Sopenharmony_ci        if (isMaybeOrNonZeroCC(norm16)) {
7911cb0ef41Sopenharmony_ci            // Does not decompose.
7921cb0ef41Sopenharmony_ci            uint8_t cc = getCCFromYesOrMaybe(norm16);
7931cb0ef41Sopenharmony_ci            if (prevCC <= cc || cc == 0) {
7941cb0ef41Sopenharmony_ci                prevCC = cc;
7951cb0ef41Sopenharmony_ci                if (cc <= 1) {
7961cb0ef41Sopenharmony_ci                    if (sink != nullptr &&
7971cb0ef41Sopenharmony_ci                            !ByteSinkUtil::appendUnchanged(prevBoundary, src,
7981cb0ef41Sopenharmony_ci                                                           *sink, options, edits, errorCode)) {
7991cb0ef41Sopenharmony_ci                        break;
8001cb0ef41Sopenharmony_ci                    }
8011cb0ef41Sopenharmony_ci                    prevBoundary = src;
8021cb0ef41Sopenharmony_ci                }
8031cb0ef41Sopenharmony_ci                continue;
8041cb0ef41Sopenharmony_ci            }
8051cb0ef41Sopenharmony_ci        }
8061cb0ef41Sopenharmony_ci        if (sink == nullptr) {
8071cb0ef41Sopenharmony_ci            return prevBoundary;  // quick check: "no" or cc out of order
8081cb0ef41Sopenharmony_ci        }
8091cb0ef41Sopenharmony_ci
8101cb0ef41Sopenharmony_ci        // Slow path
8111cb0ef41Sopenharmony_ci        // Decompose up to and including the current character.
8121cb0ef41Sopenharmony_ci        if (prevBoundary != prevSrc && norm16HasDecompBoundaryBefore(norm16)) {
8131cb0ef41Sopenharmony_ci            if (!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
8141cb0ef41Sopenharmony_ci                                               *sink, options, edits, errorCode)) {
8151cb0ef41Sopenharmony_ci                break;
8161cb0ef41Sopenharmony_ci            }
8171cb0ef41Sopenharmony_ci            prevBoundary = prevSrc;
8181cb0ef41Sopenharmony_ci        }
8191cb0ef41Sopenharmony_ci        ReorderingBuffer buffer(*this, s16, errorCode);
8201cb0ef41Sopenharmony_ci        if (U_FAILURE(errorCode)) {
8211cb0ef41Sopenharmony_ci            break;
8221cb0ef41Sopenharmony_ci        }
8231cb0ef41Sopenharmony_ci        decomposeShort(prevBoundary, src, STOP_AT_LIMIT, false /* onlyContiguous */,
8241cb0ef41Sopenharmony_ci                       buffer, errorCode);
8251cb0ef41Sopenharmony_ci        // Decompose until the next boundary.
8261cb0ef41Sopenharmony_ci        if (buffer.getLastCC() > 1) {
8271cb0ef41Sopenharmony_ci            src = decomposeShort(src, limit, STOP_AT_DECOMP_BOUNDARY, false /* onlyContiguous */,
8281cb0ef41Sopenharmony_ci                                 buffer, errorCode);
8291cb0ef41Sopenharmony_ci        }
8301cb0ef41Sopenharmony_ci        if (U_FAILURE(errorCode)) {
8311cb0ef41Sopenharmony_ci            break;
8321cb0ef41Sopenharmony_ci        }
8331cb0ef41Sopenharmony_ci        if ((src - prevSrc) > INT32_MAX) {  // guard before buffer.equals()
8341cb0ef41Sopenharmony_ci            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
8351cb0ef41Sopenharmony_ci            break;
8361cb0ef41Sopenharmony_ci        }
8371cb0ef41Sopenharmony_ci        // We already know there was a change if the original character decomposed;
8381cb0ef41Sopenharmony_ci        // otherwise compare.
8391cb0ef41Sopenharmony_ci        if (isMaybeOrNonZeroCC(norm16) && buffer.equals(prevBoundary, src)) {
8401cb0ef41Sopenharmony_ci            if (!ByteSinkUtil::appendUnchanged(prevBoundary, src,
8411cb0ef41Sopenharmony_ci                                               *sink, options, edits, errorCode)) {
8421cb0ef41Sopenharmony_ci                break;
8431cb0ef41Sopenharmony_ci            }
8441cb0ef41Sopenharmony_ci        } else {
8451cb0ef41Sopenharmony_ci            if (!ByteSinkUtil::appendChange(prevBoundary, src, buffer.getStart(), buffer.length(),
8461cb0ef41Sopenharmony_ci                                            *sink, edits, errorCode)) {
8471cb0ef41Sopenharmony_ci                break;
8481cb0ef41Sopenharmony_ci            }
8491cb0ef41Sopenharmony_ci        }
8501cb0ef41Sopenharmony_ci        prevBoundary = src;
8511cb0ef41Sopenharmony_ci        prevCC = 0;
8521cb0ef41Sopenharmony_ci    }
8531cb0ef41Sopenharmony_ci    return src;
8541cb0ef41Sopenharmony_ci}
8551cb0ef41Sopenharmony_ci
8561cb0ef41Sopenharmony_ciconst uint8_t *
8571cb0ef41Sopenharmony_ciNormalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
8581cb0ef41Sopenharmony_ci                                StopAt stopAt, UBool onlyContiguous,
8591cb0ef41Sopenharmony_ci                                ReorderingBuffer &buffer, UErrorCode &errorCode) const {
8601cb0ef41Sopenharmony_ci    if (U_FAILURE(errorCode)) {
8611cb0ef41Sopenharmony_ci        return nullptr;
8621cb0ef41Sopenharmony_ci    }
8631cb0ef41Sopenharmony_ci    while (src < limit) {
8641cb0ef41Sopenharmony_ci        const uint8_t *prevSrc = src;
8651cb0ef41Sopenharmony_ci        uint16_t norm16;
8661cb0ef41Sopenharmony_ci        UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
8671cb0ef41Sopenharmony_ci        // Get the decomposition and the lead and trail cc's.
8681cb0ef41Sopenharmony_ci        UChar32 c = U_SENTINEL;
8691cb0ef41Sopenharmony_ci        if (norm16 >= limitNoNo) {
8701cb0ef41Sopenharmony_ci            if (isMaybeOrNonZeroCC(norm16)) {
8711cb0ef41Sopenharmony_ci                // No comp boundaries around this character.
8721cb0ef41Sopenharmony_ci                uint8_t cc = getCCFromYesOrMaybe(norm16);
8731cb0ef41Sopenharmony_ci                if (cc == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
8741cb0ef41Sopenharmony_ci                    return prevSrc;
8751cb0ef41Sopenharmony_ci                }
8761cb0ef41Sopenharmony_ci                c = codePointFromValidUTF8(prevSrc, src);
8771cb0ef41Sopenharmony_ci                if (!buffer.append(c, cc, errorCode)) {
8781cb0ef41Sopenharmony_ci                    return nullptr;
8791cb0ef41Sopenharmony_ci                }
8801cb0ef41Sopenharmony_ci                if (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1) {
8811cb0ef41Sopenharmony_ci                    return src;
8821cb0ef41Sopenharmony_ci                }
8831cb0ef41Sopenharmony_ci                continue;
8841cb0ef41Sopenharmony_ci            }
8851cb0ef41Sopenharmony_ci            // Maps to an isCompYesAndZeroCC.
8861cb0ef41Sopenharmony_ci            if (stopAt != STOP_AT_LIMIT) {
8871cb0ef41Sopenharmony_ci                return prevSrc;
8881cb0ef41Sopenharmony_ci            }
8891cb0ef41Sopenharmony_ci            c = codePointFromValidUTF8(prevSrc, src);
8901cb0ef41Sopenharmony_ci            c = mapAlgorithmic(c, norm16);
8911cb0ef41Sopenharmony_ci            norm16 = getRawNorm16(c);
8921cb0ef41Sopenharmony_ci        } else if (stopAt != STOP_AT_LIMIT && norm16 < minNoNoCompNoMaybeCC) {
8931cb0ef41Sopenharmony_ci            return prevSrc;
8941cb0ef41Sopenharmony_ci        }
8951cb0ef41Sopenharmony_ci        // norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8.
8961cb0ef41Sopenharmony_ci        // We do not see invalid UTF-8 here because
8971cb0ef41Sopenharmony_ci        // its norm16==INERT is normalization-inert,
8981cb0ef41Sopenharmony_ci        // so it gets copied unchanged in the fast path,
8991cb0ef41Sopenharmony_ci        // and we stop the slow path where invalid UTF-8 begins.
9001cb0ef41Sopenharmony_ci        // c >= 0 is the result of an algorithmic mapping.
9011cb0ef41Sopenharmony_ci        U_ASSERT(c >= 0 || norm16 != INERT);
9021cb0ef41Sopenharmony_ci        if (norm16 < minYesNo) {
9031cb0ef41Sopenharmony_ci            if (c < 0) {
9041cb0ef41Sopenharmony_ci                c = codePointFromValidUTF8(prevSrc, src);
9051cb0ef41Sopenharmony_ci            }
9061cb0ef41Sopenharmony_ci            // does not decompose
9071cb0ef41Sopenharmony_ci            if (!buffer.append(c, 0, errorCode)) {
9081cb0ef41Sopenharmony_ci                return nullptr;
9091cb0ef41Sopenharmony_ci            }
9101cb0ef41Sopenharmony_ci        } else if (isHangulLV(norm16) || isHangulLVT(norm16)) {
9111cb0ef41Sopenharmony_ci            // Hangul syllable: decompose algorithmically
9121cb0ef41Sopenharmony_ci            if (c < 0) {
9131cb0ef41Sopenharmony_ci                c = codePointFromValidUTF8(prevSrc, src);
9141cb0ef41Sopenharmony_ci            }
9151cb0ef41Sopenharmony_ci            char16_t jamos[3];
9161cb0ef41Sopenharmony_ci            if (!buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode)) {
9171cb0ef41Sopenharmony_ci                return nullptr;
9181cb0ef41Sopenharmony_ci            }
9191cb0ef41Sopenharmony_ci        } else {
9201cb0ef41Sopenharmony_ci            // The character decomposes, get everything from the variable-length extra data.
9211cb0ef41Sopenharmony_ci            const uint16_t *mapping = getMapping(norm16);
9221cb0ef41Sopenharmony_ci            uint16_t firstUnit = *mapping;
9231cb0ef41Sopenharmony_ci            int32_t length = firstUnit & MAPPING_LENGTH_MASK;
9241cb0ef41Sopenharmony_ci            uint8_t trailCC = (uint8_t)(firstUnit >> 8);
9251cb0ef41Sopenharmony_ci            uint8_t leadCC;
9261cb0ef41Sopenharmony_ci            if (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) {
9271cb0ef41Sopenharmony_ci                leadCC = (uint8_t)(*(mapping-1) >> 8);
9281cb0ef41Sopenharmony_ci            } else {
9291cb0ef41Sopenharmony_ci                leadCC = 0;
9301cb0ef41Sopenharmony_ci            }
9311cb0ef41Sopenharmony_ci            if (leadCC == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
9321cb0ef41Sopenharmony_ci                return prevSrc;
9331cb0ef41Sopenharmony_ci            }
9341cb0ef41Sopenharmony_ci            if (!buffer.append((const char16_t *)mapping+1, length, true, leadCC, trailCC, errorCode)) {
9351cb0ef41Sopenharmony_ci                return nullptr;
9361cb0ef41Sopenharmony_ci            }
9371cb0ef41Sopenharmony_ci        }
9381cb0ef41Sopenharmony_ci        if ((stopAt == STOP_AT_COMP_BOUNDARY && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) ||
9391cb0ef41Sopenharmony_ci                (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1)) {
9401cb0ef41Sopenharmony_ci            return src;
9411cb0ef41Sopenharmony_ci        }
9421cb0ef41Sopenharmony_ci    }
9431cb0ef41Sopenharmony_ci    return src;
9441cb0ef41Sopenharmony_ci}
9451cb0ef41Sopenharmony_ci
9461cb0ef41Sopenharmony_ciconst char16_t *
9471cb0ef41Sopenharmony_ciNormalizer2Impl::getDecomposition(UChar32 c, char16_t buffer[4], int32_t &length) const {
9481cb0ef41Sopenharmony_ci    uint16_t norm16;
9491cb0ef41Sopenharmony_ci    if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) {
9501cb0ef41Sopenharmony_ci        // c does not decompose
9511cb0ef41Sopenharmony_ci        return nullptr;
9521cb0ef41Sopenharmony_ci    }
9531cb0ef41Sopenharmony_ci    const char16_t *decomp = nullptr;
9541cb0ef41Sopenharmony_ci    if(isDecompNoAlgorithmic(norm16)) {
9551cb0ef41Sopenharmony_ci        // Maps to an isCompYesAndZeroCC.
9561cb0ef41Sopenharmony_ci        c=mapAlgorithmic(c, norm16);
9571cb0ef41Sopenharmony_ci        decomp=buffer;
9581cb0ef41Sopenharmony_ci        length=0;
9591cb0ef41Sopenharmony_ci        U16_APPEND_UNSAFE(buffer, length, c);
9601cb0ef41Sopenharmony_ci        // The mapping might decompose further.
9611cb0ef41Sopenharmony_ci        norm16 = getRawNorm16(c);
9621cb0ef41Sopenharmony_ci    }
9631cb0ef41Sopenharmony_ci    if (norm16 < minYesNo) {
9641cb0ef41Sopenharmony_ci        return decomp;
9651cb0ef41Sopenharmony_ci    } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
9661cb0ef41Sopenharmony_ci        // Hangul syllable: decompose algorithmically
9671cb0ef41Sopenharmony_ci        length=Hangul::decompose(c, buffer);
9681cb0ef41Sopenharmony_ci        return buffer;
9691cb0ef41Sopenharmony_ci    }
9701cb0ef41Sopenharmony_ci    // c decomposes, get everything from the variable-length extra data
9711cb0ef41Sopenharmony_ci    const uint16_t *mapping=getMapping(norm16);
9721cb0ef41Sopenharmony_ci    length=*mapping&MAPPING_LENGTH_MASK;
9731cb0ef41Sopenharmony_ci    return (const char16_t *)mapping+1;
9741cb0ef41Sopenharmony_ci}
9751cb0ef41Sopenharmony_ci
9761cb0ef41Sopenharmony_ci// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1
9771cb0ef41Sopenharmony_ci// so that a raw mapping fits that consists of one unit ("rm0")
9781cb0ef41Sopenharmony_ci// plus all but the first two code units of the normal mapping.
9791cb0ef41Sopenharmony_ci// The maximum length of a normal mapping is 31=MAPPING_LENGTH_MASK.
9801cb0ef41Sopenharmony_ciconst char16_t *
9811cb0ef41Sopenharmony_ciNormalizer2Impl::getRawDecomposition(UChar32 c, char16_t buffer[30], int32_t &length) const {
9821cb0ef41Sopenharmony_ci    uint16_t norm16;
9831cb0ef41Sopenharmony_ci    if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
9841cb0ef41Sopenharmony_ci        // c does not decompose
9851cb0ef41Sopenharmony_ci        return nullptr;
9861cb0ef41Sopenharmony_ci    } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
9871cb0ef41Sopenharmony_ci        // Hangul syllable: decompose algorithmically
9881cb0ef41Sopenharmony_ci        Hangul::getRawDecomposition(c, buffer);
9891cb0ef41Sopenharmony_ci        length=2;
9901cb0ef41Sopenharmony_ci        return buffer;
9911cb0ef41Sopenharmony_ci    } else if(isDecompNoAlgorithmic(norm16)) {
9921cb0ef41Sopenharmony_ci        c=mapAlgorithmic(c, norm16);
9931cb0ef41Sopenharmony_ci        length=0;
9941cb0ef41Sopenharmony_ci        U16_APPEND_UNSAFE(buffer, length, c);
9951cb0ef41Sopenharmony_ci        return buffer;
9961cb0ef41Sopenharmony_ci    }
9971cb0ef41Sopenharmony_ci    // c decomposes, get everything from the variable-length extra data
9981cb0ef41Sopenharmony_ci    const uint16_t *mapping=getMapping(norm16);
9991cb0ef41Sopenharmony_ci    uint16_t firstUnit=*mapping;
10001cb0ef41Sopenharmony_ci    int32_t mLength=firstUnit&MAPPING_LENGTH_MASK;  // length of normal mapping
10011cb0ef41Sopenharmony_ci    if(firstUnit&MAPPING_HAS_RAW_MAPPING) {
10021cb0ef41Sopenharmony_ci        // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
10031cb0ef41Sopenharmony_ci        // Bit 7=MAPPING_HAS_CCC_LCCC_WORD
10041cb0ef41Sopenharmony_ci        const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1;
10051cb0ef41Sopenharmony_ci        uint16_t rm0=*rawMapping;
10061cb0ef41Sopenharmony_ci        if(rm0<=MAPPING_LENGTH_MASK) {
10071cb0ef41Sopenharmony_ci            length=rm0;
10081cb0ef41Sopenharmony_ci            return (const char16_t *)rawMapping-rm0;
10091cb0ef41Sopenharmony_ci        } else {
10101cb0ef41Sopenharmony_ci            // Copy the normal mapping and replace its first two code units with rm0.
10111cb0ef41Sopenharmony_ci            buffer[0]=(char16_t)rm0;
10121cb0ef41Sopenharmony_ci            u_memcpy(buffer+1, (const char16_t *)mapping+1+2, mLength-2);
10131cb0ef41Sopenharmony_ci            length=mLength-1;
10141cb0ef41Sopenharmony_ci            return buffer;
10151cb0ef41Sopenharmony_ci        }
10161cb0ef41Sopenharmony_ci    } else {
10171cb0ef41Sopenharmony_ci        length=mLength;
10181cb0ef41Sopenharmony_ci        return (const char16_t *)mapping+1;
10191cb0ef41Sopenharmony_ci    }
10201cb0ef41Sopenharmony_ci}
10211cb0ef41Sopenharmony_ci
10221cb0ef41Sopenharmony_civoid Normalizer2Impl::decomposeAndAppend(const char16_t *src, const char16_t *limit,
10231cb0ef41Sopenharmony_ci                                         UBool doDecompose,
10241cb0ef41Sopenharmony_ci                                         UnicodeString &safeMiddle,
10251cb0ef41Sopenharmony_ci                                         ReorderingBuffer &buffer,
10261cb0ef41Sopenharmony_ci                                         UErrorCode &errorCode) const {
10271cb0ef41Sopenharmony_ci    buffer.copyReorderableSuffixTo(safeMiddle);
10281cb0ef41Sopenharmony_ci    if(doDecompose) {
10291cb0ef41Sopenharmony_ci        decompose(src, limit, &buffer, errorCode);
10301cb0ef41Sopenharmony_ci        return;
10311cb0ef41Sopenharmony_ci    }
10321cb0ef41Sopenharmony_ci    // Just merge the strings at the boundary.
10331cb0ef41Sopenharmony_ci    bool isFirst = true;
10341cb0ef41Sopenharmony_ci    uint8_t firstCC = 0, prevCC = 0, cc;
10351cb0ef41Sopenharmony_ci    const char16_t *p = src;
10361cb0ef41Sopenharmony_ci    while (p != limit) {
10371cb0ef41Sopenharmony_ci        const char16_t *codePointStart = p;
10381cb0ef41Sopenharmony_ci        UChar32 c;
10391cb0ef41Sopenharmony_ci        uint16_t norm16;
10401cb0ef41Sopenharmony_ci        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
10411cb0ef41Sopenharmony_ci        if ((cc = getCC(norm16)) == 0) {
10421cb0ef41Sopenharmony_ci            p = codePointStart;
10431cb0ef41Sopenharmony_ci            break;
10441cb0ef41Sopenharmony_ci        }
10451cb0ef41Sopenharmony_ci        if (isFirst) {
10461cb0ef41Sopenharmony_ci            firstCC = cc;
10471cb0ef41Sopenharmony_ci            isFirst = false;
10481cb0ef41Sopenharmony_ci        }
10491cb0ef41Sopenharmony_ci        prevCC = cc;
10501cb0ef41Sopenharmony_ci    }
10511cb0ef41Sopenharmony_ci    if(limit==nullptr) {  // appendZeroCC() needs limit!=nullptr
10521cb0ef41Sopenharmony_ci        limit=u_strchr(p, 0);
10531cb0ef41Sopenharmony_ci    }
10541cb0ef41Sopenharmony_ci
10551cb0ef41Sopenharmony_ci    if (buffer.append(src, (int32_t)(p - src), false, firstCC, prevCC, errorCode)) {
10561cb0ef41Sopenharmony_ci        buffer.appendZeroCC(p, limit, errorCode);
10571cb0ef41Sopenharmony_ci    }
10581cb0ef41Sopenharmony_ci}
10591cb0ef41Sopenharmony_ci
10601cb0ef41Sopenharmony_ciUBool Normalizer2Impl::hasDecompBoundaryBefore(UChar32 c) const {
10611cb0ef41Sopenharmony_ci    return c < minLcccCP || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) ||
10621cb0ef41Sopenharmony_ci        norm16HasDecompBoundaryBefore(getNorm16(c));
10631cb0ef41Sopenharmony_ci}
10641cb0ef41Sopenharmony_ci
10651cb0ef41Sopenharmony_ciUBool Normalizer2Impl::norm16HasDecompBoundaryBefore(uint16_t norm16) const {
10661cb0ef41Sopenharmony_ci    if (norm16 < minNoNoCompNoMaybeCC) {
10671cb0ef41Sopenharmony_ci        return true;
10681cb0ef41Sopenharmony_ci    }
10691cb0ef41Sopenharmony_ci    if (norm16 >= limitNoNo) {
10701cb0ef41Sopenharmony_ci        return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
10711cb0ef41Sopenharmony_ci    }
10721cb0ef41Sopenharmony_ci    // c decomposes, get everything from the variable-length extra data
10731cb0ef41Sopenharmony_ci    const uint16_t *mapping=getMapping(norm16);
10741cb0ef41Sopenharmony_ci    uint16_t firstUnit=*mapping;
10751cb0ef41Sopenharmony_ci    // true if leadCC==0 (hasFCDBoundaryBefore())
10761cb0ef41Sopenharmony_ci    return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0;
10771cb0ef41Sopenharmony_ci}
10781cb0ef41Sopenharmony_ci
10791cb0ef41Sopenharmony_ciUBool Normalizer2Impl::hasDecompBoundaryAfter(UChar32 c) const {
10801cb0ef41Sopenharmony_ci    if (c < minDecompNoCP) {
10811cb0ef41Sopenharmony_ci        return true;
10821cb0ef41Sopenharmony_ci    }
10831cb0ef41Sopenharmony_ci    if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) {
10841cb0ef41Sopenharmony_ci        return true;
10851cb0ef41Sopenharmony_ci    }
10861cb0ef41Sopenharmony_ci    return norm16HasDecompBoundaryAfter(getNorm16(c));
10871cb0ef41Sopenharmony_ci}
10881cb0ef41Sopenharmony_ci
10891cb0ef41Sopenharmony_ciUBool Normalizer2Impl::norm16HasDecompBoundaryAfter(uint16_t norm16) const {
10901cb0ef41Sopenharmony_ci    if(norm16 <= minYesNo || isHangulLVT(norm16)) {
10911cb0ef41Sopenharmony_ci        return true;
10921cb0ef41Sopenharmony_ci    }
10931cb0ef41Sopenharmony_ci    if (norm16 >= limitNoNo) {
10941cb0ef41Sopenharmony_ci        if (isMaybeOrNonZeroCC(norm16)) {
10951cb0ef41Sopenharmony_ci            return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
10961cb0ef41Sopenharmony_ci        }
10971cb0ef41Sopenharmony_ci        // Maps to an isCompYesAndZeroCC.
10981cb0ef41Sopenharmony_ci        return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
10991cb0ef41Sopenharmony_ci    }
11001cb0ef41Sopenharmony_ci    // c decomposes, get everything from the variable-length extra data
11011cb0ef41Sopenharmony_ci    const uint16_t *mapping=getMapping(norm16);
11021cb0ef41Sopenharmony_ci    uint16_t firstUnit=*mapping;
11031cb0ef41Sopenharmony_ci    // decomp after-boundary: same as hasFCDBoundaryAfter(),
11041cb0ef41Sopenharmony_ci    // fcd16<=1 || trailCC==0
11051cb0ef41Sopenharmony_ci    if(firstUnit>0x1ff) {
11061cb0ef41Sopenharmony_ci        return false;  // trailCC>1
11071cb0ef41Sopenharmony_ci    }
11081cb0ef41Sopenharmony_ci    if(firstUnit<=0xff) {
11091cb0ef41Sopenharmony_ci        return true;  // trailCC==0
11101cb0ef41Sopenharmony_ci    }
11111cb0ef41Sopenharmony_ci    // if(trailCC==1) test leadCC==0, same as checking for before-boundary
11121cb0ef41Sopenharmony_ci    // true if leadCC==0 (hasFCDBoundaryBefore())
11131cb0ef41Sopenharmony_ci    return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0;
11141cb0ef41Sopenharmony_ci}
11151cb0ef41Sopenharmony_ci
11161cb0ef41Sopenharmony_ci/*
11171cb0ef41Sopenharmony_ci * Finds the recomposition result for
11181cb0ef41Sopenharmony_ci * a forward-combining "lead" character,
11191cb0ef41Sopenharmony_ci * specified with a pointer to its compositions list,
11201cb0ef41Sopenharmony_ci * and a backward-combining "trail" character.
11211cb0ef41Sopenharmony_ci *
11221cb0ef41Sopenharmony_ci * If the lead and trail characters combine, then this function returns
11231cb0ef41Sopenharmony_ci * the following "compositeAndFwd" value:
11241cb0ef41Sopenharmony_ci * Bits 21..1  composite character
11251cb0ef41Sopenharmony_ci * Bit      0  set if the composite is a forward-combining starter
11261cb0ef41Sopenharmony_ci * otherwise it returns -1.
11271cb0ef41Sopenharmony_ci *
11281cb0ef41Sopenharmony_ci * The compositions list has (trail, compositeAndFwd) pair entries,
11291cb0ef41Sopenharmony_ci * encoded as either pairs or triples of 16-bit units.
11301cb0ef41Sopenharmony_ci * The last entry has the high bit of its first unit set.
11311cb0ef41Sopenharmony_ci *
11321cb0ef41Sopenharmony_ci * The list is sorted by ascending trail characters (there are no duplicates).
11331cb0ef41Sopenharmony_ci * A linear search is used.
11341cb0ef41Sopenharmony_ci *
11351cb0ef41Sopenharmony_ci * See normalizer2impl.h for a more detailed description
11361cb0ef41Sopenharmony_ci * of the compositions list format.
11371cb0ef41Sopenharmony_ci */
11381cb0ef41Sopenharmony_ciint32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) {
11391cb0ef41Sopenharmony_ci    uint16_t key1, firstUnit;
11401cb0ef41Sopenharmony_ci    if(trail<COMP_1_TRAIL_LIMIT) {
11411cb0ef41Sopenharmony_ci        // trail character is 0..33FF
11421cb0ef41Sopenharmony_ci        // result entry may have 2 or 3 units
11431cb0ef41Sopenharmony_ci        key1=(uint16_t)(trail<<1);
11441cb0ef41Sopenharmony_ci        while(key1>(firstUnit=*list)) {
11451cb0ef41Sopenharmony_ci            list+=2+(firstUnit&COMP_1_TRIPLE);
11461cb0ef41Sopenharmony_ci        }
11471cb0ef41Sopenharmony_ci        if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
11481cb0ef41Sopenharmony_ci            if(firstUnit&COMP_1_TRIPLE) {
11491cb0ef41Sopenharmony_ci                return ((int32_t)list[1]<<16)|list[2];
11501cb0ef41Sopenharmony_ci            } else {
11511cb0ef41Sopenharmony_ci                return list[1];
11521cb0ef41Sopenharmony_ci            }
11531cb0ef41Sopenharmony_ci        }
11541cb0ef41Sopenharmony_ci    } else {
11551cb0ef41Sopenharmony_ci        // trail character is 3400..10FFFF
11561cb0ef41Sopenharmony_ci        // result entry has 3 units
11571cb0ef41Sopenharmony_ci        key1=(uint16_t)(COMP_1_TRAIL_LIMIT+
11581cb0ef41Sopenharmony_ci                        (((trail>>COMP_1_TRAIL_SHIFT))&
11591cb0ef41Sopenharmony_ci                          ~COMP_1_TRIPLE));
11601cb0ef41Sopenharmony_ci        uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT);
11611cb0ef41Sopenharmony_ci        uint16_t secondUnit;
11621cb0ef41Sopenharmony_ci        for(;;) {
11631cb0ef41Sopenharmony_ci            if(key1>(firstUnit=*list)) {
11641cb0ef41Sopenharmony_ci                list+=2+(firstUnit&COMP_1_TRIPLE);
11651cb0ef41Sopenharmony_ci            } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
11661cb0ef41Sopenharmony_ci                if(key2>(secondUnit=list[1])) {
11671cb0ef41Sopenharmony_ci                    if(firstUnit&COMP_1_LAST_TUPLE) {
11681cb0ef41Sopenharmony_ci                        break;
11691cb0ef41Sopenharmony_ci                    } else {
11701cb0ef41Sopenharmony_ci                        list+=3;
11711cb0ef41Sopenharmony_ci                    }
11721cb0ef41Sopenharmony_ci                } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
11731cb0ef41Sopenharmony_ci                    return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2];
11741cb0ef41Sopenharmony_ci                } else {
11751cb0ef41Sopenharmony_ci                    break;
11761cb0ef41Sopenharmony_ci                }
11771cb0ef41Sopenharmony_ci            } else {
11781cb0ef41Sopenharmony_ci                break;
11791cb0ef41Sopenharmony_ci            }
11801cb0ef41Sopenharmony_ci        }
11811cb0ef41Sopenharmony_ci    }
11821cb0ef41Sopenharmony_ci    return -1;
11831cb0ef41Sopenharmony_ci}
11841cb0ef41Sopenharmony_ci
11851cb0ef41Sopenharmony_ci/**
11861cb0ef41Sopenharmony_ci  * @param list some character's compositions list
11871cb0ef41Sopenharmony_ci  * @param set recursively receives the composites from these compositions
11881cb0ef41Sopenharmony_ci  */
11891cb0ef41Sopenharmony_civoid Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const {
11901cb0ef41Sopenharmony_ci    uint16_t firstUnit;
11911cb0ef41Sopenharmony_ci    int32_t compositeAndFwd;
11921cb0ef41Sopenharmony_ci    do {
11931cb0ef41Sopenharmony_ci        firstUnit=*list;
11941cb0ef41Sopenharmony_ci        if((firstUnit&COMP_1_TRIPLE)==0) {
11951cb0ef41Sopenharmony_ci            compositeAndFwd=list[1];
11961cb0ef41Sopenharmony_ci            list+=2;
11971cb0ef41Sopenharmony_ci        } else {
11981cb0ef41Sopenharmony_ci            compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2];
11991cb0ef41Sopenharmony_ci            list+=3;
12001cb0ef41Sopenharmony_ci        }
12011cb0ef41Sopenharmony_ci        UChar32 composite=compositeAndFwd>>1;
12021cb0ef41Sopenharmony_ci        if((compositeAndFwd&1)!=0) {
12031cb0ef41Sopenharmony_ci            addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set);
12041cb0ef41Sopenharmony_ci        }
12051cb0ef41Sopenharmony_ci        set.add(composite);
12061cb0ef41Sopenharmony_ci    } while((firstUnit&COMP_1_LAST_TUPLE)==0);
12071cb0ef41Sopenharmony_ci}
12081cb0ef41Sopenharmony_ci
12091cb0ef41Sopenharmony_ci/*
12101cb0ef41Sopenharmony_ci * Recomposes the buffer text starting at recomposeStartIndex
12111cb0ef41Sopenharmony_ci * (which is in NFD - decomposed and canonically ordered),
12121cb0ef41Sopenharmony_ci * and truncates the buffer contents.
12131cb0ef41Sopenharmony_ci *
12141cb0ef41Sopenharmony_ci * Note that recomposition never lengthens the text:
12151cb0ef41Sopenharmony_ci * Any character consists of either one or two code units;
12161cb0ef41Sopenharmony_ci * a composition may contain at most one more code unit than the original starter,
12171cb0ef41Sopenharmony_ci * while the combining mark that is removed has at least one code unit.
 *
 * buffer: the text from getStart()+recomposeStartIndex up to getLimit() is
 *         modified in place; the resulting end pointer is passed to
 *         setReorderingLimit() before returning.
 * recomposeStartIndex: code unit offset from buffer.getStart() where scanning begins.
 * onlyContiguous: if true, a character with nonzero cc that does not combine
 *         resets compositionsList, so nothing after it can combine with the
 *         earlier starter.
12181cb0ef41Sopenharmony_ci */
12191cb0ef41Sopenharmony_civoid Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
12201cb0ef41Sopenharmony_ci                                UBool onlyContiguous) const {
12211cb0ef41Sopenharmony_ci    char16_t *p=buffer.getStart()+recomposeStartIndex;  // read position
12221cb0ef41Sopenharmony_ci    char16_t *limit=buffer.getLimit();  // end of the text; moves down as text is removed
12231cb0ef41Sopenharmony_ci    if(p==limit) {  // nothing to recompose
12241cb0ef41Sopenharmony_ci        return;
12251cb0ef41Sopenharmony_ci    }
12261cb0ef41Sopenharmony_ci
    // starter: first code unit of the most recent forward-combining starter.
    // pRemove: start of the code units that get deleted after a composition.
    // q, r: destination and source cursors for the in-place moves.
12271cb0ef41Sopenharmony_ci    char16_t *starter, *pRemove, *q, *r;
12281cb0ef41Sopenharmony_ci    const uint16_t *compositionsList;
12291cb0ef41Sopenharmony_ci    UChar32 c, compositeAndFwd;
12301cb0ef41Sopenharmony_ci    uint16_t norm16;
12311cb0ef41Sopenharmony_ci    uint8_t cc, prevCC;
12321cb0ef41Sopenharmony_ci    UBool starterIsSupplementary;
12331cb0ef41Sopenharmony_ci
12341cb0ef41Sopenharmony_ci    // Some of the following variables are not used until we have a forward-combining starter
12351cb0ef41Sopenharmony_ci    // and are only initialized now to avoid compiler warnings.
12361cb0ef41Sopenharmony_ci    compositionsList=nullptr;  // used as indicator for whether we have a forward-combining starter
12371cb0ef41Sopenharmony_ci    starter=nullptr;
12381cb0ef41Sopenharmony_ci    starterIsSupplementary=false;
12391cb0ef41Sopenharmony_ci    prevCC=0;
12401cb0ef41Sopenharmony_ci
12411cb0ef41Sopenharmony_ci    for(;;) {
        // Read the next code point c with its norm16 value; p moves past c.
12421cb0ef41Sopenharmony_ci        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
12431cb0ef41Sopenharmony_ci        cc=getCCFromYesOrMaybe(norm16);
12441cb0ef41Sopenharmony_ci        if( // this character combines backward and
12451cb0ef41Sopenharmony_ci            isMaybe(norm16) &&
12461cb0ef41Sopenharmony_ci            // we have seen a starter that combines forward and
12471cb0ef41Sopenharmony_ci            compositionsList!=nullptr &&
12481cb0ef41Sopenharmony_ci            // the backward-combining character is not blocked
12491cb0ef41Sopenharmony_ci            (prevCC<cc || prevCC==0)
12501cb0ef41Sopenharmony_ci        ) {
12511cb0ef41Sopenharmony_ci            if(isJamoVT(norm16)) {
12521cb0ef41Sopenharmony_ci                // c is a Jamo V/T, see if we can compose it with the previous character.
12531cb0ef41Sopenharmony_ci                if(c<Hangul::JAMO_T_BASE) {
12541cb0ef41Sopenharmony_ci                    // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
                    // prev is unsigned: a starter below JAMO_L_BASE wraps around
                    // to a large value and fails the range test below.
12551cb0ef41Sopenharmony_ci                    char16_t prev=(char16_t)(*starter-Hangul::JAMO_L_BASE);
12561cb0ef41Sopenharmony_ci                    if(prev<Hangul::JAMO_L_COUNT) {
12571cb0ef41Sopenharmony_ci                        pRemove=p-1;  // the Jamo V is the one code unit before p
12581cb0ef41Sopenharmony_ci                        char16_t syllable=(char16_t)
12591cb0ef41Sopenharmony_ci                            (Hangul::HANGUL_BASE+
12601cb0ef41Sopenharmony_ci                             (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))*
12611cb0ef41Sopenharmony_ci                             Hangul::JAMO_T_COUNT);
12621cb0ef41Sopenharmony_ci                        char16_t t;
12631cb0ef41Sopenharmony_ci                        if(p!=limit && (t=(char16_t)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) {
12641cb0ef41Sopenharmony_ci                            ++p;
12651cb0ef41Sopenharmony_ci                            syllable+=t;  // The next character was a Jamo T.
12661cb0ef41Sopenharmony_ci                        }
12671cb0ef41Sopenharmony_ci                        *starter=syllable;
12681cb0ef41Sopenharmony_ci                        // remove the Jamo V/T
                        // by moving the text from p onward down to pRemove
12691cb0ef41Sopenharmony_ci                        q=pRemove;
12701cb0ef41Sopenharmony_ci                        r=p;
12711cb0ef41Sopenharmony_ci                        while(r<limit) {
12721cb0ef41Sopenharmony_ci                            *q++=*r++;
12731cb0ef41Sopenharmony_ci                        }
12741cb0ef41Sopenharmony_ci                        limit=q;  // new end of the text
12751cb0ef41Sopenharmony_ci                        p=pRemove;  // continue reading where the removed text began
12761cb0ef41Sopenharmony_ci                    }
12771cb0ef41Sopenharmony_ci                }
12781cb0ef41Sopenharmony_ci                /*
12791cb0ef41Sopenharmony_ci                 * No "else" for Jamo T:
12801cb0ef41Sopenharmony_ci                 * Since the input is in NFD, there are no Hangul LV syllables that
12811cb0ef41Sopenharmony_ci                 * a Jamo T could combine with.
12821cb0ef41Sopenharmony_ci                 * All Jamo Ts are combined above when handling Jamo Vs.
12831cb0ef41Sopenharmony_ci                 */
12841cb0ef41Sopenharmony_ci                if(p==limit) {
12851cb0ef41Sopenharmony_ci                    break;
12861cb0ef41Sopenharmony_ci                }
                // Whether or not a syllable was built, drop the current starter
                // before reading the next character.
12871cb0ef41Sopenharmony_ci                compositionsList=nullptr;
12881cb0ef41Sopenharmony_ci                continue;
12891cb0ef41Sopenharmony_ci            } else if((compositeAndFwd=combine(compositionsList, c))>=0) {
12901cb0ef41Sopenharmony_ci                // The starter and the combining mark (c) do combine.
12911cb0ef41Sopenharmony_ci                UChar32 composite=compositeAndFwd>>1;
12921cb0ef41Sopenharmony_ci
12931cb0ef41Sopenharmony_ci                // Replace the starter with the composite, remove the combining mark.
12941cb0ef41Sopenharmony_ci                pRemove=p-U16_LENGTH(c);  // pRemove & p: start & limit of the combining mark
12951cb0ef41Sopenharmony_ci                if(starterIsSupplementary) {
12961cb0ef41Sopenharmony_ci                    if(U_IS_SUPPLEMENTARY(composite)) {
12971cb0ef41Sopenharmony_ci                        // both are supplementary
12981cb0ef41Sopenharmony_ci                        starter[0]=U16_LEAD(composite);
12991cb0ef41Sopenharmony_ci                        starter[1]=U16_TRAIL(composite);
13001cb0ef41Sopenharmony_ci                    } else {
13011cb0ef41Sopenharmony_ci                        *starter=(char16_t)composite;
13021cb0ef41Sopenharmony_ci                        // The composite is shorter than the starter,
13031cb0ef41Sopenharmony_ci                        // move the intermediate characters forward one.
13041cb0ef41Sopenharmony_ci                        starterIsSupplementary=false;
13051cb0ef41Sopenharmony_ci                        q=starter+1;
13061cb0ef41Sopenharmony_ci                        r=q+1;
13071cb0ef41Sopenharmony_ci                        while(r<pRemove) {
13081cb0ef41Sopenharmony_ci                            *q++=*r++;
13091cb0ef41Sopenharmony_ci                        }
13101cb0ef41Sopenharmony_ci                        --pRemove;  // the removal range now starts one unit earlier
13111cb0ef41Sopenharmony_ci                    }
13121cb0ef41Sopenharmony_ci                } else if(U_IS_SUPPLEMENTARY(composite)) {
13131cb0ef41Sopenharmony_ci                    // The composite is longer than the starter,
13141cb0ef41Sopenharmony_ci                    // move the intermediate characters back one.
13151cb0ef41Sopenharmony_ci                    starterIsSupplementary=true;
13161cb0ef41Sopenharmony_ci                    ++starter;  // temporarily increment for the loop boundary
13171cb0ef41Sopenharmony_ci                    q=pRemove;
13181cb0ef41Sopenharmony_ci                    r=++pRemove;
13191cb0ef41Sopenharmony_ci                    while(starter<q) {
13201cb0ef41Sopenharmony_ci                        *--r=*--q;
13211cb0ef41Sopenharmony_ci                    }
13221cb0ef41Sopenharmony_ci                    *starter=U16_TRAIL(composite);
13231cb0ef41Sopenharmony_ci                    *--starter=U16_LEAD(composite);  // undo the temporary increment
13241cb0ef41Sopenharmony_ci                } else {
13251cb0ef41Sopenharmony_ci                    // both are on the BMP
13261cb0ef41Sopenharmony_ci                    *starter=(char16_t)composite;
13271cb0ef41Sopenharmony_ci                }
13281cb0ef41Sopenharmony_ci
13291cb0ef41Sopenharmony_ci                /* remove the combining mark by moving the following text over it */
                // pRemove==p happens when the longer composite used up
                // the one code unit of the mark; then nothing is left to remove.
13301cb0ef41Sopenharmony_ci                if(pRemove<p) {
13311cb0ef41Sopenharmony_ci                    q=pRemove;
13321cb0ef41Sopenharmony_ci                    r=p;
13331cb0ef41Sopenharmony_ci                    while(r<limit) {
13341cb0ef41Sopenharmony_ci                        *q++=*r++;
13351cb0ef41Sopenharmony_ci                    }
13361cb0ef41Sopenharmony_ci                    limit=q;
13371cb0ef41Sopenharmony_ci                    p=pRemove;
13381cb0ef41Sopenharmony_ci                }
13391cb0ef41Sopenharmony_ci                // Keep prevCC because we removed the combining mark.
13401cb0ef41Sopenharmony_ci
13411cb0ef41Sopenharmony_ci                if(p==limit) {
13421cb0ef41Sopenharmony_ci                    break;
13431cb0ef41Sopenharmony_ci                }
13441cb0ef41Sopenharmony_ci                // Is the composite a starter that combines forward?
13451cb0ef41Sopenharmony_ci                if(compositeAndFwd&1) {
13461cb0ef41Sopenharmony_ci                    compositionsList=
13471cb0ef41Sopenharmony_ci                        getCompositionsListForComposite(getRawNorm16(composite));
13481cb0ef41Sopenharmony_ci                } else {
13491cb0ef41Sopenharmony_ci                    compositionsList=nullptr;
13501cb0ef41Sopenharmony_ci                }
13511cb0ef41Sopenharmony_ci
13521cb0ef41Sopenharmony_ci                // We combined; continue with looking for compositions.
13531cb0ef41Sopenharmony_ci                continue;
13541cb0ef41Sopenharmony_ci            }
13551cb0ef41Sopenharmony_ci        }
13561cb0ef41Sopenharmony_ci
13571cb0ef41Sopenharmony_ci        // no combination this time
13581cb0ef41Sopenharmony_ci        prevCC=cc;
13591cb0ef41Sopenharmony_ci        if(p==limit) {
13601cb0ef41Sopenharmony_ci            break;
13611cb0ef41Sopenharmony_ci        }
13621cb0ef41Sopenharmony_ci
13631cb0ef41Sopenharmony_ci        // If c did not combine, then check if it is a starter.
13641cb0ef41Sopenharmony_ci        if(cc==0) {
13651cb0ef41Sopenharmony_ci            // Found a new starter.
13661cb0ef41Sopenharmony_ci            if((compositionsList=getCompositionsListForDecompYes(norm16))!=nullptr) {
13671cb0ef41Sopenharmony_ci                // It may combine with something, prepare for it.
                // Remember where c begins: one or two code units before p.
13681cb0ef41Sopenharmony_ci                if(U_IS_BMP(c)) {
13691cb0ef41Sopenharmony_ci                    starterIsSupplementary=false;
13701cb0ef41Sopenharmony_ci                    starter=p-1;
13711cb0ef41Sopenharmony_ci                } else {
13721cb0ef41Sopenharmony_ci                    starterIsSupplementary=true;
13731cb0ef41Sopenharmony_ci                    starter=p-2;
13741cb0ef41Sopenharmony_ci                }
13751cb0ef41Sopenharmony_ci            }
13761cb0ef41Sopenharmony_ci        } else if(onlyContiguous) {
13771cb0ef41Sopenharmony_ci            // FCC: no discontiguous compositions; any intervening character blocks.
13781cb0ef41Sopenharmony_ci            compositionsList=nullptr;
13791cb0ef41Sopenharmony_ci        }
13801cb0ef41Sopenharmony_ci    }
    // Hand the (possibly lowered) end pointer back to the buffer.
13811cb0ef41Sopenharmony_ci    buffer.setReorderingLimit(limit);
13821cb0ef41Sopenharmony_ci}
13831cb0ef41Sopenharmony_ci
13841cb0ef41Sopenharmony_ciUChar32
13851cb0ef41Sopenharmony_ciNormalizer2Impl::composePair(UChar32 a, UChar32 b) const {
13861cb0ef41Sopenharmony_ci    uint16_t norm16=getNorm16(a);  // maps an out-of-range 'a' to inert norm16
13871cb0ef41Sopenharmony_ci    const uint16_t *list;
13881cb0ef41Sopenharmony_ci    if(isInert(norm16)) {
13891cb0ef41Sopenharmony_ci        return U_SENTINEL;
13901cb0ef41Sopenharmony_ci    } else if(norm16<minYesNoMappingsOnly) {
13911cb0ef41Sopenharmony_ci        // a combines forward.
13921cb0ef41Sopenharmony_ci        if(isJamoL(norm16)) {
13931cb0ef41Sopenharmony_ci            b-=Hangul::JAMO_V_BASE;
13941cb0ef41Sopenharmony_ci            if(0<=b && b<Hangul::JAMO_V_COUNT) {
13951cb0ef41Sopenharmony_ci                return
13961cb0ef41Sopenharmony_ci                    (Hangul::HANGUL_BASE+
13971cb0ef41Sopenharmony_ci                     ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)*
13981cb0ef41Sopenharmony_ci                     Hangul::JAMO_T_COUNT);
13991cb0ef41Sopenharmony_ci            } else {
14001cb0ef41Sopenharmony_ci                return U_SENTINEL;
14011cb0ef41Sopenharmony_ci            }
14021cb0ef41Sopenharmony_ci        } else if(isHangulLV(norm16)) {
14031cb0ef41Sopenharmony_ci            b-=Hangul::JAMO_T_BASE;
14041cb0ef41Sopenharmony_ci            if(0<b && b<Hangul::JAMO_T_COUNT) {  // not b==0!
14051cb0ef41Sopenharmony_ci                return a+b;
14061cb0ef41Sopenharmony_ci            } else {
14071cb0ef41Sopenharmony_ci                return U_SENTINEL;
14081cb0ef41Sopenharmony_ci            }
14091cb0ef41Sopenharmony_ci        } else {
14101cb0ef41Sopenharmony_ci            // 'a' has a compositions list in extraData
14111cb0ef41Sopenharmony_ci            list=getMapping(norm16);
14121cb0ef41Sopenharmony_ci            if(norm16>minYesNo) {  // composite 'a' has both mapping & compositions list
14131cb0ef41Sopenharmony_ci                list+=  // mapping pointer
14141cb0ef41Sopenharmony_ci                    1+  // +1 to skip the first unit with the mapping length
14151cb0ef41Sopenharmony_ci                    (*list&MAPPING_LENGTH_MASK);  // + mapping length
14161cb0ef41Sopenharmony_ci            }
14171cb0ef41Sopenharmony_ci        }
14181cb0ef41Sopenharmony_ci    } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) {
14191cb0ef41Sopenharmony_ci        return U_SENTINEL;
14201cb0ef41Sopenharmony_ci    } else {
14211cb0ef41Sopenharmony_ci        list=getCompositionsListForMaybe(norm16);
14221cb0ef41Sopenharmony_ci    }
14231cb0ef41Sopenharmony_ci    if(b<0 || 0x10ffff<b) {  // combine(list, b) requires a valid code point b
14241cb0ef41Sopenharmony_ci        return U_SENTINEL;
14251cb0ef41Sopenharmony_ci    }
14261cb0ef41Sopenharmony_ci#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
14271cb0ef41Sopenharmony_ci    return combine(list, b)>>1;
14281cb0ef41Sopenharmony_ci#else
14291cb0ef41Sopenharmony_ci    int32_t compositeAndFwd=combine(list, b);
14301cb0ef41Sopenharmony_ci    return compositeAndFwd>=0 ? compositeAndFwd>>1 : U_SENTINEL;
14311cb0ef41Sopenharmony_ci#endif
14321cb0ef41Sopenharmony_ci}
14331cb0ef41Sopenharmony_ci
14341cb0ef41Sopenharmony_ci// Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
14351cb0ef41Sopenharmony_ci// doCompose: normalize
14361cb0ef41Sopenharmony_ci// !doCompose: isNormalized (buffer must be empty and initialized)
14371cb0ef41Sopenharmony_ciUBool
14381cb0ef41Sopenharmony_ciNormalizer2Impl::compose(const char16_t *src, const char16_t *limit,
14391cb0ef41Sopenharmony_ci                         UBool onlyContiguous,
14401cb0ef41Sopenharmony_ci                         UBool doCompose,
14411cb0ef41Sopenharmony_ci                         ReorderingBuffer &buffer,
14421cb0ef41Sopenharmony_ci                         UErrorCode &errorCode) const {
14431cb0ef41Sopenharmony_ci    const char16_t *prevBoundary=src;
14441cb0ef41Sopenharmony_ci    UChar32 minNoMaybeCP=minCompNoMaybeCP;
14451cb0ef41Sopenharmony_ci    if(limit==nullptr) {
14461cb0ef41Sopenharmony_ci        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP,
14471cb0ef41Sopenharmony_ci                                           doCompose ? &buffer : nullptr,
14481cb0ef41Sopenharmony_ci                                           errorCode);
14491cb0ef41Sopenharmony_ci        if(U_FAILURE(errorCode)) {
14501cb0ef41Sopenharmony_ci            return false;
14511cb0ef41Sopenharmony_ci        }
14521cb0ef41Sopenharmony_ci        limit=u_strchr(src, 0);
14531cb0ef41Sopenharmony_ci        if (prevBoundary != src) {
14541cb0ef41Sopenharmony_ci            if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {
14551cb0ef41Sopenharmony_ci                prevBoundary = src;
14561cb0ef41Sopenharmony_ci            } else {
14571cb0ef41Sopenharmony_ci                buffer.removeSuffix(1);
14581cb0ef41Sopenharmony_ci                prevBoundary = --src;
14591cb0ef41Sopenharmony_ci            }
14601cb0ef41Sopenharmony_ci        }
14611cb0ef41Sopenharmony_ci    }
14621cb0ef41Sopenharmony_ci
14631cb0ef41Sopenharmony_ci    for (;;) {
14641cb0ef41Sopenharmony_ci        // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
14651cb0ef41Sopenharmony_ci        // or with (compYes && ccc==0) properties.
14661cb0ef41Sopenharmony_ci        const char16_t *prevSrc;
14671cb0ef41Sopenharmony_ci        UChar32 c = 0;
14681cb0ef41Sopenharmony_ci        uint16_t norm16 = 0;
14691cb0ef41Sopenharmony_ci        for (;;) {
14701cb0ef41Sopenharmony_ci            if (src == limit) {
14711cb0ef41Sopenharmony_ci                if (prevBoundary != limit && doCompose) {
14721cb0ef41Sopenharmony_ci                    buffer.appendZeroCC(prevBoundary, limit, errorCode);
14731cb0ef41Sopenharmony_ci                }
14741cb0ef41Sopenharmony_ci                return true;
14751cb0ef41Sopenharmony_ci            }
14761cb0ef41Sopenharmony_ci            if( (c=*src)<minNoMaybeCP ||
14771cb0ef41Sopenharmony_ci                isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
14781cb0ef41Sopenharmony_ci            ) {
14791cb0ef41Sopenharmony_ci                ++src;
14801cb0ef41Sopenharmony_ci            } else {
14811cb0ef41Sopenharmony_ci                prevSrc = src++;
14821cb0ef41Sopenharmony_ci                if(!U16_IS_LEAD(c)) {
14831cb0ef41Sopenharmony_ci                    break;
14841cb0ef41Sopenharmony_ci                } else {
14851cb0ef41Sopenharmony_ci                    char16_t c2;
14861cb0ef41Sopenharmony_ci                    if(src!=limit && U16_IS_TRAIL(c2=*src)) {
14871cb0ef41Sopenharmony_ci                        ++src;
14881cb0ef41Sopenharmony_ci                        c=U16_GET_SUPPLEMENTARY(c, c2);
14891cb0ef41Sopenharmony_ci                        norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
14901cb0ef41Sopenharmony_ci                        if(!isCompYesAndZeroCC(norm16)) {
14911cb0ef41Sopenharmony_ci                            break;
14921cb0ef41Sopenharmony_ci                        }
14931cb0ef41Sopenharmony_ci                    }
14941cb0ef41Sopenharmony_ci                }
14951cb0ef41Sopenharmony_ci            }
14961cb0ef41Sopenharmony_ci        }
14971cb0ef41Sopenharmony_ci        // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
14981cb0ef41Sopenharmony_ci        // The current character is either a "noNo" (has a mapping)
14991cb0ef41Sopenharmony_ci        // or a "maybeYes" (combines backward)
15001cb0ef41Sopenharmony_ci        // or a "yesYes" with ccc!=0.
15011cb0ef41Sopenharmony_ci        // It is not a Hangul syllable or Jamo L because those have "yes" properties.
15021cb0ef41Sopenharmony_ci
15031cb0ef41Sopenharmony_ci        // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
15041cb0ef41Sopenharmony_ci        if (!isMaybeOrNonZeroCC(norm16)) {  // minNoNo <= norm16 < minMaybeYes
15051cb0ef41Sopenharmony_ci            if (!doCompose) {
15061cb0ef41Sopenharmony_ci                return false;
15071cb0ef41Sopenharmony_ci            }
15081cb0ef41Sopenharmony_ci            // Fast path for mapping a character that is immediately surrounded by boundaries.
15091cb0ef41Sopenharmony_ci            // In this case, we need not decompose around the current character.
15101cb0ef41Sopenharmony_ci            if (isDecompNoAlgorithmic(norm16)) {
15111cb0ef41Sopenharmony_ci                // Maps to a single isCompYesAndZeroCC character
15121cb0ef41Sopenharmony_ci                // which also implies hasCompBoundaryBefore.
15131cb0ef41Sopenharmony_ci                if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
15141cb0ef41Sopenharmony_ci                        hasCompBoundaryBefore(src, limit)) {
15151cb0ef41Sopenharmony_ci                    if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
15161cb0ef41Sopenharmony_ci                        break;
15171cb0ef41Sopenharmony_ci                    }
15181cb0ef41Sopenharmony_ci                    if(!buffer.append(mapAlgorithmic(c, norm16), 0, errorCode)) {
15191cb0ef41Sopenharmony_ci                        break;
15201cb0ef41Sopenharmony_ci                    }
15211cb0ef41Sopenharmony_ci                    prevBoundary = src;
15221cb0ef41Sopenharmony_ci                    continue;
15231cb0ef41Sopenharmony_ci                }
15241cb0ef41Sopenharmony_ci            } else if (norm16 < minNoNoCompBoundaryBefore) {
15251cb0ef41Sopenharmony_ci                // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
15261cb0ef41Sopenharmony_ci                if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
15271cb0ef41Sopenharmony_ci                        hasCompBoundaryBefore(src, limit)) {
15281cb0ef41Sopenharmony_ci                    if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
15291cb0ef41Sopenharmony_ci                        break;
15301cb0ef41Sopenharmony_ci                    }
15311cb0ef41Sopenharmony_ci                    const char16_t *mapping = reinterpret_cast<const char16_t *>(getMapping(norm16));
15321cb0ef41Sopenharmony_ci                    int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
15331cb0ef41Sopenharmony_ci                    if(!buffer.appendZeroCC(mapping, mapping + length, errorCode)) {
15341cb0ef41Sopenharmony_ci                        break;
15351cb0ef41Sopenharmony_ci                    }
15361cb0ef41Sopenharmony_ci                    prevBoundary = src;
15371cb0ef41Sopenharmony_ci                    continue;
15381cb0ef41Sopenharmony_ci                }
15391cb0ef41Sopenharmony_ci            } else if (norm16 >= minNoNoEmpty) {
15401cb0ef41Sopenharmony_ci                // The current character maps to nothing.
15411cb0ef41Sopenharmony_ci                // Simply omit it from the output if there is a boundary before _or_ after it.
15421cb0ef41Sopenharmony_ci                // The character itself implies no boundaries.
15431cb0ef41Sopenharmony_ci                if (hasCompBoundaryBefore(src, limit) ||
15441cb0ef41Sopenharmony_ci                        hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
15451cb0ef41Sopenharmony_ci                    if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
15461cb0ef41Sopenharmony_ci                        break;
15471cb0ef41Sopenharmony_ci                    }
15481cb0ef41Sopenharmony_ci                    prevBoundary = src;
15491cb0ef41Sopenharmony_ci                    continue;
15501cb0ef41Sopenharmony_ci                }
15511cb0ef41Sopenharmony_ci            }
15521cb0ef41Sopenharmony_ci            // Other "noNo" type, or need to examine more text around this character:
15531cb0ef41Sopenharmony_ci            // Fall through to the slow path.
15541cb0ef41Sopenharmony_ci        } else if (isJamoVT(norm16) && prevBoundary != prevSrc) {
15551cb0ef41Sopenharmony_ci            char16_t prev=*(prevSrc-1);
15561cb0ef41Sopenharmony_ci            if(c<Hangul::JAMO_T_BASE) {
15571cb0ef41Sopenharmony_ci                // The current character is a Jamo Vowel,
15581cb0ef41Sopenharmony_ci                // compose with previous Jamo L and following Jamo T.
15591cb0ef41Sopenharmony_ci                char16_t l = (char16_t)(prev-Hangul::JAMO_L_BASE);
15601cb0ef41Sopenharmony_ci                if(l<Hangul::JAMO_L_COUNT) {
15611cb0ef41Sopenharmony_ci                    if (!doCompose) {
15621cb0ef41Sopenharmony_ci                        return false;
15631cb0ef41Sopenharmony_ci                    }
15641cb0ef41Sopenharmony_ci                    int32_t t;
15651cb0ef41Sopenharmony_ci                    if (src != limit &&
15661cb0ef41Sopenharmony_ci                            0 < (t = ((int32_t)*src - Hangul::JAMO_T_BASE)) &&
15671cb0ef41Sopenharmony_ci                            t < Hangul::JAMO_T_COUNT) {
15681cb0ef41Sopenharmony_ci                        // The next character is a Jamo T.
15691cb0ef41Sopenharmony_ci                        ++src;
15701cb0ef41Sopenharmony_ci                    } else if (hasCompBoundaryBefore(src, limit)) {
15711cb0ef41Sopenharmony_ci                        // No Jamo T follows, not even via decomposition.
15721cb0ef41Sopenharmony_ci                        t = 0;
15731cb0ef41Sopenharmony_ci                    } else {
15741cb0ef41Sopenharmony_ci                        t = -1;
15751cb0ef41Sopenharmony_ci                    }
15761cb0ef41Sopenharmony_ci                    if (t >= 0) {
15771cb0ef41Sopenharmony_ci                        UChar32 syllable = Hangul::HANGUL_BASE +
15781cb0ef41Sopenharmony_ci                            (l*Hangul::JAMO_V_COUNT + (c-Hangul::JAMO_V_BASE)) *
15791cb0ef41Sopenharmony_ci                            Hangul::JAMO_T_COUNT + t;
15801cb0ef41Sopenharmony_ci                        --prevSrc;  // Replace the Jamo L as well.
15811cb0ef41Sopenharmony_ci                        if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
15821cb0ef41Sopenharmony_ci                            break;
15831cb0ef41Sopenharmony_ci                        }
15841cb0ef41Sopenharmony_ci                        if(!buffer.appendBMP((char16_t)syllable, 0, errorCode)) {
15851cb0ef41Sopenharmony_ci                            break;
15861cb0ef41Sopenharmony_ci                        }
15871cb0ef41Sopenharmony_ci                        prevBoundary = src;
15881cb0ef41Sopenharmony_ci                        continue;
15891cb0ef41Sopenharmony_ci                    }
15901cb0ef41Sopenharmony_ci                    // If we see L+V+x where x!=T then we drop to the slow path,
15911cb0ef41Sopenharmony_ci                    // decompose and recompose.
15921cb0ef41Sopenharmony_ci                    // This is to deal with NFKC finding normal L and V but a
15931cb0ef41Sopenharmony_ci                    // compatibility variant of a T.
15941cb0ef41Sopenharmony_ci                    // We need to either fully compose that combination here
15951cb0ef41Sopenharmony_ci                    // (which would complicate the code and may not work with strange custom data)
15961cb0ef41Sopenharmony_ci                    // or use the slow path.
15971cb0ef41Sopenharmony_ci                }
15981cb0ef41Sopenharmony_ci            } else if (Hangul::isHangulLV(prev)) {
15991cb0ef41Sopenharmony_ci                // The current character is a Jamo Trailing consonant,
16001cb0ef41Sopenharmony_ci                // compose with previous Hangul LV that does not contain a Jamo T.
16011cb0ef41Sopenharmony_ci                if (!doCompose) {
16021cb0ef41Sopenharmony_ci                    return false;
16031cb0ef41Sopenharmony_ci                }
16041cb0ef41Sopenharmony_ci                UChar32 syllable = prev + c - Hangul::JAMO_T_BASE;
16051cb0ef41Sopenharmony_ci                --prevSrc;  // Replace the Hangul LV as well.
16061cb0ef41Sopenharmony_ci                if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
16071cb0ef41Sopenharmony_ci                    break;
16081cb0ef41Sopenharmony_ci                }
16091cb0ef41Sopenharmony_ci                if(!buffer.appendBMP((char16_t)syllable, 0, errorCode)) {
16101cb0ef41Sopenharmony_ci                    break;
16111cb0ef41Sopenharmony_ci                }
16121cb0ef41Sopenharmony_ci                prevBoundary = src;
16131cb0ef41Sopenharmony_ci                continue;
16141cb0ef41Sopenharmony_ci            }
16151cb0ef41Sopenharmony_ci            // No matching context, or may need to decompose surrounding text first:
16161cb0ef41Sopenharmony_ci            // Fall through to the slow path.
16171cb0ef41Sopenharmony_ci        } else if (norm16 > JAMO_VT) {  // norm16 >= MIN_YES_YES_WITH_CC
16181cb0ef41Sopenharmony_ci            // One or more combining marks that do not combine-back:
16191cb0ef41Sopenharmony_ci            // Check for canonical order, copy unchanged if ok and
16201cb0ef41Sopenharmony_ci            // if followed by a character with a boundary-before.
16211cb0ef41Sopenharmony_ci            uint8_t cc = getCCFromNormalYesOrMaybe(norm16);  // cc!=0
16221cb0ef41Sopenharmony_ci            if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) {
16231cb0ef41Sopenharmony_ci                // Fails FCD test, need to decompose and contiguously recompose.
16241cb0ef41Sopenharmony_ci                if (!doCompose) {
16251cb0ef41Sopenharmony_ci                    return false;
16261cb0ef41Sopenharmony_ci                }
16271cb0ef41Sopenharmony_ci            } else {
16281cb0ef41Sopenharmony_ci                // If !onlyContiguous (not FCC), then we ignore the tccc of
16291cb0ef41Sopenharmony_ci                // the previous character which passed the quick check "yes && ccc==0" test.
16301cb0ef41Sopenharmony_ci                const char16_t *nextSrc;
16311cb0ef41Sopenharmony_ci                uint16_t n16;
16321cb0ef41Sopenharmony_ci                for (;;) {
16331cb0ef41Sopenharmony_ci                    if (src == limit) {
16341cb0ef41Sopenharmony_ci                        if (doCompose) {
16351cb0ef41Sopenharmony_ci                            buffer.appendZeroCC(prevBoundary, limit, errorCode);
16361cb0ef41Sopenharmony_ci                        }
16371cb0ef41Sopenharmony_ci                        return true;
16381cb0ef41Sopenharmony_ci                    }
16391cb0ef41Sopenharmony_ci                    uint8_t prevCC = cc;
16401cb0ef41Sopenharmony_ci                    nextSrc = src;
16411cb0ef41Sopenharmony_ci                    UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16);
16421cb0ef41Sopenharmony_ci                    if (n16 >= MIN_YES_YES_WITH_CC) {
16431cb0ef41Sopenharmony_ci                        cc = getCCFromNormalYesOrMaybe(n16);
16441cb0ef41Sopenharmony_ci                        if (prevCC > cc) {
16451cb0ef41Sopenharmony_ci                            if (!doCompose) {
16461cb0ef41Sopenharmony_ci                                return false;
16471cb0ef41Sopenharmony_ci                            }
16481cb0ef41Sopenharmony_ci                            break;
16491cb0ef41Sopenharmony_ci                        }
16501cb0ef41Sopenharmony_ci                    } else {
16511cb0ef41Sopenharmony_ci                        break;
16521cb0ef41Sopenharmony_ci                    }
16531cb0ef41Sopenharmony_ci                    src = nextSrc;
16541cb0ef41Sopenharmony_ci                }
16551cb0ef41Sopenharmony_ci                // src is after the last in-order combining mark.
16561cb0ef41Sopenharmony_ci                // If there is a boundary here, then we continue with no change.
16571cb0ef41Sopenharmony_ci                if (norm16HasCompBoundaryBefore(n16)) {
16581cb0ef41Sopenharmony_ci                    if (isCompYesAndZeroCC(n16)) {
16591cb0ef41Sopenharmony_ci                        src = nextSrc;
16601cb0ef41Sopenharmony_ci                    }
16611cb0ef41Sopenharmony_ci                    continue;
16621cb0ef41Sopenharmony_ci                }
16631cb0ef41Sopenharmony_ci                // Use the slow path. There is no boundary in [prevSrc, src[.
16641cb0ef41Sopenharmony_ci            }
16651cb0ef41Sopenharmony_ci        }
16661cb0ef41Sopenharmony_ci
16671cb0ef41Sopenharmony_ci        // Slow path: Find the nearest boundaries around the current character,
16681cb0ef41Sopenharmony_ci        // decompose and recompose.
16691cb0ef41Sopenharmony_ci        if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
16701cb0ef41Sopenharmony_ci            const char16_t *p = prevSrc;
16711cb0ef41Sopenharmony_ci            UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16);
16721cb0ef41Sopenharmony_ci            if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
16731cb0ef41Sopenharmony_ci                prevSrc = p;
16741cb0ef41Sopenharmony_ci            }
16751cb0ef41Sopenharmony_ci        }
16761cb0ef41Sopenharmony_ci        if (doCompose && prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) {
16771cb0ef41Sopenharmony_ci            break;
16781cb0ef41Sopenharmony_ci        }
16791cb0ef41Sopenharmony_ci        int32_t recomposeStartIndex=buffer.length();
16801cb0ef41Sopenharmony_ci        // We know there is not a boundary here.
16811cb0ef41Sopenharmony_ci        decomposeShort(prevSrc, src, false /* !stopAtCompBoundary */, onlyContiguous,
16821cb0ef41Sopenharmony_ci                       buffer, errorCode);
16831cb0ef41Sopenharmony_ci        // Decompose until the next boundary.
16841cb0ef41Sopenharmony_ci        src = decomposeShort(src, limit, true /* stopAtCompBoundary */, onlyContiguous,
16851cb0ef41Sopenharmony_ci                             buffer, errorCode);
16861cb0ef41Sopenharmony_ci        if (U_FAILURE(errorCode)) {
16871cb0ef41Sopenharmony_ci            break;
16881cb0ef41Sopenharmony_ci        }
16891cb0ef41Sopenharmony_ci        if ((src - prevSrc) > INT32_MAX) {  // guard before buffer.equals()
16901cb0ef41Sopenharmony_ci            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
16911cb0ef41Sopenharmony_ci            return true;
16921cb0ef41Sopenharmony_ci        }
16931cb0ef41Sopenharmony_ci        recompose(buffer, recomposeStartIndex, onlyContiguous);
16941cb0ef41Sopenharmony_ci        if(!doCompose) {
16951cb0ef41Sopenharmony_ci            if(!buffer.equals(prevSrc, src)) {
16961cb0ef41Sopenharmony_ci                return false;
16971cb0ef41Sopenharmony_ci            }
16981cb0ef41Sopenharmony_ci            buffer.remove();
16991cb0ef41Sopenharmony_ci        }
17001cb0ef41Sopenharmony_ci        prevBoundary=src;
17011cb0ef41Sopenharmony_ci    }
17021cb0ef41Sopenharmony_ci    return true;
17031cb0ef41Sopenharmony_ci}
17041cb0ef41Sopenharmony_ci
17051cb0ef41Sopenharmony_ci// Very similar to compose(): Make the same changes in both places if relevant.
17061cb0ef41Sopenharmony_ci// pQCResult==nullptr: spanQuickCheckYes
17071cb0ef41Sopenharmony_ci// pQCResult!=nullptr: quickCheck (*pQCResult must be UNORM_YES)
17081cb0ef41Sopenharmony_ciconst char16_t *
17091cb0ef41Sopenharmony_ciNormalizer2Impl::composeQuickCheck(const char16_t *src, const char16_t *limit,
17101cb0ef41Sopenharmony_ci                                   UBool onlyContiguous,
17111cb0ef41Sopenharmony_ci                                   UNormalizationCheckResult *pQCResult) const {
17121cb0ef41Sopenharmony_ci    const char16_t *prevBoundary=src;
17131cb0ef41Sopenharmony_ci    UChar32 minNoMaybeCP=minCompNoMaybeCP;
17141cb0ef41Sopenharmony_ci    if(limit==nullptr) {
17151cb0ef41Sopenharmony_ci        UErrorCode errorCode=U_ZERO_ERROR;
17161cb0ef41Sopenharmony_ci        src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, nullptr, errorCode);
17171cb0ef41Sopenharmony_ci        limit=u_strchr(src, 0);
17181cb0ef41Sopenharmony_ci        if (prevBoundary != src) {
17191cb0ef41Sopenharmony_ci            if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) {
17201cb0ef41Sopenharmony_ci                prevBoundary = src;
17211cb0ef41Sopenharmony_ci            } else {
17221cb0ef41Sopenharmony_ci                prevBoundary = --src;
17231cb0ef41Sopenharmony_ci            }
17241cb0ef41Sopenharmony_ci        }
17251cb0ef41Sopenharmony_ci    }
17261cb0ef41Sopenharmony_ci
17271cb0ef41Sopenharmony_ci    for(;;) {
17281cb0ef41Sopenharmony_ci        // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
17291cb0ef41Sopenharmony_ci        // or with (compYes && ccc==0) properties.
17301cb0ef41Sopenharmony_ci        const char16_t *prevSrc;
17311cb0ef41Sopenharmony_ci        UChar32 c = 0;
17321cb0ef41Sopenharmony_ci        uint16_t norm16 = 0;
17331cb0ef41Sopenharmony_ci        for (;;) {
17341cb0ef41Sopenharmony_ci            if(src==limit) {
17351cb0ef41Sopenharmony_ci                return src;
17361cb0ef41Sopenharmony_ci            }
17371cb0ef41Sopenharmony_ci            if( (c=*src)<minNoMaybeCP ||
17381cb0ef41Sopenharmony_ci                isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
17391cb0ef41Sopenharmony_ci            ) {
17401cb0ef41Sopenharmony_ci                ++src;
17411cb0ef41Sopenharmony_ci            } else {
17421cb0ef41Sopenharmony_ci                prevSrc = src++;
17431cb0ef41Sopenharmony_ci                if(!U16_IS_LEAD(c)) {
17441cb0ef41Sopenharmony_ci                    break;
17451cb0ef41Sopenharmony_ci                } else {
17461cb0ef41Sopenharmony_ci                    char16_t c2;
17471cb0ef41Sopenharmony_ci                    if(src!=limit && U16_IS_TRAIL(c2=*src)) {
17481cb0ef41Sopenharmony_ci                        ++src;
17491cb0ef41Sopenharmony_ci                        c=U16_GET_SUPPLEMENTARY(c, c2);
17501cb0ef41Sopenharmony_ci                        norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
17511cb0ef41Sopenharmony_ci                        if(!isCompYesAndZeroCC(norm16)) {
17521cb0ef41Sopenharmony_ci                            break;
17531cb0ef41Sopenharmony_ci                        }
17541cb0ef41Sopenharmony_ci                    }
17551cb0ef41Sopenharmony_ci                }
17561cb0ef41Sopenharmony_ci            }
17571cb0ef41Sopenharmony_ci        }
17581cb0ef41Sopenharmony_ci        // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
17591cb0ef41Sopenharmony_ci        // The current character is either a "noNo" (has a mapping)
17601cb0ef41Sopenharmony_ci        // or a "maybeYes" (combines backward)
17611cb0ef41Sopenharmony_ci        // or a "yesYes" with ccc!=0.
17621cb0ef41Sopenharmony_ci        // It is not a Hangul syllable or Jamo L because those have "yes" properties.
17631cb0ef41Sopenharmony_ci
17641cb0ef41Sopenharmony_ci        uint16_t prevNorm16 = INERT;
17651cb0ef41Sopenharmony_ci        if (prevBoundary != prevSrc) {
17661cb0ef41Sopenharmony_ci            if (norm16HasCompBoundaryBefore(norm16)) {
17671cb0ef41Sopenharmony_ci                prevBoundary = prevSrc;
17681cb0ef41Sopenharmony_ci            } else {
17691cb0ef41Sopenharmony_ci                const char16_t *p = prevSrc;
17701cb0ef41Sopenharmony_ci                uint16_t n16;
17711cb0ef41Sopenharmony_ci                UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16);
17721cb0ef41Sopenharmony_ci                if (norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
17731cb0ef41Sopenharmony_ci                    prevBoundary = prevSrc;
17741cb0ef41Sopenharmony_ci                } else {
17751cb0ef41Sopenharmony_ci                    prevBoundary = p;
17761cb0ef41Sopenharmony_ci                    prevNorm16 = n16;
17771cb0ef41Sopenharmony_ci                }
17781cb0ef41Sopenharmony_ci            }
17791cb0ef41Sopenharmony_ci        }
17801cb0ef41Sopenharmony_ci
17811cb0ef41Sopenharmony_ci        if(isMaybeOrNonZeroCC(norm16)) {
17821cb0ef41Sopenharmony_ci            uint8_t cc=getCCFromYesOrMaybe(norm16);
17831cb0ef41Sopenharmony_ci            if (onlyContiguous /* FCC */ && cc != 0 &&
17841cb0ef41Sopenharmony_ci                    getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
17851cb0ef41Sopenharmony_ci                // The [prevBoundary..prevSrc[ character
17861cb0ef41Sopenharmony_ci                // passed the quick check "yes && ccc==0" test
17871cb0ef41Sopenharmony_ci                // but is out of canonical order with the current combining mark.
17881cb0ef41Sopenharmony_ci            } else {
17891cb0ef41Sopenharmony_ci                // If !onlyContiguous (not FCC), then we ignore the tccc of
17901cb0ef41Sopenharmony_ci                // the previous character which passed the quick check "yes && ccc==0" test.
17911cb0ef41Sopenharmony_ci                const char16_t *nextSrc;
17921cb0ef41Sopenharmony_ci                for (;;) {
17931cb0ef41Sopenharmony_ci                    if (norm16 < MIN_YES_YES_WITH_CC) {
17941cb0ef41Sopenharmony_ci                        if (pQCResult != nullptr) {
17951cb0ef41Sopenharmony_ci                            *pQCResult = UNORM_MAYBE;
17961cb0ef41Sopenharmony_ci                        } else {
17971cb0ef41Sopenharmony_ci                            return prevBoundary;
17981cb0ef41Sopenharmony_ci                        }
17991cb0ef41Sopenharmony_ci                    }
18001cb0ef41Sopenharmony_ci                    if (src == limit) {
18011cb0ef41Sopenharmony_ci                        return src;
18021cb0ef41Sopenharmony_ci                    }
18031cb0ef41Sopenharmony_ci                    uint8_t prevCC = cc;
18041cb0ef41Sopenharmony_ci                    nextSrc = src;
18051cb0ef41Sopenharmony_ci                    UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16);
18061cb0ef41Sopenharmony_ci                    if (isMaybeOrNonZeroCC(norm16)) {
18071cb0ef41Sopenharmony_ci                        cc = getCCFromYesOrMaybe(norm16);
18081cb0ef41Sopenharmony_ci                        if (!(prevCC <= cc || cc == 0)) {
18091cb0ef41Sopenharmony_ci                            break;
18101cb0ef41Sopenharmony_ci                        }
18111cb0ef41Sopenharmony_ci                    } else {
18121cb0ef41Sopenharmony_ci                        break;
18131cb0ef41Sopenharmony_ci                    }
18141cb0ef41Sopenharmony_ci                    src = nextSrc;
18151cb0ef41Sopenharmony_ci                }
18161cb0ef41Sopenharmony_ci                // src is after the last in-order combining mark.
18171cb0ef41Sopenharmony_ci                if (isCompYesAndZeroCC(norm16)) {
18181cb0ef41Sopenharmony_ci                    prevBoundary = src;
18191cb0ef41Sopenharmony_ci                    src = nextSrc;
18201cb0ef41Sopenharmony_ci                    continue;
18211cb0ef41Sopenharmony_ci                }
18221cb0ef41Sopenharmony_ci            }
18231cb0ef41Sopenharmony_ci        }
18241cb0ef41Sopenharmony_ci        if(pQCResult!=nullptr) {
18251cb0ef41Sopenharmony_ci            *pQCResult=UNORM_NO;
18261cb0ef41Sopenharmony_ci        }
18271cb0ef41Sopenharmony_ci        return prevBoundary;
18281cb0ef41Sopenharmony_ci    }
18291cb0ef41Sopenharmony_ci}
18301cb0ef41Sopenharmony_ci
18311cb0ef41Sopenharmony_civoid Normalizer2Impl::composeAndAppend(const char16_t *src, const char16_t *limit,
18321cb0ef41Sopenharmony_ci                                       UBool doCompose,
18331cb0ef41Sopenharmony_ci                                       UBool onlyContiguous,
18341cb0ef41Sopenharmony_ci                                       UnicodeString &safeMiddle,
18351cb0ef41Sopenharmony_ci                                       ReorderingBuffer &buffer,
18361cb0ef41Sopenharmony_ci                                       UErrorCode &errorCode) const {
18371cb0ef41Sopenharmony_ci    if(!buffer.isEmpty()) {
18381cb0ef41Sopenharmony_ci        const char16_t *firstStarterInSrc=findNextCompBoundary(src, limit, onlyContiguous);
18391cb0ef41Sopenharmony_ci        if(src!=firstStarterInSrc) {
18401cb0ef41Sopenharmony_ci            const char16_t *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(),
18411cb0ef41Sopenharmony_ci                                                                    buffer.getLimit(), onlyContiguous);
18421cb0ef41Sopenharmony_ci            int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest);
18431cb0ef41Sopenharmony_ci            UnicodeString middle(lastStarterInDest, destSuffixLength);
18441cb0ef41Sopenharmony_ci            buffer.removeSuffix(destSuffixLength);
18451cb0ef41Sopenharmony_ci            safeMiddle=middle;
18461cb0ef41Sopenharmony_ci            middle.append(src, (int32_t)(firstStarterInSrc-src));
18471cb0ef41Sopenharmony_ci            const char16_t *middleStart=middle.getBuffer();
18481cb0ef41Sopenharmony_ci            compose(middleStart, middleStart+middle.length(), onlyContiguous,
18491cb0ef41Sopenharmony_ci                    true, buffer, errorCode);
18501cb0ef41Sopenharmony_ci            if(U_FAILURE(errorCode)) {
18511cb0ef41Sopenharmony_ci                return;
18521cb0ef41Sopenharmony_ci            }
18531cb0ef41Sopenharmony_ci            src=firstStarterInSrc;
18541cb0ef41Sopenharmony_ci        }
18551cb0ef41Sopenharmony_ci    }
18561cb0ef41Sopenharmony_ci    if(doCompose) {
18571cb0ef41Sopenharmony_ci        compose(src, limit, onlyContiguous, true, buffer, errorCode);
18581cb0ef41Sopenharmony_ci    } else {
18591cb0ef41Sopenharmony_ci        if(limit==nullptr) {  // appendZeroCC() needs limit!=nullptr
18601cb0ef41Sopenharmony_ci            limit=u_strchr(src, 0);
18611cb0ef41Sopenharmony_ci        }
18621cb0ef41Sopenharmony_ci        buffer.appendZeroCC(src, limit, errorCode);
18631cb0ef41Sopenharmony_ci    }
18641cb0ef41Sopenharmony_ci}
18651cb0ef41Sopenharmony_ci
18661cb0ef41Sopenharmony_ciUBool
18671cb0ef41Sopenharmony_ciNormalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
18681cb0ef41Sopenharmony_ci                             const uint8_t *src, const uint8_t *limit,
18691cb0ef41Sopenharmony_ci                             ByteSink *sink, Edits *edits, UErrorCode &errorCode) const {
18701cb0ef41Sopenharmony_ci    U_ASSERT(limit != nullptr);
18711cb0ef41Sopenharmony_ci    UnicodeString s16;
18721cb0ef41Sopenharmony_ci    uint8_t minNoMaybeLead = leadByteForCP(minCompNoMaybeCP);
18731cb0ef41Sopenharmony_ci    const uint8_t *prevBoundary = src;
18741cb0ef41Sopenharmony_ci
18751cb0ef41Sopenharmony_ci    for (;;) {
18761cb0ef41Sopenharmony_ci        // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
18771cb0ef41Sopenharmony_ci        // or with (compYes && ccc==0) properties.
18781cb0ef41Sopenharmony_ci        const uint8_t *prevSrc;
18791cb0ef41Sopenharmony_ci        uint16_t norm16 = 0;
18801cb0ef41Sopenharmony_ci        for (;;) {
18811cb0ef41Sopenharmony_ci            if (src == limit) {
18821cb0ef41Sopenharmony_ci                if (prevBoundary != limit && sink != nullptr) {
18831cb0ef41Sopenharmony_ci                    ByteSinkUtil::appendUnchanged(prevBoundary, limit,
18841cb0ef41Sopenharmony_ci                                                  *sink, options, edits, errorCode);
18851cb0ef41Sopenharmony_ci                }
18861cb0ef41Sopenharmony_ci                return true;
18871cb0ef41Sopenharmony_ci            }
18881cb0ef41Sopenharmony_ci            if (*src < minNoMaybeLead) {
18891cb0ef41Sopenharmony_ci                ++src;
18901cb0ef41Sopenharmony_ci            } else {
18911cb0ef41Sopenharmony_ci                prevSrc = src;
18921cb0ef41Sopenharmony_ci                UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
18931cb0ef41Sopenharmony_ci                if (!isCompYesAndZeroCC(norm16)) {
18941cb0ef41Sopenharmony_ci                    break;
18951cb0ef41Sopenharmony_ci                }
18961cb0ef41Sopenharmony_ci            }
18971cb0ef41Sopenharmony_ci        }
18981cb0ef41Sopenharmony_ci        // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
18991cb0ef41Sopenharmony_ci        // The current character is either a "noNo" (has a mapping)
19001cb0ef41Sopenharmony_ci        // or a "maybeYes" (combines backward)
19011cb0ef41Sopenharmony_ci        // or a "yesYes" with ccc!=0.
19021cb0ef41Sopenharmony_ci        // It is not a Hangul syllable or Jamo L because those have "yes" properties.
19031cb0ef41Sopenharmony_ci
19041cb0ef41Sopenharmony_ci        // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
19051cb0ef41Sopenharmony_ci        if (!isMaybeOrNonZeroCC(norm16)) {  // minNoNo <= norm16 < minMaybeYes
19061cb0ef41Sopenharmony_ci            if (sink == nullptr) {
19071cb0ef41Sopenharmony_ci                return false;
19081cb0ef41Sopenharmony_ci            }
19091cb0ef41Sopenharmony_ci            // Fast path for mapping a character that is immediately surrounded by boundaries.
19101cb0ef41Sopenharmony_ci            // In this case, we need not decompose around the current character.
19111cb0ef41Sopenharmony_ci            if (isDecompNoAlgorithmic(norm16)) {
19121cb0ef41Sopenharmony_ci                // Maps to a single isCompYesAndZeroCC character
19131cb0ef41Sopenharmony_ci                // which also implies hasCompBoundaryBefore.
19141cb0ef41Sopenharmony_ci                if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
19151cb0ef41Sopenharmony_ci                        hasCompBoundaryBefore(src, limit)) {
19161cb0ef41Sopenharmony_ci                    if (prevBoundary != prevSrc &&
19171cb0ef41Sopenharmony_ci                            !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
19181cb0ef41Sopenharmony_ci                                                           *sink, options, edits, errorCode)) {
19191cb0ef41Sopenharmony_ci                        break;
19201cb0ef41Sopenharmony_ci                    }
19211cb0ef41Sopenharmony_ci                    appendCodePointDelta(prevSrc, src, getAlgorithmicDelta(norm16), *sink, edits);
19221cb0ef41Sopenharmony_ci                    prevBoundary = src;
19231cb0ef41Sopenharmony_ci                    continue;
19241cb0ef41Sopenharmony_ci                }
19251cb0ef41Sopenharmony_ci            } else if (norm16 < minNoNoCompBoundaryBefore) {
19261cb0ef41Sopenharmony_ci                // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
19271cb0ef41Sopenharmony_ci                if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
19281cb0ef41Sopenharmony_ci                        hasCompBoundaryBefore(src, limit)) {
19291cb0ef41Sopenharmony_ci                    if (prevBoundary != prevSrc &&
19301cb0ef41Sopenharmony_ci                            !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
19311cb0ef41Sopenharmony_ci                                                           *sink, options, edits, errorCode)) {
19321cb0ef41Sopenharmony_ci                        break;
19331cb0ef41Sopenharmony_ci                    }
19341cb0ef41Sopenharmony_ci                    const uint16_t *mapping = getMapping(norm16);
19351cb0ef41Sopenharmony_ci                    int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
19361cb0ef41Sopenharmony_ci                    if (!ByteSinkUtil::appendChange(prevSrc, src, (const char16_t *)mapping, length,
19371cb0ef41Sopenharmony_ci                                                    *sink, edits, errorCode)) {
19381cb0ef41Sopenharmony_ci                        break;
19391cb0ef41Sopenharmony_ci                    }
19401cb0ef41Sopenharmony_ci                    prevBoundary = src;
19411cb0ef41Sopenharmony_ci                    continue;
19421cb0ef41Sopenharmony_ci                }
19431cb0ef41Sopenharmony_ci            } else if (norm16 >= minNoNoEmpty) {
19441cb0ef41Sopenharmony_ci                // The current character maps to nothing.
19451cb0ef41Sopenharmony_ci                // Simply omit it from the output if there is a boundary before _or_ after it.
19461cb0ef41Sopenharmony_ci                // The character itself implies no boundaries.
19471cb0ef41Sopenharmony_ci                if (hasCompBoundaryBefore(src, limit) ||
19481cb0ef41Sopenharmony_ci                        hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
19491cb0ef41Sopenharmony_ci                    if (prevBoundary != prevSrc &&
19501cb0ef41Sopenharmony_ci                            !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
19511cb0ef41Sopenharmony_ci                                                           *sink, options, edits, errorCode)) {
19521cb0ef41Sopenharmony_ci                        break;
19531cb0ef41Sopenharmony_ci                    }
19541cb0ef41Sopenharmony_ci                    if (edits != nullptr) {
19551cb0ef41Sopenharmony_ci                        edits->addReplace((int32_t)(src - prevSrc), 0);
19561cb0ef41Sopenharmony_ci                    }
19571cb0ef41Sopenharmony_ci                    prevBoundary = src;
19581cb0ef41Sopenharmony_ci                    continue;
19591cb0ef41Sopenharmony_ci                }
19601cb0ef41Sopenharmony_ci            }
19611cb0ef41Sopenharmony_ci            // Other "noNo" type, or need to examine more text around this character:
19621cb0ef41Sopenharmony_ci            // Fall through to the slow path.
19631cb0ef41Sopenharmony_ci        } else if (isJamoVT(norm16)) {
19641cb0ef41Sopenharmony_ci            // Jamo L: E1 84 80..92
19651cb0ef41Sopenharmony_ci            // Jamo V: E1 85 A1..B5
19661cb0ef41Sopenharmony_ci            // Jamo T: E1 86 A8..E1 87 82
19671cb0ef41Sopenharmony_ci            U_ASSERT((src - prevSrc) == 3 && *prevSrc == 0xe1);
19681cb0ef41Sopenharmony_ci            UChar32 prev = previousHangulOrJamo(prevBoundary, prevSrc);
19691cb0ef41Sopenharmony_ci            if (prevSrc[1] == 0x85) {
19701cb0ef41Sopenharmony_ci                // The current character is a Jamo Vowel,
19711cb0ef41Sopenharmony_ci                // compose with previous Jamo L and following Jamo T.
19721cb0ef41Sopenharmony_ci                UChar32 l = prev - Hangul::JAMO_L_BASE;
19731cb0ef41Sopenharmony_ci                if ((uint32_t)l < Hangul::JAMO_L_COUNT) {
19741cb0ef41Sopenharmony_ci                    if (sink == nullptr) {
19751cb0ef41Sopenharmony_ci                        return false;
19761cb0ef41Sopenharmony_ci                    }
19771cb0ef41Sopenharmony_ci                    int32_t t = getJamoTMinusBase(src, limit);
19781cb0ef41Sopenharmony_ci                    if (t >= 0) {
19791cb0ef41Sopenharmony_ci                        // The next character is a Jamo T.
19801cb0ef41Sopenharmony_ci                        src += 3;
19811cb0ef41Sopenharmony_ci                    } else if (hasCompBoundaryBefore(src, limit)) {
19821cb0ef41Sopenharmony_ci                        // No Jamo T follows, not even via decomposition.
19831cb0ef41Sopenharmony_ci                        t = 0;
19841cb0ef41Sopenharmony_ci                    }
19851cb0ef41Sopenharmony_ci                    if (t >= 0) {
19861cb0ef41Sopenharmony_ci                        UChar32 syllable = Hangul::HANGUL_BASE +
19871cb0ef41Sopenharmony_ci                            (l*Hangul::JAMO_V_COUNT + (prevSrc[2]-0xa1)) *
19881cb0ef41Sopenharmony_ci                            Hangul::JAMO_T_COUNT + t;
19891cb0ef41Sopenharmony_ci                        prevSrc -= 3;  // Replace the Jamo L as well.
19901cb0ef41Sopenharmony_ci                        if (prevBoundary != prevSrc &&
19911cb0ef41Sopenharmony_ci                                !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
19921cb0ef41Sopenharmony_ci                                                               *sink, options, edits, errorCode)) {
19931cb0ef41Sopenharmony_ci                            break;
19941cb0ef41Sopenharmony_ci                        }
19951cb0ef41Sopenharmony_ci                        ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
19961cb0ef41Sopenharmony_ci                        prevBoundary = src;
19971cb0ef41Sopenharmony_ci                        continue;
19981cb0ef41Sopenharmony_ci                    }
19991cb0ef41Sopenharmony_ci                    // If we see L+V+x where x!=T then we drop to the slow path,
20001cb0ef41Sopenharmony_ci                    // decompose and recompose.
20011cb0ef41Sopenharmony_ci                    // This is to deal with NFKC finding normal L and V but a
20021cb0ef41Sopenharmony_ci                    // compatibility variant of a T.
20031cb0ef41Sopenharmony_ci                    // We need to either fully compose that combination here
20041cb0ef41Sopenharmony_ci                    // (which would complicate the code and may not work with strange custom data)
20051cb0ef41Sopenharmony_ci                    // or use the slow path.
20061cb0ef41Sopenharmony_ci                }
20071cb0ef41Sopenharmony_ci            } else if (Hangul::isHangulLV(prev)) {
20081cb0ef41Sopenharmony_ci                // The current character is a Jamo Trailing consonant,
20091cb0ef41Sopenharmony_ci                // compose with previous Hangul LV that does not contain a Jamo T.
20101cb0ef41Sopenharmony_ci                if (sink == nullptr) {
20111cb0ef41Sopenharmony_ci                    return false;
20121cb0ef41Sopenharmony_ci                }
20131cb0ef41Sopenharmony_ci                UChar32 syllable = prev + getJamoTMinusBase(prevSrc, src);
20141cb0ef41Sopenharmony_ci                prevSrc -= 3;  // Replace the Hangul LV as well.
20151cb0ef41Sopenharmony_ci                if (prevBoundary != prevSrc &&
20161cb0ef41Sopenharmony_ci                        !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
20171cb0ef41Sopenharmony_ci                                                       *sink, options, edits, errorCode)) {
20181cb0ef41Sopenharmony_ci                    break;
20191cb0ef41Sopenharmony_ci                }
20201cb0ef41Sopenharmony_ci                ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
20211cb0ef41Sopenharmony_ci                prevBoundary = src;
20221cb0ef41Sopenharmony_ci                continue;
20231cb0ef41Sopenharmony_ci            }
20241cb0ef41Sopenharmony_ci            // No matching context, or may need to decompose surrounding text first:
20251cb0ef41Sopenharmony_ci            // Fall through to the slow path.
20261cb0ef41Sopenharmony_ci        } else if (norm16 > JAMO_VT) {  // norm16 >= MIN_YES_YES_WITH_CC
20271cb0ef41Sopenharmony_ci            // One or more combining marks that do not combine-back:
20281cb0ef41Sopenharmony_ci            // Check for canonical order, copy unchanged if ok and
20291cb0ef41Sopenharmony_ci            // if followed by a character with a boundary-before.
20301cb0ef41Sopenharmony_ci            uint8_t cc = getCCFromNormalYesOrMaybe(norm16);  // cc!=0
20311cb0ef41Sopenharmony_ci            if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) {
20321cb0ef41Sopenharmony_ci                // Fails FCD test, need to decompose and contiguously recompose.
20331cb0ef41Sopenharmony_ci                if (sink == nullptr) {
20341cb0ef41Sopenharmony_ci                    return false;
20351cb0ef41Sopenharmony_ci                }
20361cb0ef41Sopenharmony_ci            } else {
20371cb0ef41Sopenharmony_ci                // If !onlyContiguous (not FCC), then we ignore the tccc of
20381cb0ef41Sopenharmony_ci                // the previous character which passed the quick check "yes && ccc==0" test.
20391cb0ef41Sopenharmony_ci                const uint8_t *nextSrc;
20401cb0ef41Sopenharmony_ci                uint16_t n16;
20411cb0ef41Sopenharmony_ci                for (;;) {
20421cb0ef41Sopenharmony_ci                    if (src == limit) {
20431cb0ef41Sopenharmony_ci                        if (sink != nullptr) {
20441cb0ef41Sopenharmony_ci                            ByteSinkUtil::appendUnchanged(prevBoundary, limit,
20451cb0ef41Sopenharmony_ci                                                          *sink, options, edits, errorCode);
20461cb0ef41Sopenharmony_ci                        }
20471cb0ef41Sopenharmony_ci                        return true;
20481cb0ef41Sopenharmony_ci                    }
20491cb0ef41Sopenharmony_ci                    uint8_t prevCC = cc;
20501cb0ef41Sopenharmony_ci                    nextSrc = src;
20511cb0ef41Sopenharmony_ci                    UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16);
20521cb0ef41Sopenharmony_ci                    if (n16 >= MIN_YES_YES_WITH_CC) {
20531cb0ef41Sopenharmony_ci                        cc = getCCFromNormalYesOrMaybe(n16);
20541cb0ef41Sopenharmony_ci                        if (prevCC > cc) {
20551cb0ef41Sopenharmony_ci                            if (sink == nullptr) {
20561cb0ef41Sopenharmony_ci                                return false;
20571cb0ef41Sopenharmony_ci                            }
20581cb0ef41Sopenharmony_ci                            break;
20591cb0ef41Sopenharmony_ci                        }
20601cb0ef41Sopenharmony_ci                    } else {
20611cb0ef41Sopenharmony_ci                        break;
20621cb0ef41Sopenharmony_ci                    }
20631cb0ef41Sopenharmony_ci                    src = nextSrc;
20641cb0ef41Sopenharmony_ci                }
20651cb0ef41Sopenharmony_ci                // src is after the last in-order combining mark.
20661cb0ef41Sopenharmony_ci                // If there is a boundary here, then we continue with no change.
20671cb0ef41Sopenharmony_ci                if (norm16HasCompBoundaryBefore(n16)) {
20681cb0ef41Sopenharmony_ci                    if (isCompYesAndZeroCC(n16)) {
20691cb0ef41Sopenharmony_ci                        src = nextSrc;
20701cb0ef41Sopenharmony_ci                    }
20711cb0ef41Sopenharmony_ci                    continue;
20721cb0ef41Sopenharmony_ci                }
20731cb0ef41Sopenharmony_ci                // Use the slow path. There is no boundary in [prevSrc, src[.
20741cb0ef41Sopenharmony_ci            }
20751cb0ef41Sopenharmony_ci        }
20761cb0ef41Sopenharmony_ci
20771cb0ef41Sopenharmony_ci        // Slow path: Find the nearest boundaries around the current character,
20781cb0ef41Sopenharmony_ci        // decompose and recompose.
20791cb0ef41Sopenharmony_ci        if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
20801cb0ef41Sopenharmony_ci            const uint8_t *p = prevSrc;
20811cb0ef41Sopenharmony_ci            UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16);
20821cb0ef41Sopenharmony_ci            if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
20831cb0ef41Sopenharmony_ci                prevSrc = p;
20841cb0ef41Sopenharmony_ci            }
20851cb0ef41Sopenharmony_ci        }
20861cb0ef41Sopenharmony_ci        ReorderingBuffer buffer(*this, s16, errorCode);
20871cb0ef41Sopenharmony_ci        if (U_FAILURE(errorCode)) {
20881cb0ef41Sopenharmony_ci            break;
20891cb0ef41Sopenharmony_ci        }
20901cb0ef41Sopenharmony_ci        // We know there is not a boundary here.
20911cb0ef41Sopenharmony_ci        decomposeShort(prevSrc, src, STOP_AT_LIMIT, onlyContiguous,
20921cb0ef41Sopenharmony_ci                       buffer, errorCode);
20931cb0ef41Sopenharmony_ci        // Decompose until the next boundary.
20941cb0ef41Sopenharmony_ci        src = decomposeShort(src, limit, STOP_AT_COMP_BOUNDARY, onlyContiguous,
20951cb0ef41Sopenharmony_ci                             buffer, errorCode);
20961cb0ef41Sopenharmony_ci        if (U_FAILURE(errorCode)) {
20971cb0ef41Sopenharmony_ci            break;
20981cb0ef41Sopenharmony_ci        }
20991cb0ef41Sopenharmony_ci        if ((src - prevSrc) > INT32_MAX) {  // guard before buffer.equals()
21001cb0ef41Sopenharmony_ci            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
21011cb0ef41Sopenharmony_ci            return true;
21021cb0ef41Sopenharmony_ci        }
21031cb0ef41Sopenharmony_ci        recompose(buffer, 0, onlyContiguous);
21041cb0ef41Sopenharmony_ci        if (!buffer.equals(prevSrc, src)) {
21051cb0ef41Sopenharmony_ci            if (sink == nullptr) {
21061cb0ef41Sopenharmony_ci                return false;
21071cb0ef41Sopenharmony_ci            }
21081cb0ef41Sopenharmony_ci            if (prevBoundary != prevSrc &&
21091cb0ef41Sopenharmony_ci                    !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
21101cb0ef41Sopenharmony_ci                                                   *sink, options, edits, errorCode)) {
21111cb0ef41Sopenharmony_ci                break;
21121cb0ef41Sopenharmony_ci            }
21131cb0ef41Sopenharmony_ci            if (!ByteSinkUtil::appendChange(prevSrc, src, buffer.getStart(), buffer.length(),
21141cb0ef41Sopenharmony_ci                                            *sink, edits, errorCode)) {
21151cb0ef41Sopenharmony_ci                break;
21161cb0ef41Sopenharmony_ci            }
21171cb0ef41Sopenharmony_ci            prevBoundary = src;
21181cb0ef41Sopenharmony_ci        }
21191cb0ef41Sopenharmony_ci    }
21201cb0ef41Sopenharmony_ci    return true;
21211cb0ef41Sopenharmony_ci}
21221cb0ef41Sopenharmony_ci
21231cb0ef41Sopenharmony_ciUBool Normalizer2Impl::hasCompBoundaryBefore(const char16_t *src, const char16_t *limit) const {
21241cb0ef41Sopenharmony_ci    if (src == limit || *src < minCompNoMaybeCP) {
21251cb0ef41Sopenharmony_ci        return true;
21261cb0ef41Sopenharmony_ci    }
21271cb0ef41Sopenharmony_ci    UChar32 c;
21281cb0ef41Sopenharmony_ci    uint16_t norm16;
21291cb0ef41Sopenharmony_ci    UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
21301cb0ef41Sopenharmony_ci    return norm16HasCompBoundaryBefore(norm16);
21311cb0ef41Sopenharmony_ci}
21321cb0ef41Sopenharmony_ci
21331cb0ef41Sopenharmony_ciUBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const {
21341cb0ef41Sopenharmony_ci    if (src == limit) {
21351cb0ef41Sopenharmony_ci        return true;
21361cb0ef41Sopenharmony_ci    }
21371cb0ef41Sopenharmony_ci    uint16_t norm16;
21381cb0ef41Sopenharmony_ci    UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
21391cb0ef41Sopenharmony_ci    return norm16HasCompBoundaryBefore(norm16);
21401cb0ef41Sopenharmony_ci}
21411cb0ef41Sopenharmony_ci
21421cb0ef41Sopenharmony_ciUBool Normalizer2Impl::hasCompBoundaryAfter(const char16_t *start, const char16_t *p,
21431cb0ef41Sopenharmony_ci                                            UBool onlyContiguous) const {
21441cb0ef41Sopenharmony_ci    if (start == p) {
21451cb0ef41Sopenharmony_ci        return true;
21461cb0ef41Sopenharmony_ci    }
21471cb0ef41Sopenharmony_ci    UChar32 c;
21481cb0ef41Sopenharmony_ci    uint16_t norm16;
21491cb0ef41Sopenharmony_ci    UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
21501cb0ef41Sopenharmony_ci    return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
21511cb0ef41Sopenharmony_ci}
21521cb0ef41Sopenharmony_ci
21531cb0ef41Sopenharmony_ciUBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p,
21541cb0ef41Sopenharmony_ci                                            UBool onlyContiguous) const {
21551cb0ef41Sopenharmony_ci    if (start == p) {
21561cb0ef41Sopenharmony_ci        return true;
21571cb0ef41Sopenharmony_ci    }
21581cb0ef41Sopenharmony_ci    uint16_t norm16;
21591cb0ef41Sopenharmony_ci    UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16);
21601cb0ef41Sopenharmony_ci    return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
21611cb0ef41Sopenharmony_ci}
21621cb0ef41Sopenharmony_ci
21631cb0ef41Sopenharmony_ciconst char16_t *Normalizer2Impl::findPreviousCompBoundary(const char16_t *start, const char16_t *p,
21641cb0ef41Sopenharmony_ci                                                       UBool onlyContiguous) const {
21651cb0ef41Sopenharmony_ci    while (p != start) {
21661cb0ef41Sopenharmony_ci        const char16_t *codePointLimit = p;
21671cb0ef41Sopenharmony_ci        UChar32 c;
21681cb0ef41Sopenharmony_ci        uint16_t norm16;
21691cb0ef41Sopenharmony_ci        UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
21701cb0ef41Sopenharmony_ci        if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
21711cb0ef41Sopenharmony_ci            return codePointLimit;
21721cb0ef41Sopenharmony_ci        }
21731cb0ef41Sopenharmony_ci        if (hasCompBoundaryBefore(c, norm16)) {
21741cb0ef41Sopenharmony_ci            return p;
21751cb0ef41Sopenharmony_ci        }
21761cb0ef41Sopenharmony_ci    }
21771cb0ef41Sopenharmony_ci    return p;
21781cb0ef41Sopenharmony_ci}
21791cb0ef41Sopenharmony_ci
21801cb0ef41Sopenharmony_ciconst char16_t *Normalizer2Impl::findNextCompBoundary(const char16_t *p, const char16_t *limit,
21811cb0ef41Sopenharmony_ci                                                   UBool onlyContiguous) const {
21821cb0ef41Sopenharmony_ci    while (p != limit) {
21831cb0ef41Sopenharmony_ci        const char16_t *codePointStart = p;
21841cb0ef41Sopenharmony_ci        UChar32 c;
21851cb0ef41Sopenharmony_ci        uint16_t norm16;
21861cb0ef41Sopenharmony_ci        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
21871cb0ef41Sopenharmony_ci        if (hasCompBoundaryBefore(c, norm16)) {
21881cb0ef41Sopenharmony_ci            return codePointStart;
21891cb0ef41Sopenharmony_ci        }
21901cb0ef41Sopenharmony_ci        if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
21911cb0ef41Sopenharmony_ci            return p;
21921cb0ef41Sopenharmony_ci        }
21931cb0ef41Sopenharmony_ci    }
21941cb0ef41Sopenharmony_ci    return p;
21951cb0ef41Sopenharmony_ci}
21961cb0ef41Sopenharmony_ci
21971cb0ef41Sopenharmony_ciuint8_t Normalizer2Impl::getPreviousTrailCC(const char16_t *start, const char16_t *p) const {
21981cb0ef41Sopenharmony_ci    if (start == p) {
21991cb0ef41Sopenharmony_ci        return 0;
22001cb0ef41Sopenharmony_ci    }
22011cb0ef41Sopenharmony_ci    int32_t i = (int32_t)(p - start);
22021cb0ef41Sopenharmony_ci    UChar32 c;
22031cb0ef41Sopenharmony_ci    U16_PREV(start, 0, i, c);
22041cb0ef41Sopenharmony_ci    return (uint8_t)getFCD16(c);
22051cb0ef41Sopenharmony_ci}
22061cb0ef41Sopenharmony_ci
22071cb0ef41Sopenharmony_ciuint8_t Normalizer2Impl::getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const {
22081cb0ef41Sopenharmony_ci    if (start == p) {
22091cb0ef41Sopenharmony_ci        return 0;
22101cb0ef41Sopenharmony_ci    }
22111cb0ef41Sopenharmony_ci    int32_t i = (int32_t)(p - start);
22121cb0ef41Sopenharmony_ci    UChar32 c;
22131cb0ef41Sopenharmony_ci    U8_PREV(start, 0, i, c);
22141cb0ef41Sopenharmony_ci    return (uint8_t)getFCD16(c);
22151cb0ef41Sopenharmony_ci}
22161cb0ef41Sopenharmony_ci
// Note: normalizer2impl.cpp r30982 (2011-nov-27)
// still had getFCDTrie() which built and cached an FCD trie.
// That provided faster access to FCD data than getFCD16FromNormData()
// but required synchronization and consumed some 10kB of heap memory
// in any process that uses FCD (e.g., via collation).
// minDecompNoCP etc. and smallFCD[] are intended to help with any loss of performance,
// at least for ASCII & CJK.
22241cb0ef41Sopenharmony_ci
22251cb0ef41Sopenharmony_ci// Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
22261cb0ef41Sopenharmony_ci// function on Windows ARM64. As a work-around, we disable optimizations for this function.
22271cb0ef41Sopenharmony_ci// This work-around could/should be removed once the following versions of Visual Studio are no
22281cb0ef41Sopenharmony_ci// longer supported: All versions of VS2017, and versions of VS2019 below 16.4.
22291cb0ef41Sopenharmony_ci#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
22301cb0ef41Sopenharmony_ci#pragma optimize( "", off )
22311cb0ef41Sopenharmony_ci#endif
22321cb0ef41Sopenharmony_ci// Gets the FCD value from the regular normalization data.
22331cb0ef41Sopenharmony_ciuint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
22341cb0ef41Sopenharmony_ci    uint16_t norm16=getNorm16(c);
22351cb0ef41Sopenharmony_ci    if (norm16 >= limitNoNo) {
22361cb0ef41Sopenharmony_ci        if(norm16>=MIN_NORMAL_MAYBE_YES) {
22371cb0ef41Sopenharmony_ci            // combining mark
22381cb0ef41Sopenharmony_ci            norm16=getCCFromNormalYesOrMaybe(norm16);
22391cb0ef41Sopenharmony_ci            return norm16|(norm16<<8);
22401cb0ef41Sopenharmony_ci        } else if(norm16>=minMaybeYes) {
22411cb0ef41Sopenharmony_ci            return 0;
22421cb0ef41Sopenharmony_ci        } else {  // isDecompNoAlgorithmic(norm16)
22431cb0ef41Sopenharmony_ci            uint16_t deltaTrailCC = norm16 & DELTA_TCCC_MASK;
22441cb0ef41Sopenharmony_ci            if (deltaTrailCC <= DELTA_TCCC_1) {
22451cb0ef41Sopenharmony_ci                return deltaTrailCC >> OFFSET_SHIFT;
22461cb0ef41Sopenharmony_ci            }
22471cb0ef41Sopenharmony_ci            // Maps to an isCompYesAndZeroCC.
22481cb0ef41Sopenharmony_ci            c=mapAlgorithmic(c, norm16);
22491cb0ef41Sopenharmony_ci            norm16=getRawNorm16(c);
22501cb0ef41Sopenharmony_ci        }
22511cb0ef41Sopenharmony_ci    }
22521cb0ef41Sopenharmony_ci    if(norm16<=minYesNo || isHangulLVT(norm16)) {
22531cb0ef41Sopenharmony_ci        // no decomposition or Hangul syllable, all zeros
22541cb0ef41Sopenharmony_ci        return 0;
22551cb0ef41Sopenharmony_ci    }
22561cb0ef41Sopenharmony_ci    // c decomposes, get everything from the variable-length extra data
22571cb0ef41Sopenharmony_ci    const uint16_t *mapping=getMapping(norm16);
22581cb0ef41Sopenharmony_ci    uint16_t firstUnit=*mapping;
22591cb0ef41Sopenharmony_ci    norm16=firstUnit>>8;  // tccc
22601cb0ef41Sopenharmony_ci    if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
22611cb0ef41Sopenharmony_ci        norm16|=*(mapping-1)&0xff00;  // lccc
22621cb0ef41Sopenharmony_ci    }
22631cb0ef41Sopenharmony_ci    return norm16;
22641cb0ef41Sopenharmony_ci}
22651cb0ef41Sopenharmony_ci#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
22661cb0ef41Sopenharmony_ci#pragma optimize( "", on )
22671cb0ef41Sopenharmony_ci#endif
22681cb0ef41Sopenharmony_ci
22691cb0ef41Sopenharmony_ci// Dual functionality:
22701cb0ef41Sopenharmony_ci// buffer!=nullptr: normalize
22711cb0ef41Sopenharmony_ci// buffer==nullptr: isNormalized/quickCheck/spanQuickCheckYes
22721cb0ef41Sopenharmony_ciconst char16_t *
22731cb0ef41Sopenharmony_ciNormalizer2Impl::makeFCD(const char16_t *src, const char16_t *limit,
22741cb0ef41Sopenharmony_ci                         ReorderingBuffer *buffer,
22751cb0ef41Sopenharmony_ci                         UErrorCode &errorCode) const {
22761cb0ef41Sopenharmony_ci    // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
22771cb0ef41Sopenharmony_ci    // Similar to the prevBoundary in the compose() implementation.
22781cb0ef41Sopenharmony_ci    const char16_t *prevBoundary=src;
22791cb0ef41Sopenharmony_ci    int32_t prevFCD16=0;
22801cb0ef41Sopenharmony_ci    if(limit==nullptr) {
22811cb0ef41Sopenharmony_ci        src=copyLowPrefixFromNulTerminated(src, minLcccCP, buffer, errorCode);
22821cb0ef41Sopenharmony_ci        if(U_FAILURE(errorCode)) {
22831cb0ef41Sopenharmony_ci            return src;
22841cb0ef41Sopenharmony_ci        }
22851cb0ef41Sopenharmony_ci        if(prevBoundary<src) {
22861cb0ef41Sopenharmony_ci            prevBoundary=src;
22871cb0ef41Sopenharmony_ci            // We know that the previous character's lccc==0.
22881cb0ef41Sopenharmony_ci            // Fetching the fcd16 value was deferred for this below-U+0300 code point.
22891cb0ef41Sopenharmony_ci            prevFCD16=getFCD16(*(src-1));
22901cb0ef41Sopenharmony_ci            if(prevFCD16>1) {
22911cb0ef41Sopenharmony_ci                --prevBoundary;
22921cb0ef41Sopenharmony_ci            }
22931cb0ef41Sopenharmony_ci        }
22941cb0ef41Sopenharmony_ci        limit=u_strchr(src, 0);
22951cb0ef41Sopenharmony_ci    }
22961cb0ef41Sopenharmony_ci
22971cb0ef41Sopenharmony_ci    // Note: In this function we use buffer->appendZeroCC() because we track
22981cb0ef41Sopenharmony_ci    // the lead and trail combining classes here, rather than leaving it to
22991cb0ef41Sopenharmony_ci    // the ReorderingBuffer.
23001cb0ef41Sopenharmony_ci    // The exception is the call to decomposeShort() which uses the buffer
23011cb0ef41Sopenharmony_ci    // in the normal way.
23021cb0ef41Sopenharmony_ci
23031cb0ef41Sopenharmony_ci    const char16_t *prevSrc;
23041cb0ef41Sopenharmony_ci    UChar32 c=0;
23051cb0ef41Sopenharmony_ci    uint16_t fcd16=0;
23061cb0ef41Sopenharmony_ci
23071cb0ef41Sopenharmony_ci    for(;;) {
23081cb0ef41Sopenharmony_ci        // count code units with lccc==0
23091cb0ef41Sopenharmony_ci        for(prevSrc=src; src!=limit;) {
23101cb0ef41Sopenharmony_ci            if((c=*src)<minLcccCP) {
23111cb0ef41Sopenharmony_ci                prevFCD16=~c;
23121cb0ef41Sopenharmony_ci                ++src;
23131cb0ef41Sopenharmony_ci            } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
23141cb0ef41Sopenharmony_ci                prevFCD16=0;
23151cb0ef41Sopenharmony_ci                ++src;
23161cb0ef41Sopenharmony_ci            } else {
23171cb0ef41Sopenharmony_ci                if(U16_IS_LEAD(c)) {
23181cb0ef41Sopenharmony_ci                    char16_t c2;
23191cb0ef41Sopenharmony_ci                    if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
23201cb0ef41Sopenharmony_ci                        c=U16_GET_SUPPLEMENTARY(c, c2);
23211cb0ef41Sopenharmony_ci                    }
23221cb0ef41Sopenharmony_ci                }
23231cb0ef41Sopenharmony_ci                if((fcd16=getFCD16FromNormData(c))<=0xff) {
23241cb0ef41Sopenharmony_ci                    prevFCD16=fcd16;
23251cb0ef41Sopenharmony_ci                    src+=U16_LENGTH(c);
23261cb0ef41Sopenharmony_ci                } else {
23271cb0ef41Sopenharmony_ci                    break;
23281cb0ef41Sopenharmony_ci                }
23291cb0ef41Sopenharmony_ci            }
23301cb0ef41Sopenharmony_ci        }
23311cb0ef41Sopenharmony_ci        // copy these code units all at once
23321cb0ef41Sopenharmony_ci        if(src!=prevSrc) {
23331cb0ef41Sopenharmony_ci            if(buffer!=nullptr && !buffer->appendZeroCC(prevSrc, src, errorCode)) {
23341cb0ef41Sopenharmony_ci                break;
23351cb0ef41Sopenharmony_ci            }
23361cb0ef41Sopenharmony_ci            if(src==limit) {
23371cb0ef41Sopenharmony_ci                break;
23381cb0ef41Sopenharmony_ci            }
23391cb0ef41Sopenharmony_ci            prevBoundary=src;
23401cb0ef41Sopenharmony_ci            // We know that the previous character's lccc==0.
23411cb0ef41Sopenharmony_ci            if(prevFCD16<0) {
23421cb0ef41Sopenharmony_ci                // Fetching the fcd16 value was deferred for this below-minLcccCP code point.
23431cb0ef41Sopenharmony_ci                UChar32 prev=~prevFCD16;
23441cb0ef41Sopenharmony_ci                if(prev<minDecompNoCP) {
23451cb0ef41Sopenharmony_ci                    prevFCD16=0;
23461cb0ef41Sopenharmony_ci                } else {
23471cb0ef41Sopenharmony_ci                    prevFCD16=getFCD16FromNormData(prev);
23481cb0ef41Sopenharmony_ci                    if(prevFCD16>1) {
23491cb0ef41Sopenharmony_ci                        --prevBoundary;
23501cb0ef41Sopenharmony_ci                    }
23511cb0ef41Sopenharmony_ci                }
23521cb0ef41Sopenharmony_ci            } else {
23531cb0ef41Sopenharmony_ci                const char16_t *p=src-1;
23541cb0ef41Sopenharmony_ci                if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) {
23551cb0ef41Sopenharmony_ci                    --p;
23561cb0ef41Sopenharmony_ci                    // Need to fetch the previous character's FCD value because
23571cb0ef41Sopenharmony_ci                    // prevFCD16 was just for the trail surrogate code point.
23581cb0ef41Sopenharmony_ci                    prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1]));
23591cb0ef41Sopenharmony_ci                    // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
23601cb0ef41Sopenharmony_ci                }
23611cb0ef41Sopenharmony_ci                if(prevFCD16>1) {
23621cb0ef41Sopenharmony_ci                    prevBoundary=p;
23631cb0ef41Sopenharmony_ci                }
23641cb0ef41Sopenharmony_ci            }
23651cb0ef41Sopenharmony_ci            // The start of the current character (c).
23661cb0ef41Sopenharmony_ci            prevSrc=src;
23671cb0ef41Sopenharmony_ci        } else if(src==limit) {
23681cb0ef41Sopenharmony_ci            break;
23691cb0ef41Sopenharmony_ci        }
23701cb0ef41Sopenharmony_ci
23711cb0ef41Sopenharmony_ci        src+=U16_LENGTH(c);
23721cb0ef41Sopenharmony_ci        // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
23731cb0ef41Sopenharmony_ci        // Check for proper order, and decompose locally if necessary.
23741cb0ef41Sopenharmony_ci        if((prevFCD16&0xff)<=(fcd16>>8)) {
23751cb0ef41Sopenharmony_ci            // proper order: prev tccc <= current lccc
23761cb0ef41Sopenharmony_ci            if((fcd16&0xff)<=1) {
23771cb0ef41Sopenharmony_ci                prevBoundary=src;
23781cb0ef41Sopenharmony_ci            }
23791cb0ef41Sopenharmony_ci            if(buffer!=nullptr && !buffer->appendZeroCC(c, errorCode)) {
23801cb0ef41Sopenharmony_ci                break;
23811cb0ef41Sopenharmony_ci            }
23821cb0ef41Sopenharmony_ci            prevFCD16=fcd16;
23831cb0ef41Sopenharmony_ci            continue;
23841cb0ef41Sopenharmony_ci        } else if(buffer==nullptr) {
23851cb0ef41Sopenharmony_ci            return prevBoundary;  // quick check "no"
23861cb0ef41Sopenharmony_ci        } else {
23871cb0ef41Sopenharmony_ci            /*
23881cb0ef41Sopenharmony_ci             * Back out the part of the source that we copied or appended
23891cb0ef41Sopenharmony_ci             * already but is now going to be decomposed.
23901cb0ef41Sopenharmony_ci             * prevSrc is set to after what was copied/appended.
23911cb0ef41Sopenharmony_ci             */
23921cb0ef41Sopenharmony_ci            buffer->removeSuffix((int32_t)(prevSrc-prevBoundary));
23931cb0ef41Sopenharmony_ci            /*
23941cb0ef41Sopenharmony_ci             * Find the part of the source that needs to be decomposed,
23951cb0ef41Sopenharmony_ci             * up to the next safe boundary.
23961cb0ef41Sopenharmony_ci             */
23971cb0ef41Sopenharmony_ci            src=findNextFCDBoundary(src, limit);
23981cb0ef41Sopenharmony_ci            /*
23991cb0ef41Sopenharmony_ci             * The source text does not fulfill the conditions for FCD.
24001cb0ef41Sopenharmony_ci             * Decompose and reorder a limited piece of the text.
24011cb0ef41Sopenharmony_ci             */
24021cb0ef41Sopenharmony_ci            decomposeShort(prevBoundary, src, false, false, *buffer, errorCode);
24031cb0ef41Sopenharmony_ci            if (U_FAILURE(errorCode)) {
24041cb0ef41Sopenharmony_ci                break;
24051cb0ef41Sopenharmony_ci            }
24061cb0ef41Sopenharmony_ci            prevBoundary=src;
24071cb0ef41Sopenharmony_ci            prevFCD16=0;
24081cb0ef41Sopenharmony_ci        }
24091cb0ef41Sopenharmony_ci    }
24101cb0ef41Sopenharmony_ci    return src;
24111cb0ef41Sopenharmony_ci}
24121cb0ef41Sopenharmony_ci
24131cb0ef41Sopenharmony_civoid Normalizer2Impl::makeFCDAndAppend(const char16_t *src, const char16_t *limit,
24141cb0ef41Sopenharmony_ci                                       UBool doMakeFCD,
24151cb0ef41Sopenharmony_ci                                       UnicodeString &safeMiddle,
24161cb0ef41Sopenharmony_ci                                       ReorderingBuffer &buffer,
24171cb0ef41Sopenharmony_ci                                       UErrorCode &errorCode) const {
24181cb0ef41Sopenharmony_ci    if(!buffer.isEmpty()) {
24191cb0ef41Sopenharmony_ci        const char16_t *firstBoundaryInSrc=findNextFCDBoundary(src, limit);
24201cb0ef41Sopenharmony_ci        if(src!=firstBoundaryInSrc) {
24211cb0ef41Sopenharmony_ci            const char16_t *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(),
24221cb0ef41Sopenharmony_ci                                                                    buffer.getLimit());
24231cb0ef41Sopenharmony_ci            int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest);
24241cb0ef41Sopenharmony_ci            UnicodeString middle(lastBoundaryInDest, destSuffixLength);
24251cb0ef41Sopenharmony_ci            buffer.removeSuffix(destSuffixLength);
24261cb0ef41Sopenharmony_ci            safeMiddle=middle;
24271cb0ef41Sopenharmony_ci            middle.append(src, (int32_t)(firstBoundaryInSrc-src));
24281cb0ef41Sopenharmony_ci            const char16_t *middleStart=middle.getBuffer();
24291cb0ef41Sopenharmony_ci            makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode);
24301cb0ef41Sopenharmony_ci            if(U_FAILURE(errorCode)) {
24311cb0ef41Sopenharmony_ci                return;
24321cb0ef41Sopenharmony_ci            }
24331cb0ef41Sopenharmony_ci            src=firstBoundaryInSrc;
24341cb0ef41Sopenharmony_ci        }
24351cb0ef41Sopenharmony_ci    }
24361cb0ef41Sopenharmony_ci    if(doMakeFCD) {
24371cb0ef41Sopenharmony_ci        makeFCD(src, limit, &buffer, errorCode);
24381cb0ef41Sopenharmony_ci    } else {
24391cb0ef41Sopenharmony_ci        if(limit==nullptr) {  // appendZeroCC() needs limit!=nullptr
24401cb0ef41Sopenharmony_ci            limit=u_strchr(src, 0);
24411cb0ef41Sopenharmony_ci        }
24421cb0ef41Sopenharmony_ci        buffer.appendZeroCC(src, limit, errorCode);
24431cb0ef41Sopenharmony_ci    }
24441cb0ef41Sopenharmony_ci}
24451cb0ef41Sopenharmony_ci
24461cb0ef41Sopenharmony_ciconst char16_t *Normalizer2Impl::findPreviousFCDBoundary(const char16_t *start, const char16_t *p) const {
24471cb0ef41Sopenharmony_ci    while(start<p) {
24481cb0ef41Sopenharmony_ci        const char16_t *codePointLimit = p;
24491cb0ef41Sopenharmony_ci        UChar32 c;
24501cb0ef41Sopenharmony_ci        uint16_t norm16;
24511cb0ef41Sopenharmony_ci        UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
24521cb0ef41Sopenharmony_ci        if (c < minDecompNoCP || norm16HasDecompBoundaryAfter(norm16)) {
24531cb0ef41Sopenharmony_ci            return codePointLimit;
24541cb0ef41Sopenharmony_ci        }
24551cb0ef41Sopenharmony_ci        if (norm16HasDecompBoundaryBefore(norm16)) {
24561cb0ef41Sopenharmony_ci            return p;
24571cb0ef41Sopenharmony_ci        }
24581cb0ef41Sopenharmony_ci    }
24591cb0ef41Sopenharmony_ci    return p;
24601cb0ef41Sopenharmony_ci}
24611cb0ef41Sopenharmony_ci
24621cb0ef41Sopenharmony_ciconst char16_t *Normalizer2Impl::findNextFCDBoundary(const char16_t *p, const char16_t *limit) const {
24631cb0ef41Sopenharmony_ci    while(p<limit) {
24641cb0ef41Sopenharmony_ci        const char16_t *codePointStart=p;
24651cb0ef41Sopenharmony_ci        UChar32 c;
24661cb0ef41Sopenharmony_ci        uint16_t norm16;
24671cb0ef41Sopenharmony_ci        UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
24681cb0ef41Sopenharmony_ci        if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16)) {
24691cb0ef41Sopenharmony_ci            return codePointStart;
24701cb0ef41Sopenharmony_ci        }
24711cb0ef41Sopenharmony_ci        if (norm16HasDecompBoundaryAfter(norm16)) {
24721cb0ef41Sopenharmony_ci            return p;
24731cb0ef41Sopenharmony_ci        }
24741cb0ef41Sopenharmony_ci    }
24751cb0ef41Sopenharmony_ci    return p;
24761cb0ef41Sopenharmony_ci}
24771cb0ef41Sopenharmony_ci
24781cb0ef41Sopenharmony_ci// CanonicalIterator data -------------------------------------------------- ***
24791cb0ef41Sopenharmony_ci
24801cb0ef41Sopenharmony_ciCanonIterData::CanonIterData(UErrorCode &errorCode) :
24811cb0ef41Sopenharmony_ci        mutableTrie(umutablecptrie_open(0, 0, &errorCode)), trie(nullptr),
24821cb0ef41Sopenharmony_ci        canonStartSets(uprv_deleteUObject, nullptr, errorCode) {}
24831cb0ef41Sopenharmony_ci
24841cb0ef41Sopenharmony_ciCanonIterData::~CanonIterData() {
24851cb0ef41Sopenharmony_ci    umutablecptrie_close(mutableTrie);
24861cb0ef41Sopenharmony_ci    ucptrie_close(trie);
24871cb0ef41Sopenharmony_ci}
24881cb0ef41Sopenharmony_ci
24891cb0ef41Sopenharmony_civoid CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
24901cb0ef41Sopenharmony_ci    uint32_t canonValue = umutablecptrie_get(mutableTrie, decompLead);
24911cb0ef41Sopenharmony_ci    if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
24921cb0ef41Sopenharmony_ci        // origin is the first character whose decomposition starts with
24931cb0ef41Sopenharmony_ci        // the character for which we are setting the value.
24941cb0ef41Sopenharmony_ci        umutablecptrie_set(mutableTrie, decompLead, canonValue|origin, &errorCode);
24951cb0ef41Sopenharmony_ci    } else {
24961cb0ef41Sopenharmony_ci        // origin is not the first character, or it is U+0000.
24971cb0ef41Sopenharmony_ci        UnicodeSet *set;
24981cb0ef41Sopenharmony_ci        if((canonValue&CANON_HAS_SET)==0) {
24991cb0ef41Sopenharmony_ci            LocalPointer<UnicodeSet> lpSet(new UnicodeSet, errorCode);
25001cb0ef41Sopenharmony_ci            set=lpSet.getAlias();
25011cb0ef41Sopenharmony_ci            if(U_FAILURE(errorCode)) {
25021cb0ef41Sopenharmony_ci                return;
25031cb0ef41Sopenharmony_ci            }
25041cb0ef41Sopenharmony_ci            UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
25051cb0ef41Sopenharmony_ci            canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
25061cb0ef41Sopenharmony_ci            umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
25071cb0ef41Sopenharmony_ci            canonStartSets.adoptElement(lpSet.orphan(), errorCode);
25081cb0ef41Sopenharmony_ci            if (U_FAILURE(errorCode)) {
25091cb0ef41Sopenharmony_ci                return;
25101cb0ef41Sopenharmony_ci            }
25111cb0ef41Sopenharmony_ci            if(firstOrigin!=0) {
25121cb0ef41Sopenharmony_ci                set->add(firstOrigin);
25131cb0ef41Sopenharmony_ci            }
25141cb0ef41Sopenharmony_ci        } else {
25151cb0ef41Sopenharmony_ci            set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)];
25161cb0ef41Sopenharmony_ci        }
25171cb0ef41Sopenharmony_ci        set->add(origin);
25181cb0ef41Sopenharmony_ci    }
25191cb0ef41Sopenharmony_ci}
25201cb0ef41Sopenharmony_ci
25211cb0ef41Sopenharmony_ci// C++ class for friend access to private Normalizer2Impl members.
25221cb0ef41Sopenharmony_ciclass InitCanonIterData {
25231cb0ef41Sopenharmony_cipublic:
25241cb0ef41Sopenharmony_ci    static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode);
25251cb0ef41Sopenharmony_ci};
25261cb0ef41Sopenharmony_ci
25271cb0ef41Sopenharmony_ciU_CDECL_BEGIN
25281cb0ef41Sopenharmony_ci
25291cb0ef41Sopenharmony_ci// UInitOnce instantiation function for CanonIterData
25301cb0ef41Sopenharmony_cistatic void U_CALLCONV
25311cb0ef41Sopenharmony_ciinitCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
25321cb0ef41Sopenharmony_ci    InitCanonIterData::doInit(impl, errorCode);
25331cb0ef41Sopenharmony_ci}
25341cb0ef41Sopenharmony_ci
25351cb0ef41Sopenharmony_ciU_CDECL_END
25361cb0ef41Sopenharmony_ci
25371cb0ef41Sopenharmony_civoid InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
25381cb0ef41Sopenharmony_ci    U_ASSERT(impl->fCanonIterData == nullptr);
25391cb0ef41Sopenharmony_ci    impl->fCanonIterData = new CanonIterData(errorCode);
25401cb0ef41Sopenharmony_ci    if (impl->fCanonIterData == nullptr) {
25411cb0ef41Sopenharmony_ci        errorCode=U_MEMORY_ALLOCATION_ERROR;
25421cb0ef41Sopenharmony_ci    }
25431cb0ef41Sopenharmony_ci    if (U_SUCCESS(errorCode)) {
25441cb0ef41Sopenharmony_ci        UChar32 start = 0, end;
25451cb0ef41Sopenharmony_ci        uint32_t value;
25461cb0ef41Sopenharmony_ci        while ((end = ucptrie_getRange(impl->normTrie, start,
25471cb0ef41Sopenharmony_ci                                       UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT,
25481cb0ef41Sopenharmony_ci                                       nullptr, nullptr, &value)) >= 0) {
25491cb0ef41Sopenharmony_ci            // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
25501cb0ef41Sopenharmony_ci            if (value != Normalizer2Impl::INERT) {
25511cb0ef41Sopenharmony_ci                impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
25521cb0ef41Sopenharmony_ci            }
25531cb0ef41Sopenharmony_ci            start = end + 1;
25541cb0ef41Sopenharmony_ci        }
25551cb0ef41Sopenharmony_ci#ifdef UCPTRIE_DEBUG
25561cb0ef41Sopenharmony_ci        umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData");
25571cb0ef41Sopenharmony_ci#endif
25581cb0ef41Sopenharmony_ci        impl->fCanonIterData->trie = umutablecptrie_buildImmutable(
25591cb0ef41Sopenharmony_ci            impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode);
25601cb0ef41Sopenharmony_ci        umutablecptrie_close(impl->fCanonIterData->mutableTrie);
25611cb0ef41Sopenharmony_ci        impl->fCanonIterData->mutableTrie = nullptr;
25621cb0ef41Sopenharmony_ci    }
25631cb0ef41Sopenharmony_ci    if (U_FAILURE(errorCode)) {
25641cb0ef41Sopenharmony_ci        delete impl->fCanonIterData;
25651cb0ef41Sopenharmony_ci        impl->fCanonIterData = nullptr;
25661cb0ef41Sopenharmony_ci    }
25671cb0ef41Sopenharmony_ci}
25681cb0ef41Sopenharmony_ci
25691cb0ef41Sopenharmony_civoid Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
25701cb0ef41Sopenharmony_ci                                                  CanonIterData &newData,
25711cb0ef41Sopenharmony_ci                                                  UErrorCode &errorCode) const {
25721cb0ef41Sopenharmony_ci    if(isInert(norm16) || (minYesNo<=norm16 && norm16<minNoNo)) {
25731cb0ef41Sopenharmony_ci        // Inert, or 2-way mapping (including Hangul syllable).
25741cb0ef41Sopenharmony_ci        // We do not write a canonStartSet for any yesNo character.
25751cb0ef41Sopenharmony_ci        // Composites from 2-way mappings are added at runtime from the
25761cb0ef41Sopenharmony_ci        // starter's compositions list, and the other characters in
25771cb0ef41Sopenharmony_ci        // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
25781cb0ef41Sopenharmony_ci        // "maybe" characters.
25791cb0ef41Sopenharmony_ci        return;
25801cb0ef41Sopenharmony_ci    }
25811cb0ef41Sopenharmony_ci    for(UChar32 c=start; c<=end; ++c) {
25821cb0ef41Sopenharmony_ci        uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c);
25831cb0ef41Sopenharmony_ci        uint32_t newValue=oldValue;
25841cb0ef41Sopenharmony_ci        if(isMaybeOrNonZeroCC(norm16)) {
25851cb0ef41Sopenharmony_ci            // not a segment starter if it occurs in a decomposition or has cc!=0
25861cb0ef41Sopenharmony_ci            newValue|=CANON_NOT_SEGMENT_STARTER;
25871cb0ef41Sopenharmony_ci            if(norm16<MIN_NORMAL_MAYBE_YES) {
25881cb0ef41Sopenharmony_ci                newValue|=CANON_HAS_COMPOSITIONS;
25891cb0ef41Sopenharmony_ci            }
25901cb0ef41Sopenharmony_ci        } else if(norm16<minYesNo) {
25911cb0ef41Sopenharmony_ci            newValue|=CANON_HAS_COMPOSITIONS;
25921cb0ef41Sopenharmony_ci        } else {
25931cb0ef41Sopenharmony_ci            // c has a one-way decomposition
25941cb0ef41Sopenharmony_ci            UChar32 c2=c;
25951cb0ef41Sopenharmony_ci            // Do not modify the whole-range norm16 value.
25961cb0ef41Sopenharmony_ci            uint16_t norm16_2=norm16;
25971cb0ef41Sopenharmony_ci            if (isDecompNoAlgorithmic(norm16_2)) {
25981cb0ef41Sopenharmony_ci                // Maps to an isCompYesAndZeroCC.
25991cb0ef41Sopenharmony_ci                c2 = mapAlgorithmic(c2, norm16_2);
26001cb0ef41Sopenharmony_ci                norm16_2 = getRawNorm16(c2);
26011cb0ef41Sopenharmony_ci                // No compatibility mappings for the CanonicalIterator.
26021cb0ef41Sopenharmony_ci                U_ASSERT(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2)));
26031cb0ef41Sopenharmony_ci            }
26041cb0ef41Sopenharmony_ci            if (norm16_2 > minYesNo) {
26051cb0ef41Sopenharmony_ci                // c decomposes, get everything from the variable-length extra data
26061cb0ef41Sopenharmony_ci                const uint16_t *mapping=getMapping(norm16_2);
26071cb0ef41Sopenharmony_ci                uint16_t firstUnit=*mapping;
26081cb0ef41Sopenharmony_ci                int32_t length=firstUnit&MAPPING_LENGTH_MASK;
26091cb0ef41Sopenharmony_ci                if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
26101cb0ef41Sopenharmony_ci                    if(c==c2 && (*(mapping-1)&0xff)!=0) {
26111cb0ef41Sopenharmony_ci                        newValue|=CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
26121cb0ef41Sopenharmony_ci                    }
26131cb0ef41Sopenharmony_ci                }
26141cb0ef41Sopenharmony_ci                // Skip empty mappings (no characters in the decomposition).
26151cb0ef41Sopenharmony_ci                if(length!=0) {
26161cb0ef41Sopenharmony_ci                    ++mapping;  // skip over the firstUnit
26171cb0ef41Sopenharmony_ci                    // add c to first code point's start set
26181cb0ef41Sopenharmony_ci                    int32_t i=0;
26191cb0ef41Sopenharmony_ci                    U16_NEXT_UNSAFE(mapping, i, c2);
26201cb0ef41Sopenharmony_ci                    newData.addToStartSet(c, c2, errorCode);
26211cb0ef41Sopenharmony_ci                    // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
26221cb0ef41Sopenharmony_ci                    // one-way mapping. A 2-way mapping is possible here after
26231cb0ef41Sopenharmony_ci                    // intermediate algorithmic mapping.
26241cb0ef41Sopenharmony_ci                    if(norm16_2>=minNoNo) {
26251cb0ef41Sopenharmony_ci                        while(i<length) {
26261cb0ef41Sopenharmony_ci                            U16_NEXT_UNSAFE(mapping, i, c2);
26271cb0ef41Sopenharmony_ci                            uint32_t c2Value = umutablecptrie_get(newData.mutableTrie, c2);
26281cb0ef41Sopenharmony_ci                            if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
26291cb0ef41Sopenharmony_ci                                umutablecptrie_set(newData.mutableTrie, c2,
26301cb0ef41Sopenharmony_ci                                                   c2Value|CANON_NOT_SEGMENT_STARTER, &errorCode);
26311cb0ef41Sopenharmony_ci                            }
26321cb0ef41Sopenharmony_ci                        }
26331cb0ef41Sopenharmony_ci                    }
26341cb0ef41Sopenharmony_ci                }
26351cb0ef41Sopenharmony_ci            } else {
26361cb0ef41Sopenharmony_ci                // c decomposed to c2 algorithmically; c has cc==0
26371cb0ef41Sopenharmony_ci                newData.addToStartSet(c, c2, errorCode);
26381cb0ef41Sopenharmony_ci            }
26391cb0ef41Sopenharmony_ci        }
26401cb0ef41Sopenharmony_ci        if(newValue!=oldValue) {
26411cb0ef41Sopenharmony_ci            umutablecptrie_set(newData.mutableTrie, c, newValue, &errorCode);
26421cb0ef41Sopenharmony_ci        }
26431cb0ef41Sopenharmony_ci    }
26441cb0ef41Sopenharmony_ci}
26451cb0ef41Sopenharmony_ci
26461cb0ef41Sopenharmony_ciUBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
26471cb0ef41Sopenharmony_ci    // Logically const: Synchronized instantiation.
26481cb0ef41Sopenharmony_ci    Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this);
26491cb0ef41Sopenharmony_ci    umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode);
26501cb0ef41Sopenharmony_ci    return U_SUCCESS(errorCode);
26511cb0ef41Sopenharmony_ci}
26521cb0ef41Sopenharmony_ci
26531cb0ef41Sopenharmony_ciint32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
26541cb0ef41Sopenharmony_ci    return (int32_t)ucptrie_get(fCanonIterData->trie, c);
26551cb0ef41Sopenharmony_ci}
26561cb0ef41Sopenharmony_ci
26571cb0ef41Sopenharmony_ciconst UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
26581cb0ef41Sopenharmony_ci    return *(const UnicodeSet *)fCanonIterData->canonStartSets[n];
26591cb0ef41Sopenharmony_ci}
26601cb0ef41Sopenharmony_ci
26611cb0ef41Sopenharmony_ciUBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const {
26621cb0ef41Sopenharmony_ci    return getCanonValue(c)>=0;
26631cb0ef41Sopenharmony_ci}
26641cb0ef41Sopenharmony_ci
26651cb0ef41Sopenharmony_ciUBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
26661cb0ef41Sopenharmony_ci    int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER;
26671cb0ef41Sopenharmony_ci    if(canonValue==0) {
26681cb0ef41Sopenharmony_ci        return false;
26691cb0ef41Sopenharmony_ci    }
26701cb0ef41Sopenharmony_ci    set.clear();
26711cb0ef41Sopenharmony_ci    int32_t value=canonValue&CANON_VALUE_MASK;
26721cb0ef41Sopenharmony_ci    if((canonValue&CANON_HAS_SET)!=0) {
26731cb0ef41Sopenharmony_ci        set.addAll(getCanonStartSet(value));
26741cb0ef41Sopenharmony_ci    } else if(value!=0) {
26751cb0ef41Sopenharmony_ci        set.add(value);
26761cb0ef41Sopenharmony_ci    }
26771cb0ef41Sopenharmony_ci    if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
26781cb0ef41Sopenharmony_ci        uint16_t norm16=getRawNorm16(c);
26791cb0ef41Sopenharmony_ci        if(norm16==JAMO_L) {
26801cb0ef41Sopenharmony_ci            UChar32 syllable=
26811cb0ef41Sopenharmony_ci                (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
26821cb0ef41Sopenharmony_ci            set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1);
26831cb0ef41Sopenharmony_ci        } else {
26841cb0ef41Sopenharmony_ci            addComposites(getCompositionsList(norm16), set);
26851cb0ef41Sopenharmony_ci        }
26861cb0ef41Sopenharmony_ci    }
26871cb0ef41Sopenharmony_ci    return true;
26881cb0ef41Sopenharmony_ci}
26891cb0ef41Sopenharmony_ci
26901cb0ef41Sopenharmony_ciU_NAMESPACE_END
26911cb0ef41Sopenharmony_ci
26921cb0ef41Sopenharmony_ci// Normalizer2 data swapping ----------------------------------------------- ***
26931cb0ef41Sopenharmony_ci
26941cb0ef41Sopenharmony_ciU_NAMESPACE_USE
26951cb0ef41Sopenharmony_ci
26961cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2
26971cb0ef41Sopenharmony_ciunorm2_swap(const UDataSwapper *ds,
26981cb0ef41Sopenharmony_ci            const void *inData, int32_t length, void *outData,
26991cb0ef41Sopenharmony_ci            UErrorCode *pErrorCode) {
27001cb0ef41Sopenharmony_ci    const UDataInfo *pInfo;
27011cb0ef41Sopenharmony_ci    int32_t headerSize;
27021cb0ef41Sopenharmony_ci
27031cb0ef41Sopenharmony_ci    const uint8_t *inBytes;
27041cb0ef41Sopenharmony_ci    uint8_t *outBytes;
27051cb0ef41Sopenharmony_ci
27061cb0ef41Sopenharmony_ci    const int32_t *inIndexes;
27071cb0ef41Sopenharmony_ci    int32_t indexes[Normalizer2Impl::IX_TOTAL_SIZE+1];
27081cb0ef41Sopenharmony_ci
27091cb0ef41Sopenharmony_ci    int32_t i, offset, nextOffset, size;
27101cb0ef41Sopenharmony_ci
27111cb0ef41Sopenharmony_ci    /* udata_swapDataHeader checks the arguments */
27121cb0ef41Sopenharmony_ci    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
27131cb0ef41Sopenharmony_ci    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
27141cb0ef41Sopenharmony_ci        return 0;
27151cb0ef41Sopenharmony_ci    }
27161cb0ef41Sopenharmony_ci
27171cb0ef41Sopenharmony_ci    /* check data format and format version */
27181cb0ef41Sopenharmony_ci    pInfo=(const UDataInfo *)((const char *)inData+4);
27191cb0ef41Sopenharmony_ci    uint8_t formatVersion0=pInfo->formatVersion[0];
27201cb0ef41Sopenharmony_ci    if(!(
27211cb0ef41Sopenharmony_ci        pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Nrm2" */
27221cb0ef41Sopenharmony_ci        pInfo->dataFormat[1]==0x72 &&
27231cb0ef41Sopenharmony_ci        pInfo->dataFormat[2]==0x6d &&
27241cb0ef41Sopenharmony_ci        pInfo->dataFormat[3]==0x32 &&
27251cb0ef41Sopenharmony_ci        (1<=formatVersion0 && formatVersion0<=4)
27261cb0ef41Sopenharmony_ci    )) {
27271cb0ef41Sopenharmony_ci        udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
27281cb0ef41Sopenharmony_ci                         pInfo->dataFormat[0], pInfo->dataFormat[1],
27291cb0ef41Sopenharmony_ci                         pInfo->dataFormat[2], pInfo->dataFormat[3],
27301cb0ef41Sopenharmony_ci                         pInfo->formatVersion[0]);
27311cb0ef41Sopenharmony_ci        *pErrorCode=U_UNSUPPORTED_ERROR;
27321cb0ef41Sopenharmony_ci        return 0;
27331cb0ef41Sopenharmony_ci    }
27341cb0ef41Sopenharmony_ci
27351cb0ef41Sopenharmony_ci    inBytes=(const uint8_t *)inData+headerSize;
27361cb0ef41Sopenharmony_ci    outBytes=(outData == nullptr) ? nullptr : (uint8_t *)outData+headerSize;
27371cb0ef41Sopenharmony_ci
27381cb0ef41Sopenharmony_ci    inIndexes=(const int32_t *)inBytes;
27391cb0ef41Sopenharmony_ci    int32_t minIndexesLength;
27401cb0ef41Sopenharmony_ci    if(formatVersion0==1) {
27411cb0ef41Sopenharmony_ci        minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1;
27421cb0ef41Sopenharmony_ci    } else if(formatVersion0==2) {
27431cb0ef41Sopenharmony_ci        minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1;
27441cb0ef41Sopenharmony_ci    } else {
27451cb0ef41Sopenharmony_ci        minIndexesLength=Normalizer2Impl::IX_MIN_LCCC_CP+1;
27461cb0ef41Sopenharmony_ci    }
27471cb0ef41Sopenharmony_ci
27481cb0ef41Sopenharmony_ci    if(length>=0) {
27491cb0ef41Sopenharmony_ci        length-=headerSize;
27501cb0ef41Sopenharmony_ci        if(length<minIndexesLength*4) {
27511cb0ef41Sopenharmony_ci            udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n",
27521cb0ef41Sopenharmony_ci                             length);
27531cb0ef41Sopenharmony_ci            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
27541cb0ef41Sopenharmony_ci            return 0;
27551cb0ef41Sopenharmony_ci        }
27561cb0ef41Sopenharmony_ci    }
27571cb0ef41Sopenharmony_ci
27581cb0ef41Sopenharmony_ci    /* read the first few indexes */
27591cb0ef41Sopenharmony_ci    for(i=0; i<UPRV_LENGTHOF(indexes); ++i) {
27601cb0ef41Sopenharmony_ci        indexes[i]=udata_readInt32(ds, inIndexes[i]);
27611cb0ef41Sopenharmony_ci    }
27621cb0ef41Sopenharmony_ci
27631cb0ef41Sopenharmony_ci    /* get the total length of the data */
27641cb0ef41Sopenharmony_ci    size=indexes[Normalizer2Impl::IX_TOTAL_SIZE];
27651cb0ef41Sopenharmony_ci
27661cb0ef41Sopenharmony_ci    if(length>=0) {
27671cb0ef41Sopenharmony_ci        if(length<size) {
27681cb0ef41Sopenharmony_ci            udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n",
27691cb0ef41Sopenharmony_ci                             length);
27701cb0ef41Sopenharmony_ci            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
27711cb0ef41Sopenharmony_ci            return 0;
27721cb0ef41Sopenharmony_ci        }
27731cb0ef41Sopenharmony_ci
27741cb0ef41Sopenharmony_ci        /* copy the data for inaccessible bytes */
27751cb0ef41Sopenharmony_ci        if(inBytes!=outBytes) {
27761cb0ef41Sopenharmony_ci            uprv_memcpy(outBytes, inBytes, size);
27771cb0ef41Sopenharmony_ci        }
27781cb0ef41Sopenharmony_ci
27791cb0ef41Sopenharmony_ci        offset=0;
27801cb0ef41Sopenharmony_ci
27811cb0ef41Sopenharmony_ci        /* swap the int32_t indexes[] */
27821cb0ef41Sopenharmony_ci        nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET];
27831cb0ef41Sopenharmony_ci        ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
27841cb0ef41Sopenharmony_ci        offset=nextOffset;
27851cb0ef41Sopenharmony_ci
27861cb0ef41Sopenharmony_ci        /* swap the trie */
27871cb0ef41Sopenharmony_ci        nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
27881cb0ef41Sopenharmony_ci        utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
27891cb0ef41Sopenharmony_ci        offset=nextOffset;
27901cb0ef41Sopenharmony_ci
27911cb0ef41Sopenharmony_ci        /* swap the uint16_t extraData[] */
27921cb0ef41Sopenharmony_ci        nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET];
27931cb0ef41Sopenharmony_ci        ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
27941cb0ef41Sopenharmony_ci        offset=nextOffset;
27951cb0ef41Sopenharmony_ci
27961cb0ef41Sopenharmony_ci        /* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */
27971cb0ef41Sopenharmony_ci        nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1];
27981cb0ef41Sopenharmony_ci        offset=nextOffset;
27991cb0ef41Sopenharmony_ci
28001cb0ef41Sopenharmony_ci        U_ASSERT(offset==size);
28011cb0ef41Sopenharmony_ci    }
28021cb0ef41Sopenharmony_ci
28031cb0ef41Sopenharmony_ci    return headerSize+size;
28041cb0ef41Sopenharmony_ci}
28051cb0ef41Sopenharmony_ci
28061cb0ef41Sopenharmony_ci#endif  // !UCONFIG_NO_NORMALIZATION
2807