12e5b6d6dSopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
32e5b6d6dSopenharmony_ci/*
42e5b6d6dSopenharmony_ci******************************************************************************
52e5b6d6dSopenharmony_ci*
62e5b6d6dSopenharmony_ci*   Copyright (C) 2001-2016, International Business Machines
72e5b6d6dSopenharmony_ci*   Corporation and others.  All Rights Reserved.
82e5b6d6dSopenharmony_ci*
92e5b6d6dSopenharmony_ci******************************************************************************
102e5b6d6dSopenharmony_ci*
112e5b6d6dSopenharmony_ci* File ustrtrns.cpp
122e5b6d6dSopenharmony_ci*
132e5b6d6dSopenharmony_ci* Modification History:
142e5b6d6dSopenharmony_ci*
152e5b6d6dSopenharmony_ci*   Date        Name        Description
162e5b6d6dSopenharmony_ci*   9/10/2001    Ram    Creation.
172e5b6d6dSopenharmony_ci******************************************************************************
182e5b6d6dSopenharmony_ci*/
192e5b6d6dSopenharmony_ci
202e5b6d6dSopenharmony_ci/*******************************************************************************
212e5b6d6dSopenharmony_ci *
222e5b6d6dSopenharmony_ci * u_strTo* and u_strFrom* APIs
232e5b6d6dSopenharmony_ci * WCS functions moved to ustr_wcs.c for better modularization
242e5b6d6dSopenharmony_ci *
252e5b6d6dSopenharmony_ci *******************************************************************************
262e5b6d6dSopenharmony_ci */
272e5b6d6dSopenharmony_ci
282e5b6d6dSopenharmony_ci
292e5b6d6dSopenharmony_ci#include "unicode/putil.h"
302e5b6d6dSopenharmony_ci#include "unicode/ustring.h"
312e5b6d6dSopenharmony_ci#include "unicode/utf.h"
322e5b6d6dSopenharmony_ci#include "unicode/utf8.h"
332e5b6d6dSopenharmony_ci#include "unicode/utf16.h"
342e5b6d6dSopenharmony_ci#include "cstring.h"
352e5b6d6dSopenharmony_ci#include "cmemory.h"
362e5b6d6dSopenharmony_ci#include "ustr_imp.h"
372e5b6d6dSopenharmony_ci#include "uassert.h"
382e5b6d6dSopenharmony_ci
392e5b6d6dSopenharmony_ciU_CAPI UChar* U_EXPORT2
402e5b6d6dSopenharmony_ciu_strFromUTF32WithSub(UChar *dest,
412e5b6d6dSopenharmony_ci               int32_t destCapacity,
422e5b6d6dSopenharmony_ci               int32_t *pDestLength,
432e5b6d6dSopenharmony_ci               const UChar32 *src,
442e5b6d6dSopenharmony_ci               int32_t srcLength,
452e5b6d6dSopenharmony_ci               UChar32 subchar, int32_t *pNumSubstitutions,
462e5b6d6dSopenharmony_ci               UErrorCode *pErrorCode) {
472e5b6d6dSopenharmony_ci    const UChar32 *srcLimit;
482e5b6d6dSopenharmony_ci    UChar32 ch;
492e5b6d6dSopenharmony_ci    UChar *destLimit;
502e5b6d6dSopenharmony_ci    UChar *pDest;
512e5b6d6dSopenharmony_ci    int32_t reqLength;
522e5b6d6dSopenharmony_ci    int32_t numSubstitutions;
532e5b6d6dSopenharmony_ci
542e5b6d6dSopenharmony_ci    /* args check */
552e5b6d6dSopenharmony_ci    if(U_FAILURE(*pErrorCode)){
562e5b6d6dSopenharmony_ci        return NULL;
572e5b6d6dSopenharmony_ci    }
582e5b6d6dSopenharmony_ci    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
592e5b6d6dSopenharmony_ci        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
602e5b6d6dSopenharmony_ci        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
612e5b6d6dSopenharmony_ci    ) {
622e5b6d6dSopenharmony_ci        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
632e5b6d6dSopenharmony_ci        return NULL;
642e5b6d6dSopenharmony_ci    }
652e5b6d6dSopenharmony_ci
662e5b6d6dSopenharmony_ci    if(pNumSubstitutions != NULL) {
672e5b6d6dSopenharmony_ci        *pNumSubstitutions = 0;
682e5b6d6dSopenharmony_ci    }
692e5b6d6dSopenharmony_ci
702e5b6d6dSopenharmony_ci    pDest = dest;
712e5b6d6dSopenharmony_ci    destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
722e5b6d6dSopenharmony_ci    reqLength = 0;
732e5b6d6dSopenharmony_ci    numSubstitutions = 0;
742e5b6d6dSopenharmony_ci
752e5b6d6dSopenharmony_ci    if(srcLength < 0) {
762e5b6d6dSopenharmony_ci        /* simple loop for conversion of a NUL-terminated BMP string */
772e5b6d6dSopenharmony_ci        while((ch=*src) != 0 &&
782e5b6d6dSopenharmony_ci              ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) {
792e5b6d6dSopenharmony_ci            ++src;
802e5b6d6dSopenharmony_ci            if(pDest < destLimit) {
812e5b6d6dSopenharmony_ci                *pDest++ = (UChar)ch;
822e5b6d6dSopenharmony_ci            } else {
832e5b6d6dSopenharmony_ci                ++reqLength;
842e5b6d6dSopenharmony_ci            }
852e5b6d6dSopenharmony_ci        }
862e5b6d6dSopenharmony_ci        srcLimit = src;
872e5b6d6dSopenharmony_ci        if(ch != 0) {
882e5b6d6dSopenharmony_ci            /* "complicated" case, find the end of the remaining string */
892e5b6d6dSopenharmony_ci            while(*++srcLimit != 0) {}
902e5b6d6dSopenharmony_ci        }
912e5b6d6dSopenharmony_ci    } else {
922e5b6d6dSopenharmony_ci      srcLimit = (src!=NULL)?(src + srcLength):NULL;
932e5b6d6dSopenharmony_ci    }
942e5b6d6dSopenharmony_ci
952e5b6d6dSopenharmony_ci    /* convert with length */
962e5b6d6dSopenharmony_ci    while(src < srcLimit) {
972e5b6d6dSopenharmony_ci        ch = *src++;
982e5b6d6dSopenharmony_ci        do {
992e5b6d6dSopenharmony_ci            /* usually "loops" once; twice only for writing subchar */
1002e5b6d6dSopenharmony_ci            if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
1012e5b6d6dSopenharmony_ci                if(pDest < destLimit) {
1022e5b6d6dSopenharmony_ci                    *pDest++ = (UChar)ch;
1032e5b6d6dSopenharmony_ci                } else {
1042e5b6d6dSopenharmony_ci                    ++reqLength;
1052e5b6d6dSopenharmony_ci                }
1062e5b6d6dSopenharmony_ci                break;
1072e5b6d6dSopenharmony_ci            } else if(0x10000 <= ch && ch <= 0x10ffff) {
1082e5b6d6dSopenharmony_ci                if(pDest!=NULL && ((pDest + 2) <= destLimit)) {
1092e5b6d6dSopenharmony_ci                    *pDest++ = U16_LEAD(ch);
1102e5b6d6dSopenharmony_ci                    *pDest++ = U16_TRAIL(ch);
1112e5b6d6dSopenharmony_ci                } else {
1122e5b6d6dSopenharmony_ci                    reqLength += 2;
1132e5b6d6dSopenharmony_ci                }
1142e5b6d6dSopenharmony_ci                break;
1152e5b6d6dSopenharmony_ci            } else if((ch = subchar) < 0) {
1162e5b6d6dSopenharmony_ci                /* surrogate code point, or not a Unicode code point at all */
1172e5b6d6dSopenharmony_ci                *pErrorCode = U_INVALID_CHAR_FOUND;
1182e5b6d6dSopenharmony_ci                return NULL;
1192e5b6d6dSopenharmony_ci            } else {
1202e5b6d6dSopenharmony_ci                ++numSubstitutions;
1212e5b6d6dSopenharmony_ci            }
1222e5b6d6dSopenharmony_ci        } while(true);
1232e5b6d6dSopenharmony_ci    }
1242e5b6d6dSopenharmony_ci
1252e5b6d6dSopenharmony_ci    reqLength += (int32_t)(pDest - dest);
1262e5b6d6dSopenharmony_ci    if(pDestLength) {
1272e5b6d6dSopenharmony_ci        *pDestLength = reqLength;
1282e5b6d6dSopenharmony_ci    }
1292e5b6d6dSopenharmony_ci    if(pNumSubstitutions != NULL) {
1302e5b6d6dSopenharmony_ci        *pNumSubstitutions = numSubstitutions;
1312e5b6d6dSopenharmony_ci    }
1322e5b6d6dSopenharmony_ci
1332e5b6d6dSopenharmony_ci    /* Terminate the buffer */
1342e5b6d6dSopenharmony_ci    u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
1352e5b6d6dSopenharmony_ci
1362e5b6d6dSopenharmony_ci    return dest;
1372e5b6d6dSopenharmony_ci}
1382e5b6d6dSopenharmony_ci
1392e5b6d6dSopenharmony_ciU_CAPI UChar* U_EXPORT2
1402e5b6d6dSopenharmony_ciu_strFromUTF32(UChar *dest,
1412e5b6d6dSopenharmony_ci               int32_t destCapacity,
1422e5b6d6dSopenharmony_ci               int32_t *pDestLength,
1432e5b6d6dSopenharmony_ci               const UChar32 *src,
1442e5b6d6dSopenharmony_ci               int32_t srcLength,
1452e5b6d6dSopenharmony_ci               UErrorCode *pErrorCode) {
1462e5b6d6dSopenharmony_ci    return u_strFromUTF32WithSub(
1472e5b6d6dSopenharmony_ci            dest, destCapacity, pDestLength,
1482e5b6d6dSopenharmony_ci            src, srcLength,
1492e5b6d6dSopenharmony_ci            U_SENTINEL, NULL,
1502e5b6d6dSopenharmony_ci            pErrorCode);
1512e5b6d6dSopenharmony_ci}
1522e5b6d6dSopenharmony_ci
1532e5b6d6dSopenharmony_ciU_CAPI UChar32* U_EXPORT2
1542e5b6d6dSopenharmony_ciu_strToUTF32WithSub(UChar32 *dest,
1552e5b6d6dSopenharmony_ci             int32_t destCapacity,
1562e5b6d6dSopenharmony_ci             int32_t *pDestLength,
1572e5b6d6dSopenharmony_ci             const UChar *src,
1582e5b6d6dSopenharmony_ci             int32_t srcLength,
1592e5b6d6dSopenharmony_ci             UChar32 subchar, int32_t *pNumSubstitutions,
1602e5b6d6dSopenharmony_ci             UErrorCode *pErrorCode) {
1612e5b6d6dSopenharmony_ci    const UChar *srcLimit;
1622e5b6d6dSopenharmony_ci    UChar32 ch;
1632e5b6d6dSopenharmony_ci    UChar ch2;
1642e5b6d6dSopenharmony_ci    UChar32 *destLimit;
1652e5b6d6dSopenharmony_ci    UChar32 *pDest;
1662e5b6d6dSopenharmony_ci    int32_t reqLength;
1672e5b6d6dSopenharmony_ci    int32_t numSubstitutions;
1682e5b6d6dSopenharmony_ci
1692e5b6d6dSopenharmony_ci    /* args check */
1702e5b6d6dSopenharmony_ci    if(U_FAILURE(*pErrorCode)){
1712e5b6d6dSopenharmony_ci        return NULL;
1722e5b6d6dSopenharmony_ci    }
1732e5b6d6dSopenharmony_ci    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
1742e5b6d6dSopenharmony_ci        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
1752e5b6d6dSopenharmony_ci        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
1762e5b6d6dSopenharmony_ci    ) {
1772e5b6d6dSopenharmony_ci        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1782e5b6d6dSopenharmony_ci        return NULL;
1792e5b6d6dSopenharmony_ci    }
1802e5b6d6dSopenharmony_ci
1812e5b6d6dSopenharmony_ci    if(pNumSubstitutions != NULL) {
1822e5b6d6dSopenharmony_ci        *pNumSubstitutions = 0;
1832e5b6d6dSopenharmony_ci    }
1842e5b6d6dSopenharmony_ci
1852e5b6d6dSopenharmony_ci    pDest = dest;
1862e5b6d6dSopenharmony_ci    destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
1872e5b6d6dSopenharmony_ci    reqLength = 0;
1882e5b6d6dSopenharmony_ci    numSubstitutions = 0;
1892e5b6d6dSopenharmony_ci
1902e5b6d6dSopenharmony_ci    if(srcLength < 0) {
1912e5b6d6dSopenharmony_ci        /* simple loop for conversion of a NUL-terminated BMP string */
1922e5b6d6dSopenharmony_ci        while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
1932e5b6d6dSopenharmony_ci            ++src;
1942e5b6d6dSopenharmony_ci            if(pDest < destLimit) {
1952e5b6d6dSopenharmony_ci                *pDest++ = ch;
1962e5b6d6dSopenharmony_ci            } else {
1972e5b6d6dSopenharmony_ci                ++reqLength;
1982e5b6d6dSopenharmony_ci            }
1992e5b6d6dSopenharmony_ci        }
2002e5b6d6dSopenharmony_ci        srcLimit = src;
2012e5b6d6dSopenharmony_ci        if(ch != 0) {
2022e5b6d6dSopenharmony_ci            /* "complicated" case, find the end of the remaining string */
2032e5b6d6dSopenharmony_ci            while(*++srcLimit != 0) {}
2042e5b6d6dSopenharmony_ci        }
2052e5b6d6dSopenharmony_ci    } else {
2062e5b6d6dSopenharmony_ci        srcLimit = (src!=NULL)?(src + srcLength):NULL;
2072e5b6d6dSopenharmony_ci    }
2082e5b6d6dSopenharmony_ci
2092e5b6d6dSopenharmony_ci    /* convert with length */
2102e5b6d6dSopenharmony_ci    while(src < srcLimit) {
2112e5b6d6dSopenharmony_ci        ch = *src++;
2122e5b6d6dSopenharmony_ci        if(!U16_IS_SURROGATE(ch)) {
2132e5b6d6dSopenharmony_ci            /* write or count ch below */
2142e5b6d6dSopenharmony_ci        } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
2152e5b6d6dSopenharmony_ci            ++src;
2162e5b6d6dSopenharmony_ci            ch = U16_GET_SUPPLEMENTARY(ch, ch2);
2172e5b6d6dSopenharmony_ci        } else if((ch = subchar) < 0) {
2182e5b6d6dSopenharmony_ci            /* unpaired surrogate */
2192e5b6d6dSopenharmony_ci            *pErrorCode = U_INVALID_CHAR_FOUND;
2202e5b6d6dSopenharmony_ci            return NULL;
2212e5b6d6dSopenharmony_ci        } else {
2222e5b6d6dSopenharmony_ci            ++numSubstitutions;
2232e5b6d6dSopenharmony_ci        }
2242e5b6d6dSopenharmony_ci        if(pDest < destLimit) {
2252e5b6d6dSopenharmony_ci            *pDest++ = ch;
2262e5b6d6dSopenharmony_ci        } else {
2272e5b6d6dSopenharmony_ci            ++reqLength;
2282e5b6d6dSopenharmony_ci        }
2292e5b6d6dSopenharmony_ci    }
2302e5b6d6dSopenharmony_ci
2312e5b6d6dSopenharmony_ci    reqLength += (int32_t)(pDest - dest);
2322e5b6d6dSopenharmony_ci    if(pDestLength) {
2332e5b6d6dSopenharmony_ci        *pDestLength = reqLength;
2342e5b6d6dSopenharmony_ci    }
2352e5b6d6dSopenharmony_ci    if(pNumSubstitutions != NULL) {
2362e5b6d6dSopenharmony_ci        *pNumSubstitutions = numSubstitutions;
2372e5b6d6dSopenharmony_ci    }
2382e5b6d6dSopenharmony_ci
2392e5b6d6dSopenharmony_ci    /* Terminate the buffer */
2402e5b6d6dSopenharmony_ci    u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
2412e5b6d6dSopenharmony_ci
2422e5b6d6dSopenharmony_ci    return dest;
2432e5b6d6dSopenharmony_ci}
2442e5b6d6dSopenharmony_ci
2452e5b6d6dSopenharmony_ciU_CAPI UChar32* U_EXPORT2
2462e5b6d6dSopenharmony_ciu_strToUTF32(UChar32 *dest,
2472e5b6d6dSopenharmony_ci             int32_t destCapacity,
2482e5b6d6dSopenharmony_ci             int32_t *pDestLength,
2492e5b6d6dSopenharmony_ci             const UChar *src,
2502e5b6d6dSopenharmony_ci             int32_t srcLength,
2512e5b6d6dSopenharmony_ci             UErrorCode *pErrorCode) {
2522e5b6d6dSopenharmony_ci    return u_strToUTF32WithSub(
2532e5b6d6dSopenharmony_ci            dest, destCapacity, pDestLength,
2542e5b6d6dSopenharmony_ci            src, srcLength,
2552e5b6d6dSopenharmony_ci            U_SENTINEL, NULL,
2562e5b6d6dSopenharmony_ci            pErrorCode);
2572e5b6d6dSopenharmony_ci}
2582e5b6d6dSopenharmony_ci
2592e5b6d6dSopenharmony_ciU_CAPI UChar* U_EXPORT2
2602e5b6d6dSopenharmony_ciu_strFromUTF8WithSub(UChar *dest,
2612e5b6d6dSopenharmony_ci              int32_t destCapacity,
2622e5b6d6dSopenharmony_ci              int32_t *pDestLength,
2632e5b6d6dSopenharmony_ci              const char* src,
2642e5b6d6dSopenharmony_ci              int32_t srcLength,
2652e5b6d6dSopenharmony_ci              UChar32 subchar, int32_t *pNumSubstitutions,
2662e5b6d6dSopenharmony_ci              UErrorCode *pErrorCode){
2672e5b6d6dSopenharmony_ci    /* args check */
2682e5b6d6dSopenharmony_ci    if(U_FAILURE(*pErrorCode)) {
2692e5b6d6dSopenharmony_ci        return NULL;
2702e5b6d6dSopenharmony_ci    }
2712e5b6d6dSopenharmony_ci    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
2722e5b6d6dSopenharmony_ci        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
2732e5b6d6dSopenharmony_ci        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
2742e5b6d6dSopenharmony_ci    ) {
2752e5b6d6dSopenharmony_ci        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2762e5b6d6dSopenharmony_ci        return NULL;
2772e5b6d6dSopenharmony_ci    }
2782e5b6d6dSopenharmony_ci
2792e5b6d6dSopenharmony_ci    if(pNumSubstitutions!=NULL) {
2802e5b6d6dSopenharmony_ci        *pNumSubstitutions=0;
2812e5b6d6dSopenharmony_ci    }
2822e5b6d6dSopenharmony_ci    UChar *pDest = dest;
2832e5b6d6dSopenharmony_ci    UChar *pDestLimit = dest+destCapacity;
2842e5b6d6dSopenharmony_ci    int32_t reqLength = 0;
2852e5b6d6dSopenharmony_ci    int32_t numSubstitutions=0;
2862e5b6d6dSopenharmony_ci
2872e5b6d6dSopenharmony_ci    /*
2882e5b6d6dSopenharmony_ci     * Inline processing of UTF-8 byte sequences:
2892e5b6d6dSopenharmony_ci     *
2902e5b6d6dSopenharmony_ci     * Byte sequences for the most common characters are handled inline in
2912e5b6d6dSopenharmony_ci     * the conversion loops. In order to reduce the path lengths for those
2922e5b6d6dSopenharmony_ci     * characters, the tests are arranged in a kind of binary search.
2932e5b6d6dSopenharmony_ci     * ASCII (<=0x7f) is checked first, followed by the dividing point
2942e5b6d6dSopenharmony_ci     * between 2- and 3-byte sequences (0xe0).
2952e5b6d6dSopenharmony_ci     * The 3-byte branch is tested first to speed up CJK text.
2962e5b6d6dSopenharmony_ci     * The compiler should combine the subtractions for the two tests for 0xe0.
2972e5b6d6dSopenharmony_ci     * Each branch then tests for the other end of its range.
2982e5b6d6dSopenharmony_ci     */
2992e5b6d6dSopenharmony_ci
3002e5b6d6dSopenharmony_ci    if(srcLength < 0){
3012e5b6d6dSopenharmony_ci        /*
3022e5b6d6dSopenharmony_ci         * Transform a NUL-terminated string.
3032e5b6d6dSopenharmony_ci         * The code explicitly checks for NULs only in the lead byte position.
3042e5b6d6dSopenharmony_ci         * A NUL byte in the trail byte position fails the trail byte range check anyway.
3052e5b6d6dSopenharmony_ci         */
3062e5b6d6dSopenharmony_ci        int32_t i;
3072e5b6d6dSopenharmony_ci        UChar32 c;
3082e5b6d6dSopenharmony_ci        for(i = 0; (c = (uint8_t)src[i]) != 0 && (pDest < pDestLimit);) {
3092e5b6d6dSopenharmony_ci            // modified copy of U8_NEXT()
3102e5b6d6dSopenharmony_ci            ++i;
3112e5b6d6dSopenharmony_ci            if(U8_IS_SINGLE(c)) {
3122e5b6d6dSopenharmony_ci                *pDest++=(UChar)c;
3132e5b6d6dSopenharmony_ci            } else {
3142e5b6d6dSopenharmony_ci                uint8_t __t1, __t2;
3152e5b6d6dSopenharmony_ci                if( /* handle U+0800..U+FFFF inline */
3162e5b6d6dSopenharmony_ci                        (0xe0<=(c) && (c)<0xf0) &&
3172e5b6d6dSopenharmony_ci                        U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
3182e5b6d6dSopenharmony_ci                        (__t2=src[(i)+1]-0x80)<=0x3f) {
3192e5b6d6dSopenharmony_ci                    *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
3202e5b6d6dSopenharmony_ci                    i+=2;
3212e5b6d6dSopenharmony_ci                } else if( /* handle U+0080..U+07FF inline */
3222e5b6d6dSopenharmony_ci                        ((c)<0xe0 && (c)>=0xc2) &&
3232e5b6d6dSopenharmony_ci                        (__t1=src[i]-0x80)<=0x3f) {
3242e5b6d6dSopenharmony_ci                    *pDest++ = (((c)&0x1f)<<6)|__t1;
3252e5b6d6dSopenharmony_ci                    ++(i);
3262e5b6d6dSopenharmony_ci                } else {
3272e5b6d6dSopenharmony_ci                    /* function call for "complicated" and error cases */
3282e5b6d6dSopenharmony_ci                    (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1);
3292e5b6d6dSopenharmony_ci                    if(c<0 && (++numSubstitutions, c = subchar) < 0) {
3302e5b6d6dSopenharmony_ci                        *pErrorCode = U_INVALID_CHAR_FOUND;
3312e5b6d6dSopenharmony_ci                        return NULL;
3322e5b6d6dSopenharmony_ci                    } else if(c<=0xFFFF) {
3332e5b6d6dSopenharmony_ci                        *(pDest++)=(UChar)c;
3342e5b6d6dSopenharmony_ci                    } else {
3352e5b6d6dSopenharmony_ci                        *(pDest++)=U16_LEAD(c);
3362e5b6d6dSopenharmony_ci                        if(pDest<pDestLimit) {
3372e5b6d6dSopenharmony_ci                            *(pDest++)=U16_TRAIL(c);
3382e5b6d6dSopenharmony_ci                        } else {
3392e5b6d6dSopenharmony_ci                            reqLength++;
3402e5b6d6dSopenharmony_ci                            break;
3412e5b6d6dSopenharmony_ci                        }
3422e5b6d6dSopenharmony_ci                    }
3432e5b6d6dSopenharmony_ci                }
3442e5b6d6dSopenharmony_ci            }
3452e5b6d6dSopenharmony_ci        }
3462e5b6d6dSopenharmony_ci
3472e5b6d6dSopenharmony_ci        /* Pre-flight the rest of the string. */
3482e5b6d6dSopenharmony_ci        while((c = (uint8_t)src[i]) != 0) {
3492e5b6d6dSopenharmony_ci            // modified copy of U8_NEXT()
3502e5b6d6dSopenharmony_ci            ++i;
3512e5b6d6dSopenharmony_ci            if(U8_IS_SINGLE(c)) {
3522e5b6d6dSopenharmony_ci                ++reqLength;
3532e5b6d6dSopenharmony_ci            } else {
3542e5b6d6dSopenharmony_ci                uint8_t __t1, __t2;
3552e5b6d6dSopenharmony_ci                if( /* handle U+0800..U+FFFF inline */
3562e5b6d6dSopenharmony_ci                        (0xe0<=(c) && (c)<0xf0) &&
3572e5b6d6dSopenharmony_ci                        U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
3582e5b6d6dSopenharmony_ci                        (__t2=src[(i)+1]-0x80)<=0x3f) {
3592e5b6d6dSopenharmony_ci                    ++reqLength;
3602e5b6d6dSopenharmony_ci                    i+=2;
3612e5b6d6dSopenharmony_ci                } else if( /* handle U+0080..U+07FF inline */
3622e5b6d6dSopenharmony_ci                        ((c)<0xe0 && (c)>=0xc2) &&
3632e5b6d6dSopenharmony_ci                        (__t1=src[i]-0x80)<=0x3f) {
3642e5b6d6dSopenharmony_ci                    ++reqLength;
3652e5b6d6dSopenharmony_ci                    ++(i);
3662e5b6d6dSopenharmony_ci                } else {
3672e5b6d6dSopenharmony_ci                    /* function call for "complicated" and error cases */
3682e5b6d6dSopenharmony_ci                    (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1);
3692e5b6d6dSopenharmony_ci                    if(c<0 && (++numSubstitutions, c = subchar) < 0) {
3702e5b6d6dSopenharmony_ci                        *pErrorCode = U_INVALID_CHAR_FOUND;
3712e5b6d6dSopenharmony_ci                        return NULL;
3722e5b6d6dSopenharmony_ci                    }
3732e5b6d6dSopenharmony_ci                    reqLength += U16_LENGTH(c);
3742e5b6d6dSopenharmony_ci                }
3752e5b6d6dSopenharmony_ci            }
3762e5b6d6dSopenharmony_ci        }
3772e5b6d6dSopenharmony_ci    } else /* srcLength >= 0 */ {
3782e5b6d6dSopenharmony_ci        /* Faster loop without ongoing checking for srcLength and pDestLimit. */
3792e5b6d6dSopenharmony_ci        int32_t i = 0;
3802e5b6d6dSopenharmony_ci        UChar32 c;
3812e5b6d6dSopenharmony_ci        for(;;) {
3822e5b6d6dSopenharmony_ci            /*
3832e5b6d6dSopenharmony_ci             * Each iteration of the inner loop progresses by at most 3 UTF-8
3842e5b6d6dSopenharmony_ci             * bytes and one UChar, for most characters.
3852e5b6d6dSopenharmony_ci             * For supplementary code points (4 & 2), which are rare,
3862e5b6d6dSopenharmony_ci             * there is an additional adjustment.
3872e5b6d6dSopenharmony_ci             */
3882e5b6d6dSopenharmony_ci            int32_t count = (int32_t)(pDestLimit - pDest);
3892e5b6d6dSopenharmony_ci            int32_t count2 = (srcLength - i) / 3;
3902e5b6d6dSopenharmony_ci            if(count > count2) {
3912e5b6d6dSopenharmony_ci                count = count2; /* min(remaining dest, remaining src/3) */
3922e5b6d6dSopenharmony_ci            }
3932e5b6d6dSopenharmony_ci            if(count < 3) {
3942e5b6d6dSopenharmony_ci                /*
3952e5b6d6dSopenharmony_ci                 * Too much overhead if we get near the end of the string,
3962e5b6d6dSopenharmony_ci                 * continue with the next loop.
3972e5b6d6dSopenharmony_ci                 */
3982e5b6d6dSopenharmony_ci                break;
3992e5b6d6dSopenharmony_ci            }
4002e5b6d6dSopenharmony_ci
4012e5b6d6dSopenharmony_ci            do {
4022e5b6d6dSopenharmony_ci                // modified copy of U8_NEXT()
4032e5b6d6dSopenharmony_ci                c = (uint8_t)src[i++];
4042e5b6d6dSopenharmony_ci                if(U8_IS_SINGLE(c)) {
4052e5b6d6dSopenharmony_ci                    *pDest++=(UChar)c;
4062e5b6d6dSopenharmony_ci                } else {
4072e5b6d6dSopenharmony_ci                    uint8_t __t1, __t2;
4082e5b6d6dSopenharmony_ci                    if( /* handle U+0800..U+FFFF inline */
4092e5b6d6dSopenharmony_ci                            (0xe0<=(c) && (c)<0xf0) &&
4102e5b6d6dSopenharmony_ci                            ((i)+1)<srcLength &&
4112e5b6d6dSopenharmony_ci                            U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
4122e5b6d6dSopenharmony_ci                            (__t2=src[(i)+1]-0x80)<=0x3f) {
4132e5b6d6dSopenharmony_ci                        *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
4142e5b6d6dSopenharmony_ci                        i+=2;
4152e5b6d6dSopenharmony_ci                    } else if( /* handle U+0080..U+07FF inline */
4162e5b6d6dSopenharmony_ci                            ((c)<0xe0 && (c)>=0xc2) &&
4172e5b6d6dSopenharmony_ci                            ((i)!=srcLength) &&
4182e5b6d6dSopenharmony_ci                            (__t1=src[i]-0x80)<=0x3f) {
4192e5b6d6dSopenharmony_ci                        *pDest++ = (((c)&0x1f)<<6)|__t1;
4202e5b6d6dSopenharmony_ci                        ++(i);
4212e5b6d6dSopenharmony_ci                    } else {
4222e5b6d6dSopenharmony_ci                        if(c >= 0xf0 || subchar > 0xffff) {
4232e5b6d6dSopenharmony_ci                            // We may read up to four bytes and write up to two UChars,
4242e5b6d6dSopenharmony_ci                            // which we didn't account for with computing count,
4252e5b6d6dSopenharmony_ci                            // so we adjust it here.
4262e5b6d6dSopenharmony_ci                            if(--count == 0) {
4272e5b6d6dSopenharmony_ci                                --i;  // back out byte c
4282e5b6d6dSopenharmony_ci                                break;
4292e5b6d6dSopenharmony_ci                            }
4302e5b6d6dSopenharmony_ci                        }
4312e5b6d6dSopenharmony_ci
4322e5b6d6dSopenharmony_ci                        /* function call for "complicated" and error cases */
4332e5b6d6dSopenharmony_ci                        (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
4342e5b6d6dSopenharmony_ci                        if(c<0 && (++numSubstitutions, c = subchar) < 0) {
4352e5b6d6dSopenharmony_ci                            *pErrorCode = U_INVALID_CHAR_FOUND;
4362e5b6d6dSopenharmony_ci                            return NULL;
4372e5b6d6dSopenharmony_ci                        } else if(c<=0xFFFF) {
4382e5b6d6dSopenharmony_ci                            *(pDest++)=(UChar)c;
4392e5b6d6dSopenharmony_ci                        } else {
4402e5b6d6dSopenharmony_ci                            *(pDest++)=U16_LEAD(c);
4412e5b6d6dSopenharmony_ci                            *(pDest++)=U16_TRAIL(c);
4422e5b6d6dSopenharmony_ci                        }
4432e5b6d6dSopenharmony_ci                    }
4442e5b6d6dSopenharmony_ci                }
4452e5b6d6dSopenharmony_ci            } while(--count > 0);
4462e5b6d6dSopenharmony_ci        }
4472e5b6d6dSopenharmony_ci
4482e5b6d6dSopenharmony_ci        while(i < srcLength && (pDest < pDestLimit)) {
4492e5b6d6dSopenharmony_ci            // modified copy of U8_NEXT()
4502e5b6d6dSopenharmony_ci            c = (uint8_t)src[i++];
4512e5b6d6dSopenharmony_ci            if(U8_IS_SINGLE(c)) {
4522e5b6d6dSopenharmony_ci                *pDest++=(UChar)c;
4532e5b6d6dSopenharmony_ci            } else {
4542e5b6d6dSopenharmony_ci                uint8_t __t1, __t2;
4552e5b6d6dSopenharmony_ci                if( /* handle U+0800..U+FFFF inline */
4562e5b6d6dSopenharmony_ci                        (0xe0<=(c) && (c)<0xf0) &&
4572e5b6d6dSopenharmony_ci                        ((i)+1)<srcLength &&
4582e5b6d6dSopenharmony_ci                        U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
4592e5b6d6dSopenharmony_ci                        (__t2=src[(i)+1]-0x80)<=0x3f) {
4602e5b6d6dSopenharmony_ci                    *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2;
4612e5b6d6dSopenharmony_ci                    i+=2;
4622e5b6d6dSopenharmony_ci                } else if( /* handle U+0080..U+07FF inline */
4632e5b6d6dSopenharmony_ci                        ((c)<0xe0 && (c)>=0xc2) &&
4642e5b6d6dSopenharmony_ci                        ((i)!=srcLength) &&
4652e5b6d6dSopenharmony_ci                        (__t1=src[i]-0x80)<=0x3f) {
4662e5b6d6dSopenharmony_ci                    *pDest++ = (((c)&0x1f)<<6)|__t1;
4672e5b6d6dSopenharmony_ci                    ++(i);
4682e5b6d6dSopenharmony_ci                } else {
4692e5b6d6dSopenharmony_ci                    /* function call for "complicated" and error cases */
4702e5b6d6dSopenharmony_ci                    (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
4712e5b6d6dSopenharmony_ci                    if(c<0 && (++numSubstitutions, c = subchar) < 0) {
4722e5b6d6dSopenharmony_ci                        *pErrorCode = U_INVALID_CHAR_FOUND;
4732e5b6d6dSopenharmony_ci                        return NULL;
4742e5b6d6dSopenharmony_ci                    } else if(c<=0xFFFF) {
4752e5b6d6dSopenharmony_ci                        *(pDest++)=(UChar)c;
4762e5b6d6dSopenharmony_ci                    } else {
4772e5b6d6dSopenharmony_ci                        *(pDest++)=U16_LEAD(c);
4782e5b6d6dSopenharmony_ci                        if(pDest<pDestLimit) {
4792e5b6d6dSopenharmony_ci                            *(pDest++)=U16_TRAIL(c);
4802e5b6d6dSopenharmony_ci                        } else {
4812e5b6d6dSopenharmony_ci                            reqLength++;
4822e5b6d6dSopenharmony_ci                            break;
4832e5b6d6dSopenharmony_ci                        }
4842e5b6d6dSopenharmony_ci                    }
4852e5b6d6dSopenharmony_ci                }
4862e5b6d6dSopenharmony_ci            }
4872e5b6d6dSopenharmony_ci        }
4882e5b6d6dSopenharmony_ci
4892e5b6d6dSopenharmony_ci        /* Pre-flight the rest of the string. */
4902e5b6d6dSopenharmony_ci        while(i < srcLength) {
4912e5b6d6dSopenharmony_ci            // modified copy of U8_NEXT()
4922e5b6d6dSopenharmony_ci            c = (uint8_t)src[i++];
4932e5b6d6dSopenharmony_ci            if(U8_IS_SINGLE(c)) {
4942e5b6d6dSopenharmony_ci                ++reqLength;
4952e5b6d6dSopenharmony_ci            } else {
4962e5b6d6dSopenharmony_ci                uint8_t __t1, __t2;
4972e5b6d6dSopenharmony_ci                if( /* handle U+0800..U+FFFF inline */
4982e5b6d6dSopenharmony_ci                        (0xe0<=(c) && (c)<0xf0) &&
4992e5b6d6dSopenharmony_ci                        ((i)+1)<srcLength &&
5002e5b6d6dSopenharmony_ci                        U8_IS_VALID_LEAD3_AND_T1((c), src[i]) &&
5012e5b6d6dSopenharmony_ci                        (__t2=src[(i)+1]-0x80)<=0x3f) {
5022e5b6d6dSopenharmony_ci                    ++reqLength;
5032e5b6d6dSopenharmony_ci                    i+=2;
5042e5b6d6dSopenharmony_ci                } else if( /* handle U+0080..U+07FF inline */
5052e5b6d6dSopenharmony_ci                        ((c)<0xe0 && (c)>=0xc2) &&
5062e5b6d6dSopenharmony_ci                        ((i)!=srcLength) &&
5072e5b6d6dSopenharmony_ci                        (__t1=src[i]-0x80)<=0x3f) {
5082e5b6d6dSopenharmony_ci                    ++reqLength;
5092e5b6d6dSopenharmony_ci                    ++(i);
5102e5b6d6dSopenharmony_ci                } else {
5112e5b6d6dSopenharmony_ci                    /* function call for "complicated" and error cases */
5122e5b6d6dSopenharmony_ci                    (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1);
5132e5b6d6dSopenharmony_ci                    if(c<0 && (++numSubstitutions, c = subchar) < 0) {
5142e5b6d6dSopenharmony_ci                        *pErrorCode = U_INVALID_CHAR_FOUND;
5152e5b6d6dSopenharmony_ci                        return NULL;
5162e5b6d6dSopenharmony_ci                    }
5172e5b6d6dSopenharmony_ci                    reqLength += U16_LENGTH(c);
5182e5b6d6dSopenharmony_ci                }
5192e5b6d6dSopenharmony_ci            }
5202e5b6d6dSopenharmony_ci        }
5212e5b6d6dSopenharmony_ci    }
5222e5b6d6dSopenharmony_ci
5232e5b6d6dSopenharmony_ci    reqLength+=(int32_t)(pDest - dest);
5242e5b6d6dSopenharmony_ci
5252e5b6d6dSopenharmony_ci    if(pNumSubstitutions!=NULL) {
5262e5b6d6dSopenharmony_ci        *pNumSubstitutions=numSubstitutions;
5272e5b6d6dSopenharmony_ci    }
5282e5b6d6dSopenharmony_ci
5292e5b6d6dSopenharmony_ci    if(pDestLength){
5302e5b6d6dSopenharmony_ci        *pDestLength = reqLength;
5312e5b6d6dSopenharmony_ci    }
5322e5b6d6dSopenharmony_ci
5332e5b6d6dSopenharmony_ci    /* Terminate the buffer */
5342e5b6d6dSopenharmony_ci    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
5352e5b6d6dSopenharmony_ci
5362e5b6d6dSopenharmony_ci    return dest;
5372e5b6d6dSopenharmony_ci}
5382e5b6d6dSopenharmony_ci
5392e5b6d6dSopenharmony_ciU_CAPI UChar* U_EXPORT2
5402e5b6d6dSopenharmony_ciu_strFromUTF8(UChar *dest,
5412e5b6d6dSopenharmony_ci              int32_t destCapacity,
5422e5b6d6dSopenharmony_ci              int32_t *pDestLength,
5432e5b6d6dSopenharmony_ci              const char* src,
5442e5b6d6dSopenharmony_ci              int32_t srcLength,
5452e5b6d6dSopenharmony_ci              UErrorCode *pErrorCode){
5462e5b6d6dSopenharmony_ci    return u_strFromUTF8WithSub(
5472e5b6d6dSopenharmony_ci            dest, destCapacity, pDestLength,
5482e5b6d6dSopenharmony_ci            src, srcLength,
5492e5b6d6dSopenharmony_ci            U_SENTINEL, NULL,
5502e5b6d6dSopenharmony_ci            pErrorCode);
5512e5b6d6dSopenharmony_ci}
5522e5b6d6dSopenharmony_ci
5532e5b6d6dSopenharmony_ciU_CAPI UChar * U_EXPORT2
5542e5b6d6dSopenharmony_ciu_strFromUTF8Lenient(UChar *dest,
5552e5b6d6dSopenharmony_ci                     int32_t destCapacity,
5562e5b6d6dSopenharmony_ci                     int32_t *pDestLength,
5572e5b6d6dSopenharmony_ci                     const char *src,
5582e5b6d6dSopenharmony_ci                     int32_t srcLength,
5592e5b6d6dSopenharmony_ci                     UErrorCode *pErrorCode) {
5602e5b6d6dSopenharmony_ci    UChar *pDest = dest;
5612e5b6d6dSopenharmony_ci    UChar32 ch;
5622e5b6d6dSopenharmony_ci    int32_t reqLength = 0;
5632e5b6d6dSopenharmony_ci    uint8_t* pSrc = (uint8_t*) src;
5642e5b6d6dSopenharmony_ci
5652e5b6d6dSopenharmony_ci    /* args check */
5662e5b6d6dSopenharmony_ci    if(U_FAILURE(*pErrorCode)){
5672e5b6d6dSopenharmony_ci        return NULL;
5682e5b6d6dSopenharmony_ci    }
5692e5b6d6dSopenharmony_ci
5702e5b6d6dSopenharmony_ci    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
5712e5b6d6dSopenharmony_ci        (destCapacity<0) || (dest == NULL && destCapacity > 0)
5722e5b6d6dSopenharmony_ci    ) {
5732e5b6d6dSopenharmony_ci        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
5742e5b6d6dSopenharmony_ci        return NULL;
5752e5b6d6dSopenharmony_ci    }
5762e5b6d6dSopenharmony_ci
5772e5b6d6dSopenharmony_ci    if(srcLength < 0) {
5782e5b6d6dSopenharmony_ci        /* Transform a NUL-terminated string. */
5792e5b6d6dSopenharmony_ci        UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;
5802e5b6d6dSopenharmony_ci        uint8_t t1, t2, t3; /* trail bytes */
5812e5b6d6dSopenharmony_ci
5822e5b6d6dSopenharmony_ci        while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
5832e5b6d6dSopenharmony_ci            if(ch < 0xc0) {
5842e5b6d6dSopenharmony_ci                /*
5852e5b6d6dSopenharmony_ci                 * ASCII, or a trail byte in lead position which is treated like
5862e5b6d6dSopenharmony_ci                 * a single-byte sequence for better character boundary
5872e5b6d6dSopenharmony_ci                 * resynchronization after illegal sequences.
5882e5b6d6dSopenharmony_ci                 */
5892e5b6d6dSopenharmony_ci                *pDest++=(UChar)ch;
5902e5b6d6dSopenharmony_ci                ++pSrc;
5912e5b6d6dSopenharmony_ci                continue;
5922e5b6d6dSopenharmony_ci            } else if(ch < 0xe0) { /* U+0080..U+07FF */
5932e5b6d6dSopenharmony_ci                if((t1 = pSrc[1]) != 0) {
5942e5b6d6dSopenharmony_ci                    /* 0x3080 = (0xc0 << 6) + 0x80 */
5952e5b6d6dSopenharmony_ci                    *pDest++ = (UChar)((ch << 6) + t1 - 0x3080);
5962e5b6d6dSopenharmony_ci                    pSrc += 2;
5972e5b6d6dSopenharmony_ci                    continue;
5982e5b6d6dSopenharmony_ci                }
5992e5b6d6dSopenharmony_ci            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
6002e5b6d6dSopenharmony_ci                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) {
6012e5b6d6dSopenharmony_ci                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
6022e5b6d6dSopenharmony_ci                    /* 0x2080 = (0x80 << 6) + 0x80 */
6032e5b6d6dSopenharmony_ci                    *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080);
6042e5b6d6dSopenharmony_ci                    pSrc += 3;
6052e5b6d6dSopenharmony_ci                    continue;
6062e5b6d6dSopenharmony_ci                }
6072e5b6d6dSopenharmony_ci            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
6082e5b6d6dSopenharmony_ci                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) {
6092e5b6d6dSopenharmony_ci                    pSrc += 4;
6102e5b6d6dSopenharmony_ci                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
6112e5b6d6dSopenharmony_ci                    ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080;
6122e5b6d6dSopenharmony_ci                    *(pDest++) = U16_LEAD(ch);
6132e5b6d6dSopenharmony_ci                    if(pDest < pDestLimit) {
6142e5b6d6dSopenharmony_ci                        *(pDest++) = U16_TRAIL(ch);
6152e5b6d6dSopenharmony_ci                    } else {
6162e5b6d6dSopenharmony_ci                        reqLength = 1;
6172e5b6d6dSopenharmony_ci                        break;
6182e5b6d6dSopenharmony_ci                    }
6192e5b6d6dSopenharmony_ci                    continue;
6202e5b6d6dSopenharmony_ci                }
6212e5b6d6dSopenharmony_ci            }
6222e5b6d6dSopenharmony_ci
6232e5b6d6dSopenharmony_ci            /* truncated character at the end */
6242e5b6d6dSopenharmony_ci            *pDest++ = 0xfffd;
6252e5b6d6dSopenharmony_ci            while(*++pSrc != 0) {}
6262e5b6d6dSopenharmony_ci            break;
6272e5b6d6dSopenharmony_ci        }
6282e5b6d6dSopenharmony_ci
6292e5b6d6dSopenharmony_ci        /* Pre-flight the rest of the string. */
6302e5b6d6dSopenharmony_ci        while((ch = *pSrc) != 0) {
6312e5b6d6dSopenharmony_ci            if(ch < 0xc0) {
6322e5b6d6dSopenharmony_ci                /*
6332e5b6d6dSopenharmony_ci                 * ASCII, or a trail byte in lead position which is treated like
6342e5b6d6dSopenharmony_ci                 * a single-byte sequence for better character boundary
6352e5b6d6dSopenharmony_ci                 * resynchronization after illegal sequences.
6362e5b6d6dSopenharmony_ci                 */
6372e5b6d6dSopenharmony_ci                ++reqLength;
6382e5b6d6dSopenharmony_ci                ++pSrc;
6392e5b6d6dSopenharmony_ci                continue;
6402e5b6d6dSopenharmony_ci            } else if(ch < 0xe0) { /* U+0080..U+07FF */
6412e5b6d6dSopenharmony_ci                if(pSrc[1] != 0) {
6422e5b6d6dSopenharmony_ci                    ++reqLength;
6432e5b6d6dSopenharmony_ci                    pSrc += 2;
6442e5b6d6dSopenharmony_ci                    continue;
6452e5b6d6dSopenharmony_ci                }
6462e5b6d6dSopenharmony_ci            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
6472e5b6d6dSopenharmony_ci                if(pSrc[1] != 0 && pSrc[2] != 0) {
6482e5b6d6dSopenharmony_ci                    ++reqLength;
6492e5b6d6dSopenharmony_ci                    pSrc += 3;
6502e5b6d6dSopenharmony_ci                    continue;
6512e5b6d6dSopenharmony_ci                }
6522e5b6d6dSopenharmony_ci            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
6532e5b6d6dSopenharmony_ci                if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) {
6542e5b6d6dSopenharmony_ci                    reqLength += 2;
6552e5b6d6dSopenharmony_ci                    pSrc += 4;
6562e5b6d6dSopenharmony_ci                    continue;
6572e5b6d6dSopenharmony_ci                }
6582e5b6d6dSopenharmony_ci            }
6592e5b6d6dSopenharmony_ci
6602e5b6d6dSopenharmony_ci            /* truncated character at the end */
6612e5b6d6dSopenharmony_ci            ++reqLength;
6622e5b6d6dSopenharmony_ci            break;
6632e5b6d6dSopenharmony_ci        }
6642e5b6d6dSopenharmony_ci    } else /* srcLength >= 0 */ {
6652e5b6d6dSopenharmony_ci      const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL;
6662e5b6d6dSopenharmony_ci
6672e5b6d6dSopenharmony_ci        /*
6682e5b6d6dSopenharmony_ci         * This function requires that if srcLength is given, then it must be
6692e5b6d6dSopenharmony_ci         * destCapatity >= srcLength so that we need not check for
6702e5b6d6dSopenharmony_ci         * destination buffer overflow in the loop.
6712e5b6d6dSopenharmony_ci         */
6722e5b6d6dSopenharmony_ci        if(destCapacity < srcLength) {
6732e5b6d6dSopenharmony_ci            if(pDestLength != NULL) {
6742e5b6d6dSopenharmony_ci                *pDestLength = srcLength; /* this likely overestimates the true destLength! */
6752e5b6d6dSopenharmony_ci            }
6762e5b6d6dSopenharmony_ci            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
6772e5b6d6dSopenharmony_ci            return NULL;
6782e5b6d6dSopenharmony_ci        }
6792e5b6d6dSopenharmony_ci
6802e5b6d6dSopenharmony_ci        if((pSrcLimit - pSrc) >= 4) {
6812e5b6d6dSopenharmony_ci            pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
6822e5b6d6dSopenharmony_ci
6832e5b6d6dSopenharmony_ci            /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
6842e5b6d6dSopenharmony_ci            do {
6852e5b6d6dSopenharmony_ci                ch = *pSrc++;
6862e5b6d6dSopenharmony_ci                if(ch < 0xc0) {
6872e5b6d6dSopenharmony_ci                    /*
6882e5b6d6dSopenharmony_ci                     * ASCII, or a trail byte in lead position which is treated like
6892e5b6d6dSopenharmony_ci                     * a single-byte sequence for better character boundary
6902e5b6d6dSopenharmony_ci                     * resynchronization after illegal sequences.
6912e5b6d6dSopenharmony_ci                     */
6922e5b6d6dSopenharmony_ci                    *pDest++=(UChar)ch;
6932e5b6d6dSopenharmony_ci                } else if(ch < 0xe0) { /* U+0080..U+07FF */
6942e5b6d6dSopenharmony_ci                    /* 0x3080 = (0xc0 << 6) + 0x80 */
6952e5b6d6dSopenharmony_ci                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
6962e5b6d6dSopenharmony_ci                } else if(ch < 0xf0) { /* U+0800..U+FFFF */
6972e5b6d6dSopenharmony_ci                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
6982e5b6d6dSopenharmony_ci                    /* 0x2080 = (0x80 << 6) + 0x80 */
6992e5b6d6dSopenharmony_ci                    ch = (ch << 12) + (*pSrc++ << 6);
7002e5b6d6dSopenharmony_ci                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
7012e5b6d6dSopenharmony_ci                } else /* f0..f4 */ { /* U+10000..U+10FFFF */
7022e5b6d6dSopenharmony_ci                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
7032e5b6d6dSopenharmony_ci                    ch = (ch << 18) + (*pSrc++ << 12);
7042e5b6d6dSopenharmony_ci                    ch += *pSrc++ << 6;
7052e5b6d6dSopenharmony_ci                    ch += *pSrc++ - 0x3c82080;
7062e5b6d6dSopenharmony_ci                    *(pDest++) = U16_LEAD(ch);
7072e5b6d6dSopenharmony_ci                    *(pDest++) = U16_TRAIL(ch);
7082e5b6d6dSopenharmony_ci                }
7092e5b6d6dSopenharmony_ci            } while(pSrc < pSrcLimit);
7102e5b6d6dSopenharmony_ci
7112e5b6d6dSopenharmony_ci            pSrcLimit += 3; /* restore original pSrcLimit */
7122e5b6d6dSopenharmony_ci        }
7132e5b6d6dSopenharmony_ci
7142e5b6d6dSopenharmony_ci        while(pSrc < pSrcLimit) {
7152e5b6d6dSopenharmony_ci            ch = *pSrc++;
7162e5b6d6dSopenharmony_ci            if(ch < 0xc0) {
7172e5b6d6dSopenharmony_ci                /*
7182e5b6d6dSopenharmony_ci                 * ASCII, or a trail byte in lead position which is treated like
7192e5b6d6dSopenharmony_ci                 * a single-byte sequence for better character boundary
7202e5b6d6dSopenharmony_ci                 * resynchronization after illegal sequences.
7212e5b6d6dSopenharmony_ci                 */
7222e5b6d6dSopenharmony_ci                *pDest++=(UChar)ch;
7232e5b6d6dSopenharmony_ci                continue;
7242e5b6d6dSopenharmony_ci            } else if(ch < 0xe0) { /* U+0080..U+07FF */
7252e5b6d6dSopenharmony_ci                if(pSrc < pSrcLimit) {
7262e5b6d6dSopenharmony_ci                    /* 0x3080 = (0xc0 << 6) + 0x80 */
7272e5b6d6dSopenharmony_ci                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
7282e5b6d6dSopenharmony_ci                    continue;
7292e5b6d6dSopenharmony_ci                }
7302e5b6d6dSopenharmony_ci            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
7312e5b6d6dSopenharmony_ci                if((pSrcLimit - pSrc) >= 2) {
7322e5b6d6dSopenharmony_ci                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
7332e5b6d6dSopenharmony_ci                    /* 0x2080 = (0x80 << 6) + 0x80 */
7342e5b6d6dSopenharmony_ci                    ch = (ch << 12) + (*pSrc++ << 6);
7352e5b6d6dSopenharmony_ci                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
7362e5b6d6dSopenharmony_ci                    pSrc += 3;
7372e5b6d6dSopenharmony_ci                    continue;
7382e5b6d6dSopenharmony_ci                }
7392e5b6d6dSopenharmony_ci            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
7402e5b6d6dSopenharmony_ci                if((pSrcLimit - pSrc) >= 3) {
7412e5b6d6dSopenharmony_ci                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
7422e5b6d6dSopenharmony_ci                    ch = (ch << 18) + (*pSrc++ << 12);
7432e5b6d6dSopenharmony_ci                    ch += *pSrc++ << 6;
7442e5b6d6dSopenharmony_ci                    ch += *pSrc++ - 0x3c82080;
7452e5b6d6dSopenharmony_ci                    *(pDest++) = U16_LEAD(ch);
7462e5b6d6dSopenharmony_ci                    *(pDest++) = U16_TRAIL(ch);
7472e5b6d6dSopenharmony_ci                    pSrc += 4;
7482e5b6d6dSopenharmony_ci                    continue;
7492e5b6d6dSopenharmony_ci                }
7502e5b6d6dSopenharmony_ci            }
7512e5b6d6dSopenharmony_ci
7522e5b6d6dSopenharmony_ci            /* truncated character at the end */
7532e5b6d6dSopenharmony_ci            *pDest++ = 0xfffd;
7542e5b6d6dSopenharmony_ci            break;
7552e5b6d6dSopenharmony_ci        }
7562e5b6d6dSopenharmony_ci    }
7572e5b6d6dSopenharmony_ci
7582e5b6d6dSopenharmony_ci    reqLength+=(int32_t)(pDest - dest);
7592e5b6d6dSopenharmony_ci
7602e5b6d6dSopenharmony_ci    if(pDestLength){
7612e5b6d6dSopenharmony_ci        *pDestLength = reqLength;
7622e5b6d6dSopenharmony_ci    }
7632e5b6d6dSopenharmony_ci
7642e5b6d6dSopenharmony_ci    /* Terminate the buffer */
7652e5b6d6dSopenharmony_ci    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
7662e5b6d6dSopenharmony_ci
7672e5b6d6dSopenharmony_ci    return dest;
7682e5b6d6dSopenharmony_ci}
7692e5b6d6dSopenharmony_ci
7702e5b6d6dSopenharmony_cistatic inline uint8_t *
7712e5b6d6dSopenharmony_ci_appendUTF8(uint8_t *pDest, UChar32 c) {
7722e5b6d6dSopenharmony_ci    /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */
7732e5b6d6dSopenharmony_ci    if((c)<=0x7f) {
7742e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)c;
7752e5b6d6dSopenharmony_ci    } else if(c<=0x7ff) {
7762e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)((c>>6)|0xc0);
7772e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)((c&0x3f)|0x80);
7782e5b6d6dSopenharmony_ci    } else if(c<=0xffff) {
7792e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)((c>>12)|0xe0);
7802e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80);
7812e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)(((c)&0x3f)|0x80);
7822e5b6d6dSopenharmony_ci    } else /* if((uint32_t)(c)<=0x10ffff) */ {
7832e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)(((c)>>18)|0xf0);
7842e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80);
7852e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80);
7862e5b6d6dSopenharmony_ci        *pDest++=(uint8_t)(((c)&0x3f)|0x80);
7872e5b6d6dSopenharmony_ci    }
7882e5b6d6dSopenharmony_ci    return pDest;
7892e5b6d6dSopenharmony_ci}
7902e5b6d6dSopenharmony_ci
7912e5b6d6dSopenharmony_ci
/*
 * Converts a UTF-16 string to UTF-8, optionally substituting unpaired
 * surrogates.
 *
 * dest/destCapacity   Output buffer and its capacity in bytes.
 * pDestLength         If not NULL, receives the number of bytes required for
 *                     the whole output (bytes written plus bytes only counted).
 * pSrc/srcLength      Input UChars; srcLength == -1 means NUL-terminated.
 * subchar             Code point written in place of an unpaired surrogate.
 *                     If negative, an unpaired surrogate sets
 *                     U_INVALID_CHAR_FOUND and the function returns NULL.
 *                     Values above 0x10ffff and surrogate code points are
 *                     rejected with U_ILLEGAL_ARGUMENT_ERROR.
 * pNumSubstitutions   If not NULL, set to 0 after the argument checks and to
 *                     the number of substitutions made on normal completion.
 * pErrorCode          In/out error code; nothing is done if it already
 *                     indicates a failure.
 *
 * Each input path first converts while the output fits, then switches to a
 * counting-only loop for the remainder of the input once a character does not
 * fit into dest.
 *
 * Returns dest after u_terminateChars() has been called on it, or NULL on the
 * error paths described above.
 */
U_CAPI char* U_EXPORT2
u_strToUTF8WithSub(char *dest,
            int32_t destCapacity,
            int32_t *pDestLength,
            const UChar *pSrc,
            int32_t srcLength,
            UChar32 subchar, int32_t *pNumSubstitutions,
            UErrorCode *pErrorCode){
    int32_t reqLength=0;
    uint32_t ch=0,ch2=0;
    uint8_t *pDest = (uint8_t *)dest;
    uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL;
    int32_t numSubstitutions;

    /* args check */
    if(U_FAILURE(*pErrorCode)){
        return NULL;
    }

    if( (pSrc==NULL && srcLength!=0) || srcLength < -1 ||
        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    ) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=0;
    }
    numSubstitutions=0;

    if(srcLength==-1) {
        /*
         * NUL-terminated input: convert while the output fits.
         * When a character does not fit, its UTF-8 length is stored in
         * reqLength (the character is already consumed from pSrc) and the
         * counting loop below takes over.
         */
        while((ch=*pSrc)!=0) {
            ++pSrc;
            if(ch <= 0x7f) {
                if(pDest<pDestLimit) {
                    *pDest++ = (uint8_t)ch;
                } else {
                    reqLength = 1;
                    break;
                }
            } else if(ch <= 0x7ff) {
                if((pDestLimit - pDest) >= 2) {
                    *pDest++=(uint8_t)((ch>>6)|0xc0);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else {
                    reqLength = 2;
                    break;
                }
            } else if(ch <= 0xd7ff || ch >= 0xe000) {
                if((pDestLimit - pDest) >= 3) {
                    *pDest++=(uint8_t)((ch>>12)|0xe0);
                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else {
                    reqLength = 3;
                    break;
                }
            } else /* ch is a surrogate */ {
                int32_t length;

                /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/
                if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
                    ++pSrc;
                    ch=U16_GET_SUPPLEMENTARY(ch, ch2);
                } else if(subchar>=0) {
                    ch=subchar;
                    ++numSubstitutions;
                } else {
                    /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                    *pErrorCode = U_INVALID_CHAR_FOUND;
                    return NULL;
                }

                length = U8_LENGTH(ch);
                if((pDestLimit - pDest) >= length) {
                    /* convert and append*/
                    pDest=_appendUTF8(pDest, ch);
                } else {
                    reqLength = length;
                    break;
                }
            }
        }
        /*
         * Count the bytes needed for the rest of the input without writing.
         * If the loop above ended at the NUL, pSrc still points at it and
         * this loop ends immediately.
         */
        while((ch=*pSrc++)!=0) {
            if(ch<=0x7f) {
                ++reqLength;
            } else if(ch<=0x7ff) {
                reqLength+=2;
            } else if(!U16_IS_SURROGATE(ch)) {
                reqLength+=3;
            } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
                ++pSrc;
                reqLength+=4;
            } else if(subchar>=0) {
                reqLength+=U8_LENGTH(subchar);
                ++numSubstitutions;
            } else {
                /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
        }
    } else {
        const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL;
        int32_t count;

        /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
        for(;;) {
            /*
             * Each iteration of the inner loop progresses by at most 3 UTF-8
             * bytes and one UChar, for most characters.
             * For supplementary code points (4 & 2), which are rare,
             * there is an additional adjustment.
             */
            count = (int32_t)((pDestLimit - pDest) / 3);
            /* srcLength is reused here as the number of UChars still unread */
            srcLength = (int32_t)(pSrcLimit - pSrc);
            if(count > srcLength) {
                count = srcLength; /* min(remaining dest/3, remaining src) */
            }
            if(count < 3) {
                /*
                 * Too much overhead if we get near the end of the string,
                 * continue with the next loop.
                 */
                break;
            }
            do {
                ch=*pSrc++;
                if(ch <= 0x7f) {
                    *pDest++ = (uint8_t)ch;
                } else if(ch <= 0x7ff) {
                    *pDest++=(uint8_t)((ch>>6)|0xc0);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else if(ch <= 0xd7ff || ch >= 0xe000) {
                    *pDest++=(uint8_t)((ch>>12)|0xe0);
                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else /* ch is a surrogate */ {
                    /*
                     * We will read two UChars and probably output four bytes,
                     * which we didn't account for with computing count,
                     * so we adjust it here.
                     */
                    if(--count == 0) {
                        --pSrc; /* undo ch=*pSrc++ for the lead surrogate */
                        break;  /* recompute count */
                    }

                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
                        ++pSrc;
                        ch=U16_GET_SUPPLEMENTARY(ch, ch2);

                        /* writing 4 bytes per 2 UChars is ok */
                        *pDest++=(uint8_t)((ch>>18)|0xf0);
                        *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80);
                        *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                        *pDest++=(uint8_t)((ch&0x3f)|0x80);
                    } else  {
                        /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                        if(subchar>=0) {
                            ch=subchar;
                            ++numSubstitutions;
                        } else {
                            *pErrorCode = U_INVALID_CHAR_FOUND;
                            return NULL;
                        }

                        /* convert and append*/
                        pDest=_appendUTF8(pDest, ch);
                    }
                }
            } while(--count > 0);
        }

        /*
         * Careful loop for the remainder, checking both limits per character.
         * When a character does not fit, its UTF-8 length is stored in
         * reqLength and the counting loop below takes over.
         */
        while(pSrc<pSrcLimit) {
            ch=*pSrc++;
            if(ch <= 0x7f) {
                if(pDest<pDestLimit) {
                    *pDest++ = (uint8_t)ch;
                } else {
                    reqLength = 1;
                    break;
                }
            } else if(ch <= 0x7ff) {
                if((pDestLimit - pDest) >= 2) {
                    *pDest++=(uint8_t)((ch>>6)|0xc0);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else {
                    reqLength = 2;
                    break;
                }
            } else if(ch <= 0xd7ff || ch >= 0xe000) {
                if((pDestLimit - pDest) >= 3) {
                    *pDest++=(uint8_t)((ch>>12)|0xe0);
                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else {
                    reqLength = 3;
                    break;
                }
            } else /* ch is a surrogate */ {
                int32_t length;

                /* unlike the NUL-terminated path, the trail read must be guarded by pSrcLimit */
                if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
                    ++pSrc;
                    ch=U16_GET_SUPPLEMENTARY(ch, ch2);
                } else if(subchar>=0) {
                    ch=subchar;
                    ++numSubstitutions;
                } else {
                    /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                    *pErrorCode = U_INVALID_CHAR_FOUND;
                    return NULL;
                }

                length = U8_LENGTH(ch);
                if((pDestLimit - pDest) >= length) {
                    /* convert and append*/
                    pDest=_appendUTF8(pDest, ch);
                } else {
                    reqLength = length;
                    break;
                }
            }
        }
        /* Count the bytes needed for the rest of the input without writing. */
        while(pSrc<pSrcLimit) {
            ch=*pSrc++;
            if(ch<=0x7f) {
                ++reqLength;
            } else if(ch<=0x7ff) {
                reqLength+=2;
            } else if(!U16_IS_SURROGATE(ch)) {
                reqLength+=3;
            } else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
                ++pSrc;
                reqLength+=4;
            } else if(subchar>=0) {
                reqLength+=U8_LENGTH(subchar);
                ++numSubstitutions;
            } else {
                /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
        }
    }

    /* bytes actually written plus the bytes that were only counted */
    reqLength+=(int32_t)(pDest - (uint8_t *)dest);

    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=numSubstitutions;
    }

    if(pDestLength){
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
    return dest;
}
10552e5b6d6dSopenharmony_ci
10562e5b6d6dSopenharmony_ciU_CAPI char* U_EXPORT2
10572e5b6d6dSopenharmony_ciu_strToUTF8(char *dest,
10582e5b6d6dSopenharmony_ci            int32_t destCapacity,
10592e5b6d6dSopenharmony_ci            int32_t *pDestLength,
10602e5b6d6dSopenharmony_ci            const UChar *pSrc,
10612e5b6d6dSopenharmony_ci            int32_t srcLength,
10622e5b6d6dSopenharmony_ci            UErrorCode *pErrorCode){
10632e5b6d6dSopenharmony_ci    return u_strToUTF8WithSub(
10642e5b6d6dSopenharmony_ci            dest, destCapacity, pDestLength,
10652e5b6d6dSopenharmony_ci            pSrc, srcLength,
10662e5b6d6dSopenharmony_ci            U_SENTINEL, NULL,
10672e5b6d6dSopenharmony_ci            pErrorCode);
10682e5b6d6dSopenharmony_ci}
10692e5b6d6dSopenharmony_ci
10702e5b6d6dSopenharmony_ciU_CAPI UChar* U_EXPORT2
10712e5b6d6dSopenharmony_ciu_strFromJavaModifiedUTF8WithSub(
10722e5b6d6dSopenharmony_ci        UChar *dest,
10732e5b6d6dSopenharmony_ci        int32_t destCapacity,
10742e5b6d6dSopenharmony_ci        int32_t *pDestLength,
10752e5b6d6dSopenharmony_ci        const char *src,
10762e5b6d6dSopenharmony_ci        int32_t srcLength,
10772e5b6d6dSopenharmony_ci        UChar32 subchar, int32_t *pNumSubstitutions,
10782e5b6d6dSopenharmony_ci        UErrorCode *pErrorCode) {
10792e5b6d6dSopenharmony_ci    /* args check */
10802e5b6d6dSopenharmony_ci    if(U_FAILURE(*pErrorCode)) {
10812e5b6d6dSopenharmony_ci        return NULL;
10822e5b6d6dSopenharmony_ci    }
10832e5b6d6dSopenharmony_ci    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
10842e5b6d6dSopenharmony_ci        (dest==NULL && destCapacity!=0) || destCapacity<0 ||
10852e5b6d6dSopenharmony_ci        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
10862e5b6d6dSopenharmony_ci    ) {
10872e5b6d6dSopenharmony_ci        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
10882e5b6d6dSopenharmony_ci        return NULL;
10892e5b6d6dSopenharmony_ci    }
10902e5b6d6dSopenharmony_ci
10912e5b6d6dSopenharmony_ci    if(pNumSubstitutions!=NULL) {
10922e5b6d6dSopenharmony_ci        *pNumSubstitutions=0;
10932e5b6d6dSopenharmony_ci    }
10942e5b6d6dSopenharmony_ci    UChar *pDest = dest;
10952e5b6d6dSopenharmony_ci    UChar *pDestLimit = dest+destCapacity;
10962e5b6d6dSopenharmony_ci    int32_t reqLength = 0;
10972e5b6d6dSopenharmony_ci    int32_t numSubstitutions=0;
10982e5b6d6dSopenharmony_ci
10992e5b6d6dSopenharmony_ci    if(srcLength < 0) {
11002e5b6d6dSopenharmony_ci        /*
11012e5b6d6dSopenharmony_ci         * Transform a NUL-terminated ASCII string.
11022e5b6d6dSopenharmony_ci         * Handle non-ASCII strings with slower code.
11032e5b6d6dSopenharmony_ci         */
11042e5b6d6dSopenharmony_ci        UChar32 c;
11052e5b6d6dSopenharmony_ci        while(((c = (uint8_t)*src) != 0) && c <= 0x7f && (pDest < pDestLimit)) {
11062e5b6d6dSopenharmony_ci            *pDest++=(UChar)c;
11072e5b6d6dSopenharmony_ci            ++src;
11082e5b6d6dSopenharmony_ci        }
11092e5b6d6dSopenharmony_ci        if(c == 0) {
11102e5b6d6dSopenharmony_ci            reqLength=(int32_t)(pDest - dest);
11112e5b6d6dSopenharmony_ci            if(pDestLength) {
11122e5b6d6dSopenharmony_ci                *pDestLength = reqLength;
11132e5b6d6dSopenharmony_ci            }
11142e5b6d6dSopenharmony_ci
11152e5b6d6dSopenharmony_ci            /* Terminate the buffer */
11162e5b6d6dSopenharmony_ci            u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
11172e5b6d6dSopenharmony_ci            return dest;
11182e5b6d6dSopenharmony_ci        }
11192e5b6d6dSopenharmony_ci        srcLength = static_cast<int32_t>(uprv_strlen(src));
11202e5b6d6dSopenharmony_ci    }
11212e5b6d6dSopenharmony_ci
11222e5b6d6dSopenharmony_ci    /* Faster loop without ongoing checking for srcLength and pDestLimit. */
11232e5b6d6dSopenharmony_ci    UChar32 ch;
11242e5b6d6dSopenharmony_ci    uint8_t t1, t2;
11252e5b6d6dSopenharmony_ci    int32_t i = 0;
11262e5b6d6dSopenharmony_ci    for(;;) {
11272e5b6d6dSopenharmony_ci        int32_t count = (int32_t)(pDestLimit - pDest);
11282e5b6d6dSopenharmony_ci        int32_t count2 = srcLength - i;
11292e5b6d6dSopenharmony_ci        if(count >= count2 && srcLength > 0 && U8_IS_SINGLE(*src)) {
11302e5b6d6dSopenharmony_ci            /* fast ASCII loop */
11312e5b6d6dSopenharmony_ci            int32_t start = i;
11322e5b6d6dSopenharmony_ci            uint8_t b;
11332e5b6d6dSopenharmony_ci            while(i < srcLength && U8_IS_SINGLE(b = src[i])) {
11342e5b6d6dSopenharmony_ci                *pDest++=b;
11352e5b6d6dSopenharmony_ci                ++i;
11362e5b6d6dSopenharmony_ci            }
11372e5b6d6dSopenharmony_ci            int32_t delta = i - start;
11382e5b6d6dSopenharmony_ci            count -= delta;
11392e5b6d6dSopenharmony_ci            count2 -= delta;
11402e5b6d6dSopenharmony_ci        }
11412e5b6d6dSopenharmony_ci        /*
11422e5b6d6dSopenharmony_ci         * Each iteration of the inner loop progresses by at most 3 UTF-8
11432e5b6d6dSopenharmony_ci         * bytes and one UChar.
11442e5b6d6dSopenharmony_ci         */
11452e5b6d6dSopenharmony_ci        if(subchar > 0xFFFF) {
11462e5b6d6dSopenharmony_ci            break;
11472e5b6d6dSopenharmony_ci        }
11482e5b6d6dSopenharmony_ci        count2 /= 3;
11492e5b6d6dSopenharmony_ci        if(count > count2) {
11502e5b6d6dSopenharmony_ci            count = count2; /* min(remaining dest, remaining src/3) */
11512e5b6d6dSopenharmony_ci        }
11522e5b6d6dSopenharmony_ci        if(count < 3) {
11532e5b6d6dSopenharmony_ci            /*
11542e5b6d6dSopenharmony_ci             * Too much overhead if we get near the end of the string,
11552e5b6d6dSopenharmony_ci             * continue with the next loop.
11562e5b6d6dSopenharmony_ci             */
11572e5b6d6dSopenharmony_ci            break;
11582e5b6d6dSopenharmony_ci        }
11592e5b6d6dSopenharmony_ci        do {
11602e5b6d6dSopenharmony_ci            ch = (uint8_t)src[i++];
11612e5b6d6dSopenharmony_ci            if(U8_IS_SINGLE(ch)) {
11622e5b6d6dSopenharmony_ci                *pDest++=(UChar)ch;
11632e5b6d6dSopenharmony_ci            } else {
11642e5b6d6dSopenharmony_ci                if(ch >= 0xe0) {
11652e5b6d6dSopenharmony_ci                    if( /* handle U+0000..U+FFFF inline */
11662e5b6d6dSopenharmony_ci                        ch <= 0xef &&
11672e5b6d6dSopenharmony_ci                        (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
11682e5b6d6dSopenharmony_ci                        (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
11692e5b6d6dSopenharmony_ci                    ) {
11702e5b6d6dSopenharmony_ci                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
11712e5b6d6dSopenharmony_ci                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
11722e5b6d6dSopenharmony_ci                        i += 2;
11732e5b6d6dSopenharmony_ci                        continue;
11742e5b6d6dSopenharmony_ci                    }
11752e5b6d6dSopenharmony_ci                } else {
11762e5b6d6dSopenharmony_ci                    if( /* handle U+0000..U+07FF inline */
11772e5b6d6dSopenharmony_ci                        ch >= 0xc0 &&
11782e5b6d6dSopenharmony_ci                        (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
11792e5b6d6dSopenharmony_ci                    ) {
11802e5b6d6dSopenharmony_ci                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
11812e5b6d6dSopenharmony_ci                        ++i;
11822e5b6d6dSopenharmony_ci                        continue;
11832e5b6d6dSopenharmony_ci                    }
11842e5b6d6dSopenharmony_ci                }
11852e5b6d6dSopenharmony_ci
11862e5b6d6dSopenharmony_ci                if(subchar < 0) {
11872e5b6d6dSopenharmony_ci                    *pErrorCode = U_INVALID_CHAR_FOUND;
11882e5b6d6dSopenharmony_ci                    return NULL;
11892e5b6d6dSopenharmony_ci                } else if(subchar > 0xffff && --count == 0) {
11902e5b6d6dSopenharmony_ci                    /*
11912e5b6d6dSopenharmony_ci                     * We need to write two UChars, adjusted count for that,
11922e5b6d6dSopenharmony_ci                     * and ran out of space.
11932e5b6d6dSopenharmony_ci                     */
11942e5b6d6dSopenharmony_ci                    --i;  // back out byte ch
11952e5b6d6dSopenharmony_ci                    break;
11962e5b6d6dSopenharmony_ci                } else {
11972e5b6d6dSopenharmony_ci                    /* function call for error cases */
11982e5b6d6dSopenharmony_ci                    utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
11992e5b6d6dSopenharmony_ci                    ++numSubstitutions;
12002e5b6d6dSopenharmony_ci                    *(pDest++)=(UChar)subchar;
12012e5b6d6dSopenharmony_ci                }
12022e5b6d6dSopenharmony_ci            }
12032e5b6d6dSopenharmony_ci        } while(--count > 0);
12042e5b6d6dSopenharmony_ci    }
12052e5b6d6dSopenharmony_ci
12062e5b6d6dSopenharmony_ci    while(i < srcLength && (pDest < pDestLimit)) {
12072e5b6d6dSopenharmony_ci        ch = (uint8_t)src[i++];
12082e5b6d6dSopenharmony_ci        if(U8_IS_SINGLE(ch)){
12092e5b6d6dSopenharmony_ci            *pDest++=(UChar)ch;
12102e5b6d6dSopenharmony_ci        } else {
12112e5b6d6dSopenharmony_ci            if(ch >= 0xe0) {
12122e5b6d6dSopenharmony_ci                if( /* handle U+0000..U+FFFF inline */
12132e5b6d6dSopenharmony_ci                    ch <= 0xef &&
12142e5b6d6dSopenharmony_ci                    (i+1) < srcLength &&
12152e5b6d6dSopenharmony_ci                    (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
12162e5b6d6dSopenharmony_ci                    (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
12172e5b6d6dSopenharmony_ci                ) {
12182e5b6d6dSopenharmony_ci                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
12192e5b6d6dSopenharmony_ci                    *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
12202e5b6d6dSopenharmony_ci                    i += 2;
12212e5b6d6dSopenharmony_ci                    continue;
12222e5b6d6dSopenharmony_ci                }
12232e5b6d6dSopenharmony_ci            } else {
12242e5b6d6dSopenharmony_ci                if( /* handle U+0000..U+07FF inline */
12252e5b6d6dSopenharmony_ci                    ch >= 0xc0 &&
12262e5b6d6dSopenharmony_ci                    i < srcLength &&
12272e5b6d6dSopenharmony_ci                    (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
12282e5b6d6dSopenharmony_ci                ) {
12292e5b6d6dSopenharmony_ci                    *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
12302e5b6d6dSopenharmony_ci                    ++i;
12312e5b6d6dSopenharmony_ci                    continue;
12322e5b6d6dSopenharmony_ci                }
12332e5b6d6dSopenharmony_ci            }
12342e5b6d6dSopenharmony_ci
12352e5b6d6dSopenharmony_ci            if(subchar < 0) {
12362e5b6d6dSopenharmony_ci                *pErrorCode = U_INVALID_CHAR_FOUND;
12372e5b6d6dSopenharmony_ci                return NULL;
12382e5b6d6dSopenharmony_ci            } else {
12392e5b6d6dSopenharmony_ci                /* function call for error cases */
12402e5b6d6dSopenharmony_ci                utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
12412e5b6d6dSopenharmony_ci                ++numSubstitutions;
12422e5b6d6dSopenharmony_ci                if(subchar<=0xFFFF) {
12432e5b6d6dSopenharmony_ci                    *(pDest++)=(UChar)subchar;
12442e5b6d6dSopenharmony_ci                } else {
12452e5b6d6dSopenharmony_ci                    *(pDest++)=U16_LEAD(subchar);
12462e5b6d6dSopenharmony_ci                    if(pDest<pDestLimit) {
12472e5b6d6dSopenharmony_ci                        *(pDest++)=U16_TRAIL(subchar);
12482e5b6d6dSopenharmony_ci                    } else {
12492e5b6d6dSopenharmony_ci                        reqLength++;
12502e5b6d6dSopenharmony_ci                        break;
12512e5b6d6dSopenharmony_ci                    }
12522e5b6d6dSopenharmony_ci                }
12532e5b6d6dSopenharmony_ci            }
12542e5b6d6dSopenharmony_ci        }
12552e5b6d6dSopenharmony_ci    }
12562e5b6d6dSopenharmony_ci
12572e5b6d6dSopenharmony_ci    /* Pre-flight the rest of the string. */
12582e5b6d6dSopenharmony_ci    while(i < srcLength) {
12592e5b6d6dSopenharmony_ci        ch = (uint8_t)src[i++];
12602e5b6d6dSopenharmony_ci        if(U8_IS_SINGLE(ch)) {
12612e5b6d6dSopenharmony_ci            reqLength++;
12622e5b6d6dSopenharmony_ci        } else {
12632e5b6d6dSopenharmony_ci            if(ch >= 0xe0) {
12642e5b6d6dSopenharmony_ci                if( /* handle U+0000..U+FFFF inline */
12652e5b6d6dSopenharmony_ci                    ch <= 0xef &&
12662e5b6d6dSopenharmony_ci                    (i+1) < srcLength &&
12672e5b6d6dSopenharmony_ci                    (uint8_t)(src[i] - 0x80) <= 0x3f &&
12682e5b6d6dSopenharmony_ci                    (uint8_t)(src[i+1] - 0x80) <= 0x3f
12692e5b6d6dSopenharmony_ci                ) {
12702e5b6d6dSopenharmony_ci                    reqLength++;
12712e5b6d6dSopenharmony_ci                    i += 2;
12722e5b6d6dSopenharmony_ci                    continue;
12732e5b6d6dSopenharmony_ci                }
12742e5b6d6dSopenharmony_ci            } else {
12752e5b6d6dSopenharmony_ci                if( /* handle U+0000..U+07FF inline */
12762e5b6d6dSopenharmony_ci                    ch >= 0xc0 &&
12772e5b6d6dSopenharmony_ci                    i < srcLength &&
12782e5b6d6dSopenharmony_ci                    (uint8_t)(src[i] - 0x80) <= 0x3f
12792e5b6d6dSopenharmony_ci                ) {
12802e5b6d6dSopenharmony_ci                    reqLength++;
12812e5b6d6dSopenharmony_ci                    ++i;
12822e5b6d6dSopenharmony_ci                    continue;
12832e5b6d6dSopenharmony_ci                }
12842e5b6d6dSopenharmony_ci            }
12852e5b6d6dSopenharmony_ci
12862e5b6d6dSopenharmony_ci            if(subchar < 0) {
12872e5b6d6dSopenharmony_ci                *pErrorCode = U_INVALID_CHAR_FOUND;
12882e5b6d6dSopenharmony_ci                return NULL;
12892e5b6d6dSopenharmony_ci            } else {
12902e5b6d6dSopenharmony_ci                /* function call for error cases */
12912e5b6d6dSopenharmony_ci                utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
12922e5b6d6dSopenharmony_ci                ++numSubstitutions;
12932e5b6d6dSopenharmony_ci                reqLength+=U16_LENGTH(ch);
12942e5b6d6dSopenharmony_ci            }
12952e5b6d6dSopenharmony_ci        }
12962e5b6d6dSopenharmony_ci    }
12972e5b6d6dSopenharmony_ci
12982e5b6d6dSopenharmony_ci    if(pNumSubstitutions!=NULL) {
12992e5b6d6dSopenharmony_ci        *pNumSubstitutions=numSubstitutions;
13002e5b6d6dSopenharmony_ci    }
13012e5b6d6dSopenharmony_ci
13022e5b6d6dSopenharmony_ci    reqLength+=(int32_t)(pDest - dest);
13032e5b6d6dSopenharmony_ci    if(pDestLength) {
13042e5b6d6dSopenharmony_ci        *pDestLength = reqLength;
13052e5b6d6dSopenharmony_ci    }
13062e5b6d6dSopenharmony_ci
13072e5b6d6dSopenharmony_ci    /* Terminate the buffer */
13082e5b6d6dSopenharmony_ci    u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
13092e5b6d6dSopenharmony_ci    return dest;
13102e5b6d6dSopenharmony_ci}
13112e5b6d6dSopenharmony_ci
13122e5b6d6dSopenharmony_ciU_CAPI char* U_EXPORT2
13132e5b6d6dSopenharmony_ciu_strToJavaModifiedUTF8(
13142e5b6d6dSopenharmony_ci        char *dest,
13152e5b6d6dSopenharmony_ci        int32_t destCapacity,
13162e5b6d6dSopenharmony_ci        int32_t *pDestLength,
13172e5b6d6dSopenharmony_ci        const UChar *src,
13182e5b6d6dSopenharmony_ci        int32_t srcLength,
13192e5b6d6dSopenharmony_ci        UErrorCode *pErrorCode) {
13202e5b6d6dSopenharmony_ci    int32_t reqLength=0;
13212e5b6d6dSopenharmony_ci    uint32_t ch=0;
13222e5b6d6dSopenharmony_ci    uint8_t *pDest = (uint8_t *)dest;
13232e5b6d6dSopenharmony_ci    uint8_t *pDestLimit = pDest + destCapacity;
13242e5b6d6dSopenharmony_ci    const UChar *pSrcLimit;
13252e5b6d6dSopenharmony_ci    int32_t count;
13262e5b6d6dSopenharmony_ci
13272e5b6d6dSopenharmony_ci    /* args check */
13282e5b6d6dSopenharmony_ci    if(U_FAILURE(*pErrorCode)){
13292e5b6d6dSopenharmony_ci        return NULL;
13302e5b6d6dSopenharmony_ci    }
13312e5b6d6dSopenharmony_ci    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
13322e5b6d6dSopenharmony_ci        (dest==NULL && destCapacity!=0) || destCapacity<0
13332e5b6d6dSopenharmony_ci    ) {
13342e5b6d6dSopenharmony_ci        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
13352e5b6d6dSopenharmony_ci        return NULL;
13362e5b6d6dSopenharmony_ci    }
13372e5b6d6dSopenharmony_ci
13382e5b6d6dSopenharmony_ci    if(srcLength==-1) {
13392e5b6d6dSopenharmony_ci        /* Convert NUL-terminated ASCII, then find the string length. */
13402e5b6d6dSopenharmony_ci        while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) {
13412e5b6d6dSopenharmony_ci            *pDest++ = (uint8_t)ch;
13422e5b6d6dSopenharmony_ci            ++src;
13432e5b6d6dSopenharmony_ci        }
13442e5b6d6dSopenharmony_ci        if(ch == 0) {
13452e5b6d6dSopenharmony_ci            reqLength=(int32_t)(pDest - (uint8_t *)dest);
13462e5b6d6dSopenharmony_ci            if(pDestLength) {
13472e5b6d6dSopenharmony_ci                *pDestLength = reqLength;
13482e5b6d6dSopenharmony_ci            }
13492e5b6d6dSopenharmony_ci
13502e5b6d6dSopenharmony_ci            /* Terminate the buffer */
13512e5b6d6dSopenharmony_ci            u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
13522e5b6d6dSopenharmony_ci            return dest;
13532e5b6d6dSopenharmony_ci        }
13542e5b6d6dSopenharmony_ci        srcLength = u_strlen(src);
13552e5b6d6dSopenharmony_ci    }
13562e5b6d6dSopenharmony_ci
13572e5b6d6dSopenharmony_ci    /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
13582e5b6d6dSopenharmony_ci    pSrcLimit = (src!=NULL)?(src+srcLength):NULL;
13592e5b6d6dSopenharmony_ci    for(;;) {
13602e5b6d6dSopenharmony_ci        count = (int32_t)(pDestLimit - pDest);
13612e5b6d6dSopenharmony_ci        srcLength = (int32_t)(pSrcLimit - src);
13622e5b6d6dSopenharmony_ci        if(count >= srcLength && srcLength > 0 && *src <= 0x7f) {
13632e5b6d6dSopenharmony_ci            /* fast ASCII loop */
13642e5b6d6dSopenharmony_ci            const UChar *prevSrc = src;
13652e5b6d6dSopenharmony_ci            int32_t delta;
13662e5b6d6dSopenharmony_ci            while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) {
13672e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)ch;
13682e5b6d6dSopenharmony_ci                ++src;
13692e5b6d6dSopenharmony_ci            }
13702e5b6d6dSopenharmony_ci            delta = (int32_t)(src - prevSrc);
13712e5b6d6dSopenharmony_ci            count -= delta;
13722e5b6d6dSopenharmony_ci            srcLength -= delta;
13732e5b6d6dSopenharmony_ci        }
13742e5b6d6dSopenharmony_ci        /*
13752e5b6d6dSopenharmony_ci         * Each iteration of the inner loop progresses by at most 3 UTF-8
13762e5b6d6dSopenharmony_ci         * bytes and one UChar.
13772e5b6d6dSopenharmony_ci         */
13782e5b6d6dSopenharmony_ci        count /= 3;
13792e5b6d6dSopenharmony_ci        if(count > srcLength) {
13802e5b6d6dSopenharmony_ci            count = srcLength; /* min(remaining dest/3, remaining src) */
13812e5b6d6dSopenharmony_ci        }
13822e5b6d6dSopenharmony_ci        if(count < 3) {
13832e5b6d6dSopenharmony_ci            /*
13842e5b6d6dSopenharmony_ci             * Too much overhead if we get near the end of the string,
13852e5b6d6dSopenharmony_ci             * continue with the next loop.
13862e5b6d6dSopenharmony_ci             */
13872e5b6d6dSopenharmony_ci            break;
13882e5b6d6dSopenharmony_ci        }
13892e5b6d6dSopenharmony_ci        do {
13902e5b6d6dSopenharmony_ci            ch=*src++;
13912e5b6d6dSopenharmony_ci            if(ch <= 0x7f && ch != 0) {
13922e5b6d6dSopenharmony_ci                *pDest++ = (uint8_t)ch;
13932e5b6d6dSopenharmony_ci            } else if(ch <= 0x7ff) {
13942e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)((ch>>6)|0xc0);
13952e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)((ch&0x3f)|0x80);
13962e5b6d6dSopenharmony_ci            } else {
13972e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)((ch>>12)|0xe0);
13982e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
13992e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)((ch&0x3f)|0x80);
14002e5b6d6dSopenharmony_ci            }
14012e5b6d6dSopenharmony_ci        } while(--count > 0);
14022e5b6d6dSopenharmony_ci    }
14032e5b6d6dSopenharmony_ci
14042e5b6d6dSopenharmony_ci    while(src<pSrcLimit) {
14052e5b6d6dSopenharmony_ci        ch=*src++;
14062e5b6d6dSopenharmony_ci        if(ch <= 0x7f && ch != 0) {
14072e5b6d6dSopenharmony_ci            if(pDest<pDestLimit) {
14082e5b6d6dSopenharmony_ci                *pDest++ = (uint8_t)ch;
14092e5b6d6dSopenharmony_ci            } else {
14102e5b6d6dSopenharmony_ci                reqLength = 1;
14112e5b6d6dSopenharmony_ci                break;
14122e5b6d6dSopenharmony_ci            }
14132e5b6d6dSopenharmony_ci        } else if(ch <= 0x7ff) {
14142e5b6d6dSopenharmony_ci            if((pDestLimit - pDest) >= 2) {
14152e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)((ch>>6)|0xc0);
14162e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)((ch&0x3f)|0x80);
14172e5b6d6dSopenharmony_ci            } else {
14182e5b6d6dSopenharmony_ci                reqLength = 2;
14192e5b6d6dSopenharmony_ci                break;
14202e5b6d6dSopenharmony_ci            }
14212e5b6d6dSopenharmony_ci        } else {
14222e5b6d6dSopenharmony_ci            if((pDestLimit - pDest) >= 3) {
14232e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)((ch>>12)|0xe0);
14242e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
14252e5b6d6dSopenharmony_ci                *pDest++=(uint8_t)((ch&0x3f)|0x80);
14262e5b6d6dSopenharmony_ci            } else {
14272e5b6d6dSopenharmony_ci                reqLength = 3;
14282e5b6d6dSopenharmony_ci                break;
14292e5b6d6dSopenharmony_ci            }
14302e5b6d6dSopenharmony_ci        }
14312e5b6d6dSopenharmony_ci    }
14322e5b6d6dSopenharmony_ci    while(src<pSrcLimit) {
14332e5b6d6dSopenharmony_ci        ch=*src++;
14342e5b6d6dSopenharmony_ci        if(ch <= 0x7f && ch != 0) {
14352e5b6d6dSopenharmony_ci            ++reqLength;
14362e5b6d6dSopenharmony_ci        } else if(ch<=0x7ff) {
14372e5b6d6dSopenharmony_ci            reqLength+=2;
14382e5b6d6dSopenharmony_ci        } else {
14392e5b6d6dSopenharmony_ci            reqLength+=3;
14402e5b6d6dSopenharmony_ci        }
14412e5b6d6dSopenharmony_ci    }
14422e5b6d6dSopenharmony_ci
14432e5b6d6dSopenharmony_ci    reqLength+=(int32_t)(pDest - (uint8_t *)dest);
14442e5b6d6dSopenharmony_ci    if(pDestLength){
14452e5b6d6dSopenharmony_ci        *pDestLength = reqLength;
14462e5b6d6dSopenharmony_ci    }
14472e5b6d6dSopenharmony_ci
14482e5b6d6dSopenharmony_ci    /* Terminate the buffer */
14492e5b6d6dSopenharmony_ci    u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
14502e5b6d6dSopenharmony_ci    return dest;
14512e5b6d6dSopenharmony_ci}
1452