12e5b6d6dSopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 32e5b6d6dSopenharmony_ci/* 42e5b6d6dSopenharmony_ci****************************************************************************** 52e5b6d6dSopenharmony_ci* 62e5b6d6dSopenharmony_ci* Copyright (C) 2001-2016, International Business Machines 72e5b6d6dSopenharmony_ci* Corporation and others. All Rights Reserved. 82e5b6d6dSopenharmony_ci* 92e5b6d6dSopenharmony_ci****************************************************************************** 102e5b6d6dSopenharmony_ci* 112e5b6d6dSopenharmony_ci* File ustrtrns.cpp 122e5b6d6dSopenharmony_ci* 132e5b6d6dSopenharmony_ci* Modification History: 142e5b6d6dSopenharmony_ci* 152e5b6d6dSopenharmony_ci* Date Name Description 162e5b6d6dSopenharmony_ci* 9/10/2001 Ram Creation. 172e5b6d6dSopenharmony_ci****************************************************************************** 182e5b6d6dSopenharmony_ci*/ 192e5b6d6dSopenharmony_ci 202e5b6d6dSopenharmony_ci/******************************************************************************* 212e5b6d6dSopenharmony_ci * 222e5b6d6dSopenharmony_ci * u_strTo* and u_strFrom* APIs 232e5b6d6dSopenharmony_ci * WCS functions moved to ustr_wcs.c for better modularization 242e5b6d6dSopenharmony_ci * 252e5b6d6dSopenharmony_ci ******************************************************************************* 262e5b6d6dSopenharmony_ci */ 272e5b6d6dSopenharmony_ci 282e5b6d6dSopenharmony_ci 292e5b6d6dSopenharmony_ci#include "unicode/putil.h" 302e5b6d6dSopenharmony_ci#include "unicode/ustring.h" 312e5b6d6dSopenharmony_ci#include "unicode/utf.h" 322e5b6d6dSopenharmony_ci#include "unicode/utf8.h" 332e5b6d6dSopenharmony_ci#include "unicode/utf16.h" 342e5b6d6dSopenharmony_ci#include "cstring.h" 352e5b6d6dSopenharmony_ci#include "cmemory.h" 362e5b6d6dSopenharmony_ci#include "ustr_imp.h" 372e5b6d6dSopenharmony_ci#include "uassert.h" 382e5b6d6dSopenharmony_ci 
392e5b6d6dSopenharmony_ciU_CAPI UChar* U_EXPORT2 402e5b6d6dSopenharmony_ciu_strFromUTF32WithSub(UChar *dest, 412e5b6d6dSopenharmony_ci int32_t destCapacity, 422e5b6d6dSopenharmony_ci int32_t *pDestLength, 432e5b6d6dSopenharmony_ci const UChar32 *src, 442e5b6d6dSopenharmony_ci int32_t srcLength, 452e5b6d6dSopenharmony_ci UChar32 subchar, int32_t *pNumSubstitutions, 462e5b6d6dSopenharmony_ci UErrorCode *pErrorCode) { 472e5b6d6dSopenharmony_ci const UChar32 *srcLimit; 482e5b6d6dSopenharmony_ci UChar32 ch; 492e5b6d6dSopenharmony_ci UChar *destLimit; 502e5b6d6dSopenharmony_ci UChar *pDest; 512e5b6d6dSopenharmony_ci int32_t reqLength; 522e5b6d6dSopenharmony_ci int32_t numSubstitutions; 532e5b6d6dSopenharmony_ci 542e5b6d6dSopenharmony_ci /* args check */ 552e5b6d6dSopenharmony_ci if(U_FAILURE(*pErrorCode)){ 562e5b6d6dSopenharmony_ci return NULL; 572e5b6d6dSopenharmony_ci } 582e5b6d6dSopenharmony_ci if( (src==NULL && srcLength!=0) || srcLength < -1 || 592e5b6d6dSopenharmony_ci (destCapacity<0) || (dest == NULL && destCapacity > 0) || 602e5b6d6dSopenharmony_ci subchar > 0x10ffff || U_IS_SURROGATE(subchar) 612e5b6d6dSopenharmony_ci ) { 622e5b6d6dSopenharmony_ci *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 632e5b6d6dSopenharmony_ci return NULL; 642e5b6d6dSopenharmony_ci } 652e5b6d6dSopenharmony_ci 662e5b6d6dSopenharmony_ci if(pNumSubstitutions != NULL) { 672e5b6d6dSopenharmony_ci *pNumSubstitutions = 0; 682e5b6d6dSopenharmony_ci } 692e5b6d6dSopenharmony_ci 702e5b6d6dSopenharmony_ci pDest = dest; 712e5b6d6dSopenharmony_ci destLimit = (dest!=NULL)?(dest + destCapacity):NULL; 722e5b6d6dSopenharmony_ci reqLength = 0; 732e5b6d6dSopenharmony_ci numSubstitutions = 0; 742e5b6d6dSopenharmony_ci 752e5b6d6dSopenharmony_ci if(srcLength < 0) { 762e5b6d6dSopenharmony_ci /* simple loop for conversion of a NUL-terminated BMP string */ 772e5b6d6dSopenharmony_ci while((ch=*src) != 0 && 782e5b6d6dSopenharmony_ci ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) { 792e5b6d6dSopenharmony_ci 
++src; 802e5b6d6dSopenharmony_ci if(pDest < destLimit) { 812e5b6d6dSopenharmony_ci *pDest++ = (UChar)ch; 822e5b6d6dSopenharmony_ci } else { 832e5b6d6dSopenharmony_ci ++reqLength; 842e5b6d6dSopenharmony_ci } 852e5b6d6dSopenharmony_ci } 862e5b6d6dSopenharmony_ci srcLimit = src; 872e5b6d6dSopenharmony_ci if(ch != 0) { 882e5b6d6dSopenharmony_ci /* "complicated" case, find the end of the remaining string */ 892e5b6d6dSopenharmony_ci while(*++srcLimit != 0) {} 902e5b6d6dSopenharmony_ci } 912e5b6d6dSopenharmony_ci } else { 922e5b6d6dSopenharmony_ci srcLimit = (src!=NULL)?(src + srcLength):NULL; 932e5b6d6dSopenharmony_ci } 942e5b6d6dSopenharmony_ci 952e5b6d6dSopenharmony_ci /* convert with length */ 962e5b6d6dSopenharmony_ci while(src < srcLimit) { 972e5b6d6dSopenharmony_ci ch = *src++; 982e5b6d6dSopenharmony_ci do { 992e5b6d6dSopenharmony_ci /* usually "loops" once; twice only for writing subchar */ 1002e5b6d6dSopenharmony_ci if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) { 1012e5b6d6dSopenharmony_ci if(pDest < destLimit) { 1022e5b6d6dSopenharmony_ci *pDest++ = (UChar)ch; 1032e5b6d6dSopenharmony_ci } else { 1042e5b6d6dSopenharmony_ci ++reqLength; 1052e5b6d6dSopenharmony_ci } 1062e5b6d6dSopenharmony_ci break; 1072e5b6d6dSopenharmony_ci } else if(0x10000 <= ch && ch <= 0x10ffff) { 1082e5b6d6dSopenharmony_ci if(pDest!=NULL && ((pDest + 2) <= destLimit)) { 1092e5b6d6dSopenharmony_ci *pDest++ = U16_LEAD(ch); 1102e5b6d6dSopenharmony_ci *pDest++ = U16_TRAIL(ch); 1112e5b6d6dSopenharmony_ci } else { 1122e5b6d6dSopenharmony_ci reqLength += 2; 1132e5b6d6dSopenharmony_ci } 1142e5b6d6dSopenharmony_ci break; 1152e5b6d6dSopenharmony_ci } else if((ch = subchar) < 0) { 1162e5b6d6dSopenharmony_ci /* surrogate code point, or not a Unicode code point at all */ 1172e5b6d6dSopenharmony_ci *pErrorCode = U_INVALID_CHAR_FOUND; 1182e5b6d6dSopenharmony_ci return NULL; 1192e5b6d6dSopenharmony_ci } else { 1202e5b6d6dSopenharmony_ci ++numSubstitutions; 1212e5b6d6dSopenharmony_ci } 
1222e5b6d6dSopenharmony_ci } while(true); 1232e5b6d6dSopenharmony_ci } 1242e5b6d6dSopenharmony_ci 1252e5b6d6dSopenharmony_ci reqLength += (int32_t)(pDest - dest); 1262e5b6d6dSopenharmony_ci if(pDestLength) { 1272e5b6d6dSopenharmony_ci *pDestLength = reqLength; 1282e5b6d6dSopenharmony_ci } 1292e5b6d6dSopenharmony_ci if(pNumSubstitutions != NULL) { 1302e5b6d6dSopenharmony_ci *pNumSubstitutions = numSubstitutions; 1312e5b6d6dSopenharmony_ci } 1322e5b6d6dSopenharmony_ci 1332e5b6d6dSopenharmony_ci /* Terminate the buffer */ 1342e5b6d6dSopenharmony_ci u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); 1352e5b6d6dSopenharmony_ci 1362e5b6d6dSopenharmony_ci return dest; 1372e5b6d6dSopenharmony_ci} 1382e5b6d6dSopenharmony_ci 1392e5b6d6dSopenharmony_ciU_CAPI UChar* U_EXPORT2 1402e5b6d6dSopenharmony_ciu_strFromUTF32(UChar *dest, 1412e5b6d6dSopenharmony_ci int32_t destCapacity, 1422e5b6d6dSopenharmony_ci int32_t *pDestLength, 1432e5b6d6dSopenharmony_ci const UChar32 *src, 1442e5b6d6dSopenharmony_ci int32_t srcLength, 1452e5b6d6dSopenharmony_ci UErrorCode *pErrorCode) { 1462e5b6d6dSopenharmony_ci return u_strFromUTF32WithSub( 1472e5b6d6dSopenharmony_ci dest, destCapacity, pDestLength, 1482e5b6d6dSopenharmony_ci src, srcLength, 1492e5b6d6dSopenharmony_ci U_SENTINEL, NULL, 1502e5b6d6dSopenharmony_ci pErrorCode); 1512e5b6d6dSopenharmony_ci} 1522e5b6d6dSopenharmony_ci 1532e5b6d6dSopenharmony_ciU_CAPI UChar32* U_EXPORT2 1542e5b6d6dSopenharmony_ciu_strToUTF32WithSub(UChar32 *dest, 1552e5b6d6dSopenharmony_ci int32_t destCapacity, 1562e5b6d6dSopenharmony_ci int32_t *pDestLength, 1572e5b6d6dSopenharmony_ci const UChar *src, 1582e5b6d6dSopenharmony_ci int32_t srcLength, 1592e5b6d6dSopenharmony_ci UChar32 subchar, int32_t *pNumSubstitutions, 1602e5b6d6dSopenharmony_ci UErrorCode *pErrorCode) { 1612e5b6d6dSopenharmony_ci const UChar *srcLimit; 1622e5b6d6dSopenharmony_ci UChar32 ch; 1632e5b6d6dSopenharmony_ci UChar ch2; 1642e5b6d6dSopenharmony_ci UChar32 *destLimit; 
1652e5b6d6dSopenharmony_ci UChar32 *pDest; 1662e5b6d6dSopenharmony_ci int32_t reqLength; 1672e5b6d6dSopenharmony_ci int32_t numSubstitutions; 1682e5b6d6dSopenharmony_ci 1692e5b6d6dSopenharmony_ci /* args check */ 1702e5b6d6dSopenharmony_ci if(U_FAILURE(*pErrorCode)){ 1712e5b6d6dSopenharmony_ci return NULL; 1722e5b6d6dSopenharmony_ci } 1732e5b6d6dSopenharmony_ci if( (src==NULL && srcLength!=0) || srcLength < -1 || 1742e5b6d6dSopenharmony_ci (destCapacity<0) || (dest == NULL && destCapacity > 0) || 1752e5b6d6dSopenharmony_ci subchar > 0x10ffff || U_IS_SURROGATE(subchar) 1762e5b6d6dSopenharmony_ci ) { 1772e5b6d6dSopenharmony_ci *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 1782e5b6d6dSopenharmony_ci return NULL; 1792e5b6d6dSopenharmony_ci } 1802e5b6d6dSopenharmony_ci 1812e5b6d6dSopenharmony_ci if(pNumSubstitutions != NULL) { 1822e5b6d6dSopenharmony_ci *pNumSubstitutions = 0; 1832e5b6d6dSopenharmony_ci } 1842e5b6d6dSopenharmony_ci 1852e5b6d6dSopenharmony_ci pDest = dest; 1862e5b6d6dSopenharmony_ci destLimit = (dest!=NULL)?(dest + destCapacity):NULL; 1872e5b6d6dSopenharmony_ci reqLength = 0; 1882e5b6d6dSopenharmony_ci numSubstitutions = 0; 1892e5b6d6dSopenharmony_ci 1902e5b6d6dSopenharmony_ci if(srcLength < 0) { 1912e5b6d6dSopenharmony_ci /* simple loop for conversion of a NUL-terminated BMP string */ 1922e5b6d6dSopenharmony_ci while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) { 1932e5b6d6dSopenharmony_ci ++src; 1942e5b6d6dSopenharmony_ci if(pDest < destLimit) { 1952e5b6d6dSopenharmony_ci *pDest++ = ch; 1962e5b6d6dSopenharmony_ci } else { 1972e5b6d6dSopenharmony_ci ++reqLength; 1982e5b6d6dSopenharmony_ci } 1992e5b6d6dSopenharmony_ci } 2002e5b6d6dSopenharmony_ci srcLimit = src; 2012e5b6d6dSopenharmony_ci if(ch != 0) { 2022e5b6d6dSopenharmony_ci /* "complicated" case, find the end of the remaining string */ 2032e5b6d6dSopenharmony_ci while(*++srcLimit != 0) {} 2042e5b6d6dSopenharmony_ci } 2052e5b6d6dSopenharmony_ci } else { 2062e5b6d6dSopenharmony_ci srcLimit = (src!=NULL)?(src + 
srcLength):NULL; 2072e5b6d6dSopenharmony_ci } 2082e5b6d6dSopenharmony_ci 2092e5b6d6dSopenharmony_ci /* convert with length */ 2102e5b6d6dSopenharmony_ci while(src < srcLimit) { 2112e5b6d6dSopenharmony_ci ch = *src++; 2122e5b6d6dSopenharmony_ci if(!U16_IS_SURROGATE(ch)) { 2132e5b6d6dSopenharmony_ci /* write or count ch below */ 2142e5b6d6dSopenharmony_ci } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) { 2152e5b6d6dSopenharmony_ci ++src; 2162e5b6d6dSopenharmony_ci ch = U16_GET_SUPPLEMENTARY(ch, ch2); 2172e5b6d6dSopenharmony_ci } else if((ch = subchar) < 0) { 2182e5b6d6dSopenharmony_ci /* unpaired surrogate */ 2192e5b6d6dSopenharmony_ci *pErrorCode = U_INVALID_CHAR_FOUND; 2202e5b6d6dSopenharmony_ci return NULL; 2212e5b6d6dSopenharmony_ci } else { 2222e5b6d6dSopenharmony_ci ++numSubstitutions; 2232e5b6d6dSopenharmony_ci } 2242e5b6d6dSopenharmony_ci if(pDest < destLimit) { 2252e5b6d6dSopenharmony_ci *pDest++ = ch; 2262e5b6d6dSopenharmony_ci } else { 2272e5b6d6dSopenharmony_ci ++reqLength; 2282e5b6d6dSopenharmony_ci } 2292e5b6d6dSopenharmony_ci } 2302e5b6d6dSopenharmony_ci 2312e5b6d6dSopenharmony_ci reqLength += (int32_t)(pDest - dest); 2322e5b6d6dSopenharmony_ci if(pDestLength) { 2332e5b6d6dSopenharmony_ci *pDestLength = reqLength; 2342e5b6d6dSopenharmony_ci } 2352e5b6d6dSopenharmony_ci if(pNumSubstitutions != NULL) { 2362e5b6d6dSopenharmony_ci *pNumSubstitutions = numSubstitutions; 2372e5b6d6dSopenharmony_ci } 2382e5b6d6dSopenharmony_ci 2392e5b6d6dSopenharmony_ci /* Terminate the buffer */ 2402e5b6d6dSopenharmony_ci u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode); 2412e5b6d6dSopenharmony_ci 2422e5b6d6dSopenharmony_ci return dest; 2432e5b6d6dSopenharmony_ci} 2442e5b6d6dSopenharmony_ci 2452e5b6d6dSopenharmony_ciU_CAPI UChar32* U_EXPORT2 2462e5b6d6dSopenharmony_ciu_strToUTF32(UChar32 *dest, 2472e5b6d6dSopenharmony_ci int32_t destCapacity, 2482e5b6d6dSopenharmony_ci int32_t *pDestLength, 2492e5b6d6dSopenharmony_ci const 
UChar *src, 2502e5b6d6dSopenharmony_ci int32_t srcLength, 2512e5b6d6dSopenharmony_ci UErrorCode *pErrorCode) { 2522e5b6d6dSopenharmony_ci return u_strToUTF32WithSub( 2532e5b6d6dSopenharmony_ci dest, destCapacity, pDestLength, 2542e5b6d6dSopenharmony_ci src, srcLength, 2552e5b6d6dSopenharmony_ci U_SENTINEL, NULL, 2562e5b6d6dSopenharmony_ci pErrorCode); 2572e5b6d6dSopenharmony_ci} 2582e5b6d6dSopenharmony_ci 2592e5b6d6dSopenharmony_ciU_CAPI UChar* U_EXPORT2 2602e5b6d6dSopenharmony_ciu_strFromUTF8WithSub(UChar *dest, 2612e5b6d6dSopenharmony_ci int32_t destCapacity, 2622e5b6d6dSopenharmony_ci int32_t *pDestLength, 2632e5b6d6dSopenharmony_ci const char* src, 2642e5b6d6dSopenharmony_ci int32_t srcLength, 2652e5b6d6dSopenharmony_ci UChar32 subchar, int32_t *pNumSubstitutions, 2662e5b6d6dSopenharmony_ci UErrorCode *pErrorCode){ 2672e5b6d6dSopenharmony_ci /* args check */ 2682e5b6d6dSopenharmony_ci if(U_FAILURE(*pErrorCode)) { 2692e5b6d6dSopenharmony_ci return NULL; 2702e5b6d6dSopenharmony_ci } 2712e5b6d6dSopenharmony_ci if( (src==NULL && srcLength!=0) || srcLength < -1 || 2722e5b6d6dSopenharmony_ci (destCapacity<0) || (dest == NULL && destCapacity > 0) || 2732e5b6d6dSopenharmony_ci subchar > 0x10ffff || U_IS_SURROGATE(subchar) 2742e5b6d6dSopenharmony_ci ) { 2752e5b6d6dSopenharmony_ci *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 2762e5b6d6dSopenharmony_ci return NULL; 2772e5b6d6dSopenharmony_ci } 2782e5b6d6dSopenharmony_ci 2792e5b6d6dSopenharmony_ci if(pNumSubstitutions!=NULL) { 2802e5b6d6dSopenharmony_ci *pNumSubstitutions=0; 2812e5b6d6dSopenharmony_ci } 2822e5b6d6dSopenharmony_ci UChar *pDest = dest; 2832e5b6d6dSopenharmony_ci UChar *pDestLimit = dest+destCapacity; 2842e5b6d6dSopenharmony_ci int32_t reqLength = 0; 2852e5b6d6dSopenharmony_ci int32_t numSubstitutions=0; 2862e5b6d6dSopenharmony_ci 2872e5b6d6dSopenharmony_ci /* 2882e5b6d6dSopenharmony_ci * Inline processing of UTF-8 byte sequences: 2892e5b6d6dSopenharmony_ci * 2902e5b6d6dSopenharmony_ci * Byte sequences for the 
most common characters are handled inline in 2912e5b6d6dSopenharmony_ci * the conversion loops. In order to reduce the path lengths for those 2922e5b6d6dSopenharmony_ci * characters, the tests are arranged in a kind of binary search. 2932e5b6d6dSopenharmony_ci * ASCII (<=0x7f) is checked first, followed by the dividing point 2942e5b6d6dSopenharmony_ci * between 2- and 3-byte sequences (0xe0). 2952e5b6d6dSopenharmony_ci * The 3-byte branch is tested first to speed up CJK text. 2962e5b6d6dSopenharmony_ci * The compiler should combine the subtractions for the two tests for 0xe0. 2972e5b6d6dSopenharmony_ci * Each branch then tests for the other end of its range. 2982e5b6d6dSopenharmony_ci */ 2992e5b6d6dSopenharmony_ci 3002e5b6d6dSopenharmony_ci if(srcLength < 0){ 3012e5b6d6dSopenharmony_ci /* 3022e5b6d6dSopenharmony_ci * Transform a NUL-terminated string. 3032e5b6d6dSopenharmony_ci * The code explicitly checks for NULs only in the lead byte position. 3042e5b6d6dSopenharmony_ci * A NUL byte in the trail byte position fails the trail byte range check anyway. 
3052e5b6d6dSopenharmony_ci */ 3062e5b6d6dSopenharmony_ci int32_t i; 3072e5b6d6dSopenharmony_ci UChar32 c; 3082e5b6d6dSopenharmony_ci for(i = 0; (c = (uint8_t)src[i]) != 0 && (pDest < pDestLimit);) { 3092e5b6d6dSopenharmony_ci // modified copy of U8_NEXT() 3102e5b6d6dSopenharmony_ci ++i; 3112e5b6d6dSopenharmony_ci if(U8_IS_SINGLE(c)) { 3122e5b6d6dSopenharmony_ci *pDest++=(UChar)c; 3132e5b6d6dSopenharmony_ci } else { 3142e5b6d6dSopenharmony_ci uint8_t __t1, __t2; 3152e5b6d6dSopenharmony_ci if( /* handle U+0800..U+FFFF inline */ 3162e5b6d6dSopenharmony_ci (0xe0<=(c) && (c)<0xf0) && 3172e5b6d6dSopenharmony_ci U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && 3182e5b6d6dSopenharmony_ci (__t2=src[(i)+1]-0x80)<=0x3f) { 3192e5b6d6dSopenharmony_ci *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2; 3202e5b6d6dSopenharmony_ci i+=2; 3212e5b6d6dSopenharmony_ci } else if( /* handle U+0080..U+07FF inline */ 3222e5b6d6dSopenharmony_ci ((c)<0xe0 && (c)>=0xc2) && 3232e5b6d6dSopenharmony_ci (__t1=src[i]-0x80)<=0x3f) { 3242e5b6d6dSopenharmony_ci *pDest++ = (((c)&0x1f)<<6)|__t1; 3252e5b6d6dSopenharmony_ci ++(i); 3262e5b6d6dSopenharmony_ci } else { 3272e5b6d6dSopenharmony_ci /* function call for "complicated" and error cases */ 3282e5b6d6dSopenharmony_ci (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1); 3292e5b6d6dSopenharmony_ci if(c<0 && (++numSubstitutions, c = subchar) < 0) { 3302e5b6d6dSopenharmony_ci *pErrorCode = U_INVALID_CHAR_FOUND; 3312e5b6d6dSopenharmony_ci return NULL; 3322e5b6d6dSopenharmony_ci } else if(c<=0xFFFF) { 3332e5b6d6dSopenharmony_ci *(pDest++)=(UChar)c; 3342e5b6d6dSopenharmony_ci } else { 3352e5b6d6dSopenharmony_ci *(pDest++)=U16_LEAD(c); 3362e5b6d6dSopenharmony_ci if(pDest<pDestLimit) { 3372e5b6d6dSopenharmony_ci *(pDest++)=U16_TRAIL(c); 3382e5b6d6dSopenharmony_ci } else { 3392e5b6d6dSopenharmony_ci reqLength++; 3402e5b6d6dSopenharmony_ci break; 3412e5b6d6dSopenharmony_ci } 3422e5b6d6dSopenharmony_ci } 3432e5b6d6dSopenharmony_ci } 
3442e5b6d6dSopenharmony_ci } 3452e5b6d6dSopenharmony_ci } 3462e5b6d6dSopenharmony_ci 3472e5b6d6dSopenharmony_ci /* Pre-flight the rest of the string. */ 3482e5b6d6dSopenharmony_ci while((c = (uint8_t)src[i]) != 0) { 3492e5b6d6dSopenharmony_ci // modified copy of U8_NEXT() 3502e5b6d6dSopenharmony_ci ++i; 3512e5b6d6dSopenharmony_ci if(U8_IS_SINGLE(c)) { 3522e5b6d6dSopenharmony_ci ++reqLength; 3532e5b6d6dSopenharmony_ci } else { 3542e5b6d6dSopenharmony_ci uint8_t __t1, __t2; 3552e5b6d6dSopenharmony_ci if( /* handle U+0800..U+FFFF inline */ 3562e5b6d6dSopenharmony_ci (0xe0<=(c) && (c)<0xf0) && 3572e5b6d6dSopenharmony_ci U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && 3582e5b6d6dSopenharmony_ci (__t2=src[(i)+1]-0x80)<=0x3f) { 3592e5b6d6dSopenharmony_ci ++reqLength; 3602e5b6d6dSopenharmony_ci i+=2; 3612e5b6d6dSopenharmony_ci } else if( /* handle U+0080..U+07FF inline */ 3622e5b6d6dSopenharmony_ci ((c)<0xe0 && (c)>=0xc2) && 3632e5b6d6dSopenharmony_ci (__t1=src[i]-0x80)<=0x3f) { 3642e5b6d6dSopenharmony_ci ++reqLength; 3652e5b6d6dSopenharmony_ci ++(i); 3662e5b6d6dSopenharmony_ci } else { 3672e5b6d6dSopenharmony_ci /* function call for "complicated" and error cases */ 3682e5b6d6dSopenharmony_ci (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1); 3692e5b6d6dSopenharmony_ci if(c<0 && (++numSubstitutions, c = subchar) < 0) { 3702e5b6d6dSopenharmony_ci *pErrorCode = U_INVALID_CHAR_FOUND; 3712e5b6d6dSopenharmony_ci return NULL; 3722e5b6d6dSopenharmony_ci } 3732e5b6d6dSopenharmony_ci reqLength += U16_LENGTH(c); 3742e5b6d6dSopenharmony_ci } 3752e5b6d6dSopenharmony_ci } 3762e5b6d6dSopenharmony_ci } 3772e5b6d6dSopenharmony_ci } else /* srcLength >= 0 */ { 3782e5b6d6dSopenharmony_ci /* Faster loop without ongoing checking for srcLength and pDestLimit. 
*/ 3792e5b6d6dSopenharmony_ci int32_t i = 0; 3802e5b6d6dSopenharmony_ci UChar32 c; 3812e5b6d6dSopenharmony_ci for(;;) { 3822e5b6d6dSopenharmony_ci /* 3832e5b6d6dSopenharmony_ci * Each iteration of the inner loop progresses by at most 3 UTF-8 3842e5b6d6dSopenharmony_ci * bytes and one UChar, for most characters. 3852e5b6d6dSopenharmony_ci * For supplementary code points (4 & 2), which are rare, 3862e5b6d6dSopenharmony_ci * there is an additional adjustment. 3872e5b6d6dSopenharmony_ci */ 3882e5b6d6dSopenharmony_ci int32_t count = (int32_t)(pDestLimit - pDest); 3892e5b6d6dSopenharmony_ci int32_t count2 = (srcLength - i) / 3; 3902e5b6d6dSopenharmony_ci if(count > count2) { 3912e5b6d6dSopenharmony_ci count = count2; /* min(remaining dest, remaining src/3) */ 3922e5b6d6dSopenharmony_ci } 3932e5b6d6dSopenharmony_ci if(count < 3) { 3942e5b6d6dSopenharmony_ci /* 3952e5b6d6dSopenharmony_ci * Too much overhead if we get near the end of the string, 3962e5b6d6dSopenharmony_ci * continue with the next loop. 
3972e5b6d6dSopenharmony_ci */ 3982e5b6d6dSopenharmony_ci break; 3992e5b6d6dSopenharmony_ci } 4002e5b6d6dSopenharmony_ci 4012e5b6d6dSopenharmony_ci do { 4022e5b6d6dSopenharmony_ci // modified copy of U8_NEXT() 4032e5b6d6dSopenharmony_ci c = (uint8_t)src[i++]; 4042e5b6d6dSopenharmony_ci if(U8_IS_SINGLE(c)) { 4052e5b6d6dSopenharmony_ci *pDest++=(UChar)c; 4062e5b6d6dSopenharmony_ci } else { 4072e5b6d6dSopenharmony_ci uint8_t __t1, __t2; 4082e5b6d6dSopenharmony_ci if( /* handle U+0800..U+FFFF inline */ 4092e5b6d6dSopenharmony_ci (0xe0<=(c) && (c)<0xf0) && 4102e5b6d6dSopenharmony_ci ((i)+1)<srcLength && 4112e5b6d6dSopenharmony_ci U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && 4122e5b6d6dSopenharmony_ci (__t2=src[(i)+1]-0x80)<=0x3f) { 4132e5b6d6dSopenharmony_ci *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2; 4142e5b6d6dSopenharmony_ci i+=2; 4152e5b6d6dSopenharmony_ci } else if( /* handle U+0080..U+07FF inline */ 4162e5b6d6dSopenharmony_ci ((c)<0xe0 && (c)>=0xc2) && 4172e5b6d6dSopenharmony_ci ((i)!=srcLength) && 4182e5b6d6dSopenharmony_ci (__t1=src[i]-0x80)<=0x3f) { 4192e5b6d6dSopenharmony_ci *pDest++ = (((c)&0x1f)<<6)|__t1; 4202e5b6d6dSopenharmony_ci ++(i); 4212e5b6d6dSopenharmony_ci } else { 4222e5b6d6dSopenharmony_ci if(c >= 0xf0 || subchar > 0xffff) { 4232e5b6d6dSopenharmony_ci // We may read up to four bytes and write up to two UChars, 4242e5b6d6dSopenharmony_ci // which we didn't account for with computing count, 4252e5b6d6dSopenharmony_ci // so we adjust it here. 
4262e5b6d6dSopenharmony_ci if(--count == 0) { 4272e5b6d6dSopenharmony_ci --i; // back out byte c 4282e5b6d6dSopenharmony_ci break; 4292e5b6d6dSopenharmony_ci } 4302e5b6d6dSopenharmony_ci } 4312e5b6d6dSopenharmony_ci 4322e5b6d6dSopenharmony_ci /* function call for "complicated" and error cases */ 4332e5b6d6dSopenharmony_ci (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1); 4342e5b6d6dSopenharmony_ci if(c<0 && (++numSubstitutions, c = subchar) < 0) { 4352e5b6d6dSopenharmony_ci *pErrorCode = U_INVALID_CHAR_FOUND; 4362e5b6d6dSopenharmony_ci return NULL; 4372e5b6d6dSopenharmony_ci } else if(c<=0xFFFF) { 4382e5b6d6dSopenharmony_ci *(pDest++)=(UChar)c; 4392e5b6d6dSopenharmony_ci } else { 4402e5b6d6dSopenharmony_ci *(pDest++)=U16_LEAD(c); 4412e5b6d6dSopenharmony_ci *(pDest++)=U16_TRAIL(c); 4422e5b6d6dSopenharmony_ci } 4432e5b6d6dSopenharmony_ci } 4442e5b6d6dSopenharmony_ci } 4452e5b6d6dSopenharmony_ci } while(--count > 0); 4462e5b6d6dSopenharmony_ci } 4472e5b6d6dSopenharmony_ci 4482e5b6d6dSopenharmony_ci while(i < srcLength && (pDest < pDestLimit)) { 4492e5b6d6dSopenharmony_ci // modified copy of U8_NEXT() 4502e5b6d6dSopenharmony_ci c = (uint8_t)src[i++]; 4512e5b6d6dSopenharmony_ci if(U8_IS_SINGLE(c)) { 4522e5b6d6dSopenharmony_ci *pDest++=(UChar)c; 4532e5b6d6dSopenharmony_ci } else { 4542e5b6d6dSopenharmony_ci uint8_t __t1, __t2; 4552e5b6d6dSopenharmony_ci if( /* handle U+0800..U+FFFF inline */ 4562e5b6d6dSopenharmony_ci (0xe0<=(c) && (c)<0xf0) && 4572e5b6d6dSopenharmony_ci ((i)+1)<srcLength && 4582e5b6d6dSopenharmony_ci U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && 4592e5b6d6dSopenharmony_ci (__t2=src[(i)+1]-0x80)<=0x3f) { 4602e5b6d6dSopenharmony_ci *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2; 4612e5b6d6dSopenharmony_ci i+=2; 4622e5b6d6dSopenharmony_ci } else if( /* handle U+0080..U+07FF inline */ 4632e5b6d6dSopenharmony_ci ((c)<0xe0 && (c)>=0xc2) && 4642e5b6d6dSopenharmony_ci ((i)!=srcLength) && 4652e5b6d6dSopenharmony_ci (__t1=src[i]-0x80)<=0x3f) 
{ 4662e5b6d6dSopenharmony_ci *pDest++ = (((c)&0x1f)<<6)|__t1; 4672e5b6d6dSopenharmony_ci ++(i); 4682e5b6d6dSopenharmony_ci } else { 4692e5b6d6dSopenharmony_ci /* function call for "complicated" and error cases */ 4702e5b6d6dSopenharmony_ci (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1); 4712e5b6d6dSopenharmony_ci if(c<0 && (++numSubstitutions, c = subchar) < 0) { 4722e5b6d6dSopenharmony_ci *pErrorCode = U_INVALID_CHAR_FOUND; 4732e5b6d6dSopenharmony_ci return NULL; 4742e5b6d6dSopenharmony_ci } else if(c<=0xFFFF) { 4752e5b6d6dSopenharmony_ci *(pDest++)=(UChar)c; 4762e5b6d6dSopenharmony_ci } else { 4772e5b6d6dSopenharmony_ci *(pDest++)=U16_LEAD(c); 4782e5b6d6dSopenharmony_ci if(pDest<pDestLimit) { 4792e5b6d6dSopenharmony_ci *(pDest++)=U16_TRAIL(c); 4802e5b6d6dSopenharmony_ci } else { 4812e5b6d6dSopenharmony_ci reqLength++; 4822e5b6d6dSopenharmony_ci break; 4832e5b6d6dSopenharmony_ci } 4842e5b6d6dSopenharmony_ci } 4852e5b6d6dSopenharmony_ci } 4862e5b6d6dSopenharmony_ci } 4872e5b6d6dSopenharmony_ci } 4882e5b6d6dSopenharmony_ci 4892e5b6d6dSopenharmony_ci /* Pre-flight the rest of the string. 
*/ 4902e5b6d6dSopenharmony_ci while(i < srcLength) { 4912e5b6d6dSopenharmony_ci // modified copy of U8_NEXT() 4922e5b6d6dSopenharmony_ci c = (uint8_t)src[i++]; 4932e5b6d6dSopenharmony_ci if(U8_IS_SINGLE(c)) { 4942e5b6d6dSopenharmony_ci ++reqLength; 4952e5b6d6dSopenharmony_ci } else { 4962e5b6d6dSopenharmony_ci uint8_t __t1, __t2; 4972e5b6d6dSopenharmony_ci if( /* handle U+0800..U+FFFF inline */ 4982e5b6d6dSopenharmony_ci (0xe0<=(c) && (c)<0xf0) && 4992e5b6d6dSopenharmony_ci ((i)+1)<srcLength && 5002e5b6d6dSopenharmony_ci U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && 5012e5b6d6dSopenharmony_ci (__t2=src[(i)+1]-0x80)<=0x3f) { 5022e5b6d6dSopenharmony_ci ++reqLength; 5032e5b6d6dSopenharmony_ci i+=2; 5042e5b6d6dSopenharmony_ci } else if( /* handle U+0080..U+07FF inline */ 5052e5b6d6dSopenharmony_ci ((c)<0xe0 && (c)>=0xc2) && 5062e5b6d6dSopenharmony_ci ((i)!=srcLength) && 5072e5b6d6dSopenharmony_ci (__t1=src[i]-0x80)<=0x3f) { 5082e5b6d6dSopenharmony_ci ++reqLength; 5092e5b6d6dSopenharmony_ci ++(i); 5102e5b6d6dSopenharmony_ci } else { 5112e5b6d6dSopenharmony_ci /* function call for "complicated" and error cases */ 5122e5b6d6dSopenharmony_ci (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1); 5132e5b6d6dSopenharmony_ci if(c<0 && (++numSubstitutions, c = subchar) < 0) { 5142e5b6d6dSopenharmony_ci *pErrorCode = U_INVALID_CHAR_FOUND; 5152e5b6d6dSopenharmony_ci return NULL; 5162e5b6d6dSopenharmony_ci } 5172e5b6d6dSopenharmony_ci reqLength += U16_LENGTH(c); 5182e5b6d6dSopenharmony_ci } 5192e5b6d6dSopenharmony_ci } 5202e5b6d6dSopenharmony_ci } 5212e5b6d6dSopenharmony_ci } 5222e5b6d6dSopenharmony_ci 5232e5b6d6dSopenharmony_ci reqLength+=(int32_t)(pDest - dest); 5242e5b6d6dSopenharmony_ci 5252e5b6d6dSopenharmony_ci if(pNumSubstitutions!=NULL) { 5262e5b6d6dSopenharmony_ci *pNumSubstitutions=numSubstitutions; 5272e5b6d6dSopenharmony_ci } 5282e5b6d6dSopenharmony_ci 5292e5b6d6dSopenharmony_ci if(pDestLength){ 5302e5b6d6dSopenharmony_ci *pDestLength = reqLength; 
5312e5b6d6dSopenharmony_ci } 5322e5b6d6dSopenharmony_ci 5332e5b6d6dSopenharmony_ci /* Terminate the buffer */ 5342e5b6d6dSopenharmony_ci u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); 5352e5b6d6dSopenharmony_ci 5362e5b6d6dSopenharmony_ci return dest; 5372e5b6d6dSopenharmony_ci} 5382e5b6d6dSopenharmony_ci 5392e5b6d6dSopenharmony_ciU_CAPI UChar* U_EXPORT2 5402e5b6d6dSopenharmony_ciu_strFromUTF8(UChar *dest, 5412e5b6d6dSopenharmony_ci int32_t destCapacity, 5422e5b6d6dSopenharmony_ci int32_t *pDestLength, 5432e5b6d6dSopenharmony_ci const char* src, 5442e5b6d6dSopenharmony_ci int32_t srcLength, 5452e5b6d6dSopenharmony_ci UErrorCode *pErrorCode){ 5462e5b6d6dSopenharmony_ci return u_strFromUTF8WithSub( 5472e5b6d6dSopenharmony_ci dest, destCapacity, pDestLength, 5482e5b6d6dSopenharmony_ci src, srcLength, 5492e5b6d6dSopenharmony_ci U_SENTINEL, NULL, 5502e5b6d6dSopenharmony_ci pErrorCode); 5512e5b6d6dSopenharmony_ci} 5522e5b6d6dSopenharmony_ci 5532e5b6d6dSopenharmony_ciU_CAPI UChar * U_EXPORT2 5542e5b6d6dSopenharmony_ciu_strFromUTF8Lenient(UChar *dest, 5552e5b6d6dSopenharmony_ci int32_t destCapacity, 5562e5b6d6dSopenharmony_ci int32_t *pDestLength, 5572e5b6d6dSopenharmony_ci const char *src, 5582e5b6d6dSopenharmony_ci int32_t srcLength, 5592e5b6d6dSopenharmony_ci UErrorCode *pErrorCode) { 5602e5b6d6dSopenharmony_ci UChar *pDest = dest; 5612e5b6d6dSopenharmony_ci UChar32 ch; 5622e5b6d6dSopenharmony_ci int32_t reqLength = 0; 5632e5b6d6dSopenharmony_ci uint8_t* pSrc = (uint8_t*) src; 5642e5b6d6dSopenharmony_ci 5652e5b6d6dSopenharmony_ci /* args check */ 5662e5b6d6dSopenharmony_ci if(U_FAILURE(*pErrorCode)){ 5672e5b6d6dSopenharmony_ci return NULL; 5682e5b6d6dSopenharmony_ci } 5692e5b6d6dSopenharmony_ci 5702e5b6d6dSopenharmony_ci if( (src==NULL && srcLength!=0) || srcLength < -1 || 5712e5b6d6dSopenharmony_ci (destCapacity<0) || (dest == NULL && destCapacity > 0) 5722e5b6d6dSopenharmony_ci ) { 5732e5b6d6dSopenharmony_ci *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 
5742e5b6d6dSopenharmony_ci return NULL; 5752e5b6d6dSopenharmony_ci } 5762e5b6d6dSopenharmony_ci 5772e5b6d6dSopenharmony_ci if(srcLength < 0) { 5782e5b6d6dSopenharmony_ci /* Transform a NUL-terminated string. */ 5792e5b6d6dSopenharmony_ci UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL; 5802e5b6d6dSopenharmony_ci uint8_t t1, t2, t3; /* trail bytes */ 5812e5b6d6dSopenharmony_ci 5822e5b6d6dSopenharmony_ci while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) { 5832e5b6d6dSopenharmony_ci if(ch < 0xc0) { 5842e5b6d6dSopenharmony_ci /* 5852e5b6d6dSopenharmony_ci * ASCII, or a trail byte in lead position which is treated like 5862e5b6d6dSopenharmony_ci * a single-byte sequence for better character boundary 5872e5b6d6dSopenharmony_ci * resynchronization after illegal sequences. 5882e5b6d6dSopenharmony_ci */ 5892e5b6d6dSopenharmony_ci *pDest++=(UChar)ch; 5902e5b6d6dSopenharmony_ci ++pSrc; 5912e5b6d6dSopenharmony_ci continue; 5922e5b6d6dSopenharmony_ci } else if(ch < 0xe0) { /* U+0080..U+07FF */ 5932e5b6d6dSopenharmony_ci if((t1 = pSrc[1]) != 0) { 5942e5b6d6dSopenharmony_ci /* 0x3080 = (0xc0 << 6) + 0x80 */ 5952e5b6d6dSopenharmony_ci *pDest++ = (UChar)((ch << 6) + t1 - 0x3080); 5962e5b6d6dSopenharmony_ci pSrc += 2; 5972e5b6d6dSopenharmony_ci continue; 5982e5b6d6dSopenharmony_ci } 5992e5b6d6dSopenharmony_ci } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 6002e5b6d6dSopenharmony_ci if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) { 6012e5b6d6dSopenharmony_ci /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 6022e5b6d6dSopenharmony_ci /* 0x2080 = (0x80 << 6) + 0x80 */ 6032e5b6d6dSopenharmony_ci *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080); 6042e5b6d6dSopenharmony_ci pSrc += 3; 6052e5b6d6dSopenharmony_ci continue; 6062e5b6d6dSopenharmony_ci } 6072e5b6d6dSopenharmony_ci } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 6082e5b6d6dSopenharmony_ci if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) { 
6092e5b6d6dSopenharmony_ci pSrc += 4; 6102e5b6d6dSopenharmony_ci /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 6112e5b6d6dSopenharmony_ci ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080; 6122e5b6d6dSopenharmony_ci *(pDest++) = U16_LEAD(ch); 6132e5b6d6dSopenharmony_ci if(pDest < pDestLimit) { 6142e5b6d6dSopenharmony_ci *(pDest++) = U16_TRAIL(ch); 6152e5b6d6dSopenharmony_ci } else { 6162e5b6d6dSopenharmony_ci reqLength = 1; 6172e5b6d6dSopenharmony_ci break; 6182e5b6d6dSopenharmony_ci } 6192e5b6d6dSopenharmony_ci continue; 6202e5b6d6dSopenharmony_ci } 6212e5b6d6dSopenharmony_ci } 6222e5b6d6dSopenharmony_ci 6232e5b6d6dSopenharmony_ci /* truncated character at the end */ 6242e5b6d6dSopenharmony_ci *pDest++ = 0xfffd; 6252e5b6d6dSopenharmony_ci while(*++pSrc != 0) {} 6262e5b6d6dSopenharmony_ci break; 6272e5b6d6dSopenharmony_ci } 6282e5b6d6dSopenharmony_ci 6292e5b6d6dSopenharmony_ci /* Pre-flight the rest of the string. */ 6302e5b6d6dSopenharmony_ci while((ch = *pSrc) != 0) { 6312e5b6d6dSopenharmony_ci if(ch < 0xc0) { 6322e5b6d6dSopenharmony_ci /* 6332e5b6d6dSopenharmony_ci * ASCII, or a trail byte in lead position which is treated like 6342e5b6d6dSopenharmony_ci * a single-byte sequence for better character boundary 6352e5b6d6dSopenharmony_ci * resynchronization after illegal sequences. 
6362e5b6d6dSopenharmony_ci */ 6372e5b6d6dSopenharmony_ci ++reqLength; 6382e5b6d6dSopenharmony_ci ++pSrc; 6392e5b6d6dSopenharmony_ci continue; 6402e5b6d6dSopenharmony_ci } else if(ch < 0xe0) { /* U+0080..U+07FF */ 6412e5b6d6dSopenharmony_ci if(pSrc[1] != 0) { 6422e5b6d6dSopenharmony_ci ++reqLength; 6432e5b6d6dSopenharmony_ci pSrc += 2; 6442e5b6d6dSopenharmony_ci continue; 6452e5b6d6dSopenharmony_ci } 6462e5b6d6dSopenharmony_ci } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 6472e5b6d6dSopenharmony_ci if(pSrc[1] != 0 && pSrc[2] != 0) { 6482e5b6d6dSopenharmony_ci ++reqLength; 6492e5b6d6dSopenharmony_ci pSrc += 3; 6502e5b6d6dSopenharmony_ci continue; 6512e5b6d6dSopenharmony_ci } 6522e5b6d6dSopenharmony_ci } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 6532e5b6d6dSopenharmony_ci if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) { 6542e5b6d6dSopenharmony_ci reqLength += 2; 6552e5b6d6dSopenharmony_ci pSrc += 4; 6562e5b6d6dSopenharmony_ci continue; 6572e5b6d6dSopenharmony_ci } 6582e5b6d6dSopenharmony_ci } 6592e5b6d6dSopenharmony_ci 6602e5b6d6dSopenharmony_ci /* truncated character at the end */ 6612e5b6d6dSopenharmony_ci ++reqLength; 6622e5b6d6dSopenharmony_ci break; 6632e5b6d6dSopenharmony_ci } 6642e5b6d6dSopenharmony_ci } else /* srcLength >= 0 */ { 6652e5b6d6dSopenharmony_ci const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL; 6662e5b6d6dSopenharmony_ci 6672e5b6d6dSopenharmony_ci /* 6682e5b6d6dSopenharmony_ci * This function requires that if srcLength is given, then it must be 6692e5b6d6dSopenharmony_ci * destCapatity >= srcLength so that we need not check for 6702e5b6d6dSopenharmony_ci * destination buffer overflow in the loop. 6712e5b6d6dSopenharmony_ci */ 6722e5b6d6dSopenharmony_ci if(destCapacity < srcLength) { 6732e5b6d6dSopenharmony_ci if(pDestLength != NULL) { 6742e5b6d6dSopenharmony_ci *pDestLength = srcLength; /* this likely overestimates the true destLength! 
*/ 6752e5b6d6dSopenharmony_ci } 6762e5b6d6dSopenharmony_ci *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 6772e5b6d6dSopenharmony_ci return NULL; 6782e5b6d6dSopenharmony_ci } 6792e5b6d6dSopenharmony_ci 6802e5b6d6dSopenharmony_ci if((pSrcLimit - pSrc) >= 4) { 6812e5b6d6dSopenharmony_ci pSrcLimit -= 3; /* temporarily reduce pSrcLimit */ 6822e5b6d6dSopenharmony_ci 6832e5b6d6dSopenharmony_ci /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */ 6842e5b6d6dSopenharmony_ci do { 6852e5b6d6dSopenharmony_ci ch = *pSrc++; 6862e5b6d6dSopenharmony_ci if(ch < 0xc0) { 6872e5b6d6dSopenharmony_ci /* 6882e5b6d6dSopenharmony_ci * ASCII, or a trail byte in lead position which is treated like 6892e5b6d6dSopenharmony_ci * a single-byte sequence for better character boundary 6902e5b6d6dSopenharmony_ci * resynchronization after illegal sequences. 6912e5b6d6dSopenharmony_ci */ 6922e5b6d6dSopenharmony_ci *pDest++=(UChar)ch; 6932e5b6d6dSopenharmony_ci } else if(ch < 0xe0) { /* U+0080..U+07FF */ 6942e5b6d6dSopenharmony_ci /* 0x3080 = (0xc0 << 6) + 0x80 */ 6952e5b6d6dSopenharmony_ci *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); 6962e5b6d6dSopenharmony_ci } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 6972e5b6d6dSopenharmony_ci /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 6982e5b6d6dSopenharmony_ci /* 0x2080 = (0x80 << 6) + 0x80 */ 6992e5b6d6dSopenharmony_ci ch = (ch << 12) + (*pSrc++ << 6); 7002e5b6d6dSopenharmony_ci *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); 7012e5b6d6dSopenharmony_ci } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 7022e5b6d6dSopenharmony_ci /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 7032e5b6d6dSopenharmony_ci ch = (ch << 18) + (*pSrc++ << 12); 7042e5b6d6dSopenharmony_ci ch += *pSrc++ << 6; 7052e5b6d6dSopenharmony_ci ch += *pSrc++ - 0x3c82080; 7062e5b6d6dSopenharmony_ci *(pDest++) = U16_LEAD(ch); 7072e5b6d6dSopenharmony_ci *(pDest++) = U16_TRAIL(ch); 7082e5b6d6dSopenharmony_ci } 
7092e5b6d6dSopenharmony_ci } while(pSrc < pSrcLimit); 7102e5b6d6dSopenharmony_ci 7112e5b6d6dSopenharmony_ci pSrcLimit += 3; /* restore original pSrcLimit */ 7122e5b6d6dSopenharmony_ci } 7132e5b6d6dSopenharmony_ci 7142e5b6d6dSopenharmony_ci while(pSrc < pSrcLimit) { 7152e5b6d6dSopenharmony_ci ch = *pSrc++; 7162e5b6d6dSopenharmony_ci if(ch < 0xc0) { 7172e5b6d6dSopenharmony_ci /* 7182e5b6d6dSopenharmony_ci * ASCII, or a trail byte in lead position which is treated like 7192e5b6d6dSopenharmony_ci * a single-byte sequence for better character boundary 7202e5b6d6dSopenharmony_ci * resynchronization after illegal sequences. 7212e5b6d6dSopenharmony_ci */ 7222e5b6d6dSopenharmony_ci *pDest++=(UChar)ch; 7232e5b6d6dSopenharmony_ci continue; 7242e5b6d6dSopenharmony_ci } else if(ch < 0xe0) { /* U+0080..U+07FF */ 7252e5b6d6dSopenharmony_ci if(pSrc < pSrcLimit) { 7262e5b6d6dSopenharmony_ci /* 0x3080 = (0xc0 << 6) + 0x80 */ 7272e5b6d6dSopenharmony_ci *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); 7282e5b6d6dSopenharmony_ci continue; 7292e5b6d6dSopenharmony_ci } 7302e5b6d6dSopenharmony_ci } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 7312e5b6d6dSopenharmony_ci if((pSrcLimit - pSrc) >= 2) { 7322e5b6d6dSopenharmony_ci /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 7332e5b6d6dSopenharmony_ci /* 0x2080 = (0x80 << 6) + 0x80 */ 7342e5b6d6dSopenharmony_ci ch = (ch << 12) + (*pSrc++ << 6); 7352e5b6d6dSopenharmony_ci *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); 7362e5b6d6dSopenharmony_ci pSrc += 3; 7372e5b6d6dSopenharmony_ci continue; 7382e5b6d6dSopenharmony_ci } 7392e5b6d6dSopenharmony_ci } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 7402e5b6d6dSopenharmony_ci if((pSrcLimit - pSrc) >= 3) { 7412e5b6d6dSopenharmony_ci /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 7422e5b6d6dSopenharmony_ci ch = (ch << 18) + (*pSrc++ << 12); 7432e5b6d6dSopenharmony_ci ch += *pSrc++ << 6; 7442e5b6d6dSopenharmony_ci ch += *pSrc++ - 
0x3c82080; 7452e5b6d6dSopenharmony_ci *(pDest++) = U16_LEAD(ch); 7462e5b6d6dSopenharmony_ci *(pDest++) = U16_TRAIL(ch); 7472e5b6d6dSopenharmony_ci pSrc += 4; 7482e5b6d6dSopenharmony_ci continue; 7492e5b6d6dSopenharmony_ci } 7502e5b6d6dSopenharmony_ci } 7512e5b6d6dSopenharmony_ci 7522e5b6d6dSopenharmony_ci /* truncated character at the end */ 7532e5b6d6dSopenharmony_ci *pDest++ = 0xfffd; 7542e5b6d6dSopenharmony_ci break; 7552e5b6d6dSopenharmony_ci } 7562e5b6d6dSopenharmony_ci } 7572e5b6d6dSopenharmony_ci 7582e5b6d6dSopenharmony_ci reqLength+=(int32_t)(pDest - dest); 7592e5b6d6dSopenharmony_ci 7602e5b6d6dSopenharmony_ci if(pDestLength){ 7612e5b6d6dSopenharmony_ci *pDestLength = reqLength; 7622e5b6d6dSopenharmony_ci } 7632e5b6d6dSopenharmony_ci 7642e5b6d6dSopenharmony_ci /* Terminate the buffer */ 7652e5b6d6dSopenharmony_ci u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); 7662e5b6d6dSopenharmony_ci 7672e5b6d6dSopenharmony_ci return dest; 7682e5b6d6dSopenharmony_ci} 7692e5b6d6dSopenharmony_ci

/**
 * Writes the UTF-8 encoding of c starting at pDest and returns the pointer
 * just past the last byte written (1 to 4 bytes, chosen by the value of c).
 *
 * No bounds or validity checking is done here: the caller must already have
 * made sure that the destination has room and that c is encodable.
 */
static inline uint8_t *
_appendUTF8(uint8_t *pDest, UChar32 c) {
    /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */
    if((c)<=0x7f) {
        *pDest++=(uint8_t)c;
    } else if(c<=0x7ff) {
        *pDest++=(uint8_t)((c>>6)|0xc0);
        *pDest++=(uint8_t)((c&0x3f)|0x80);
    } else if(c<=0xffff) {
        *pDest++=(uint8_t)((c>>12)|0xe0);
        *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80);
        *pDest++=(uint8_t)(((c)&0x3f)|0x80);
    } else /* if((uint32_t)(c)<=0x10ffff) */ {
        *pDest++=(uint8_t)(((c)>>18)|0xf0);
        *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80);
        *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80);
        *pDest++=(uint8_t)(((c)&0x3f)|0x80);
    }
    return pDest;
}


/**
 * Converts a UTF-16 string to UTF-8, optionally replacing unpaired
 * surrogates with a substitution character.
 *
 * @param dest              Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity      Size of dest in bytes; must not be negative.
 * @param pDestLength       If not NULL, receives the number of bytes the
 *                          complete output needs (may exceed destCapacity).
 * @param pSrc              Input UTF-16 text; may be NULL only if srcLength is 0.
 * @param srcLength         Number of UChars in pSrc, or -1 if NUL-terminated.
 * @param subchar           Code point written in place of each unpaired
 *                          surrogate; a negative value means "do not
 *                          substitute, fail with U_INVALID_CHAR_FOUND instead".
 *                          Must not be a surrogate or exceed 0x10ffff.
 * @param pNumSubstitutions If not NULL, receives the number of substitutions
 *                          (set to 0 on entry, final count on success).
 * @param pErrorCode        In/out status. The function returns immediately
 *                          if it already indicates a failure.
 * @return dest on success, NULL on argument error or invalid input.
 */
U_CAPI char* U_EXPORT2
u_strToUTF8WithSub(char *dest,
                    int32_t destCapacity,
                    int32_t *pDestLength,
                    const UChar *pSrc,
                    int32_t srcLength,
                    UChar32 subchar, int32_t *pNumSubstitutions,
                    UErrorCode *pErrorCode){
    int32_t reqLength=0;
    uint32_t ch=0,ch2=0;
    uint8_t *pDest = (uint8_t *)dest;
    uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL;
    int32_t numSubstitutions;

    /* args check */
    if(U_FAILURE(*pErrorCode)){
        return NULL;
    }

    if( (pSrc==NULL && srcLength!=0) || srcLength < -1 ||
        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    ) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=0;
    }
    numSubstitutions=0;

    if(srcLength==-1) {
        /*
         * NUL-terminated input: convert while there is room. When a character
         * does not fit, remember its encoded length in reqLength and fall
         * through to the counting-only loop below.
         */
        while((ch=*pSrc)!=0) {
            ++pSrc;
            if(ch <= 0x7f) {
                if(pDest<pDestLimit) {
                    *pDest++ = (uint8_t)ch;
                } else {
                    reqLength = 1;
                    break;
                }
            } else if(ch <= 0x7ff) {
                if((pDestLimit - pDest) >= 2) {
                    *pDest++=(uint8_t)((ch>>6)|0xc0);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else {
                    reqLength = 2;
                    break;
                }
            } else if(ch <= 0xd7ff || ch >= 0xe000) {
                if((pDestLimit - pDest) >= 3) {
                    *pDest++=(uint8_t)((ch>>12)|0xe0);
                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else {
                    reqLength = 3;
                    break;
                }
            } else /* ch is a surrogate */ {
                int32_t length;

                /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/
                if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
                    ++pSrc;
                    ch=U16_GET_SUPPLEMENTARY(ch, ch2);
                } else if(subchar>=0) {
                    ch=subchar;
                    ++numSubstitutions;
                } else {
                    /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                    *pErrorCode = U_INVALID_CHAR_FOUND;
                    return NULL;
                }

                length = U8_LENGTH(ch);
                if((pDestLimit - pDest) >= length) {
                    /* convert and append*/
                    pDest=_appendUTF8(pDest, ch);
                } else {
                    reqLength = length;
                    break;
                }
            }
        }
        /* Pre-flight: only count the bytes needed for the rest of the input. */
        while((ch=*pSrc++)!=0) {
            if(ch<=0x7f) {
                ++reqLength;
            } else if(ch<=0x7ff) {
                reqLength+=2;
            } else if(!U16_IS_SURROGATE(ch)) {
                reqLength+=3;
            } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
                ++pSrc;
                reqLength+=4;
            } else if(subchar>=0) {
                reqLength+=U8_LENGTH(subchar);
                ++numSubstitutions;
            } else {
                /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
        }
    } else {
        const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL;
        int32_t count;

        /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
        for(;;) {
            /*
             * Each iteration of the inner loop progresses by at most 3 UTF-8
             * bytes and one UChar, for most characters.
             * For supplementary code points (4 & 2), which are rare,
             * there is an additional adjustment.
             */
            count = (int32_t)((pDestLimit - pDest) / 3);
            srcLength = (int32_t)(pSrcLimit - pSrc);
            if(count > srcLength) {
                count = srcLength; /* min(remaining dest/3, remaining src) */
            }
            if(count < 3) {
                /*
                 * Too much overhead if we get near the end of the string,
                 * continue with the next loop.
                 */
                break;
            }
            do {
                ch=*pSrc++;
                if(ch <= 0x7f) {
                    *pDest++ = (uint8_t)ch;
                } else if(ch <= 0x7ff) {
                    *pDest++=(uint8_t)((ch>>6)|0xc0);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else if(ch <= 0xd7ff || ch >= 0xe000) {
                    *pDest++=(uint8_t)((ch>>12)|0xe0);
                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else /* ch is a surrogate */ {
                    /*
                     * We will read two UChars and probably output four bytes,
                     * which we didn't account for with computing count,
                     * so we adjust it here.
                     */
                    if(--count == 0) {
                        --pSrc; /* undo ch=*pSrc++ for the lead surrogate */
                        break;  /* recompute count */
                    }

                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
                        ++pSrc;
                        ch=U16_GET_SUPPLEMENTARY(ch, ch2);

                        /* writing 4 bytes per 2 UChars is ok */
                        *pDest++=(uint8_t)((ch>>18)|0xf0);
                        *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80);
                        *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                        *pDest++=(uint8_t)((ch&0x3f)|0x80);
                    } else  {
                        /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                        if(subchar>=0) {
                            ch=subchar;
                            ++numSubstitutions;
                        } else {
                            *pErrorCode = U_INVALID_CHAR_FOUND;
                            return NULL;
                        }

                        /* convert and append*/
                        pDest=_appendUTF8(pDest, ch);
                    }
                }
            } while(--count > 0);
        }

        /* Careful loop near the end of either buffer: check room per character. */
        while(pSrc<pSrcLimit) {
            ch=*pSrc++;
            if(ch <= 0x7f) {
                if(pDest<pDestLimit) {
                    *pDest++ = (uint8_t)ch;
                } else {
                    reqLength = 1;
                    break;
                }
            } else if(ch <= 0x7ff) {
                if((pDestLimit - pDest) >= 2) {
                    *pDest++=(uint8_t)((ch>>6)|0xc0);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else {
                    reqLength = 2;
                    break;
                }
            } else if(ch <= 0xd7ff || ch >= 0xe000) {
                if((pDestLimit - pDest) >= 3) {
                    *pDest++=(uint8_t)((ch>>12)|0xe0);
                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
                } else {
                    reqLength = 3;
                    break;
                }
            } else /* ch is a surrogate */ {
                int32_t length;

                if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
                    ++pSrc;
                    ch=U16_GET_SUPPLEMENTARY(ch, ch2);
                } else if(subchar>=0) {
                    ch=subchar;
                    ++numSubstitutions;
                } else {
                    /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                    *pErrorCode = U_INVALID_CHAR_FOUND;
                    return NULL;
                }

                length = U8_LENGTH(ch);
                if((pDestLimit - pDest) >= length) {
                    /* convert and append*/
                    pDest=_appendUTF8(pDest, ch);
                } else {
                    reqLength = length;
                    break;
                }
            }
        }
        /* Pre-flight: only count the bytes needed for the rest of the input. */
        while(pSrc<pSrcLimit) {
            ch=*pSrc++;
            if(ch<=0x7f) {
                ++reqLength;
            } else if(ch<=0x7ff) {
                reqLength+=2;
            } else if(!U16_IS_SURROGATE(ch)) {
                reqLength+=3;
            } else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
                ++pSrc;
                reqLength+=4;
            } else if(subchar>=0) {
                reqLength+=U8_LENGTH(subchar);
                ++numSubstitutions;
            } else {
                /* Unicode 3.2 forbids surrogate code points in UTF-8 */
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            }
        }
    }

    /* bytes actually written plus bytes counted during pre-flighting */
    reqLength+=(int32_t)(pDest - (uint8_t *)dest);

    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=numSubstitutions;
    }

    if(pDestLength){
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
    return dest;
}

/**
 * Converts a UTF-16 string to UTF-8 without substitution: forwards to
 * u_strToUTF8WithSub() with subchar=U_SENTINEL and no substitution counter,
 * so an unpaired surrogate results in U_INVALID_CHAR_FOUND.
 */
U_CAPI char* U_EXPORT2
u_strToUTF8(char *dest,
           int32_t destCapacity,
           int32_t *pDestLength,
           const UChar *pSrc,
           int32_t srcLength,
           UErrorCode *pErrorCode){
    return u_strToUTF8WithSub(
            dest, destCapacity, pDestLength,
            pSrc, srcLength,
            U_SENTINEL, NULL,
            pErrorCode);
}

/**
 * Converts a Java "modified UTF-8" byte string to UTF-16.
 *
 * One-, two- and three-byte sequences are decoded inline; every other byte
 * sequence is either replaced by subchar or reported as an error.
 *
 * @param dest              Output buffer; may be NULL only if destCapacity is 0.
 * @param destCapacity      Size of dest in UChars; must not be negative.
 * @param pDestLength       If not NULL, receives the number of UChars the
 *                          complete output needs (may exceed destCapacity).
 * @param src               Input bytes; may be NULL only if srcLength is 0.
 * @param srcLength         Number of bytes in src, or -1 if NUL-terminated.
 * @param subchar           Code point written for each undecodable sequence;
 *                          a negative value means "do not substitute, fail
 *                          with U_INVALID_CHAR_FOUND instead". Must not be a
 *                          surrogate or exceed 0x10ffff.
 * @param pNumSubstitutions If not NULL, receives the number of substitutions
 *                          (set to 0 on entry, final count on success).
 * @param pErrorCode        In/out status. The function returns immediately
 *                          if it already indicates a failure.
 * @return dest on success, NULL on argument error or invalid input.
 */
U_CAPI UChar* U_EXPORT2
u_strFromJavaModifiedUTF8WithSub(
        UChar *dest,
        int32_t destCapacity,
        int32_t *pDestLength,
        const char *src,
        int32_t srcLength,
        UChar32 subchar, int32_t *pNumSubstitutions,
        UErrorCode *pErrorCode) {
    /* args check */
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
        (dest==NULL && destCapacity!=0) || destCapacity<0 ||
        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    ) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=0;
    }
    UChar *pDest = dest;
    /* Guard the limit computation so no arithmetic is done on a NULL dest. */
    UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;
    int32_t reqLength = 0;
    int32_t numSubstitutions=0;

    if(srcLength < 0) {
        /*
         * Transform a NUL-terminated ASCII string.
         * Handle non-ASCII strings with slower code.
         */
        UChar32 c;
        while(((c = (uint8_t)*src) != 0) && c <= 0x7f && (pDest < pDestLimit)) {
            *pDest++=(UChar)c;
            ++src;
        }
        if(c == 0) {
            reqLength=(int32_t)(pDest - dest);
            if(pDestLength) {
                *pDestLength = reqLength;
            }

            /* Terminate the buffer */
            u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
            return dest;
        }
        /* src now points past the converted ASCII prefix; measure the rest. */
        srcLength = static_cast<int32_t>(uprv_strlen(src));
    }

    /* Faster loop without ongoing checking for srcLength and pDestLimit. */
    UChar32 ch;
    uint8_t t1, t2;
    int32_t i = 0;
    for(;;) {
        int32_t count = (int32_t)(pDestLimit - pDest);
        int32_t count2 = srcLength - i;
        if(count >= count2 && srcLength > 0 && U8_IS_SINGLE(*src)) {
            /* fast ASCII loop */
            int32_t start = i;
            uint8_t b;
            while(i < srcLength && U8_IS_SINGLE(b = src[i])) {
                *pDest++=b;
                ++i;
            }
            int32_t delta = i - start;
            count -= delta;
            count2 -= delta;
        }
        /*
         * Each iteration of the inner loop progresses by at most 3 UTF-8
         * bytes and one UChar.
         */
        if(subchar > 0xFFFF) {
            /* a supplementary subchar needs two UChars; use the careful loop */
            break;
        }
        count2 /= 3;
        if(count > count2) {
            count = count2; /* min(remaining dest, remaining src/3) */
        }
        if(count < 3) {
            /*
             * Too much overhead if we get near the end of the string,
             * continue with the next loop.
             */
            break;
        }
        do {
            ch = (uint8_t)src[i++];
            if(U8_IS_SINGLE(ch)) {
                *pDest++=(UChar)ch;
            } else {
                if(ch >= 0xe0) {
                    if( /* handle U+0000..U+FFFF inline */
                        ch <= 0xef &&
                        (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
                        (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
                    ) {
                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
                        i += 2;
                        continue;
                    }
                } else {
                    if( /* handle U+0000..U+07FF inline */
                        ch >= 0xc0 &&
                        (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
                    ) {
                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
                        ++i;
                        continue;
                    }
                }

                if(subchar < 0) {
                    *pErrorCode = U_INVALID_CHAR_FOUND;
                    return NULL;
                } else if(subchar > 0xffff && --count == 0) {
                    /*
                     * We need to write two UChars, adjusted count for that,
                     * and ran out of space.
                     * (Not reached while the subchar > 0xFFFF check above
                     * leaves the outer loop first.)
                     */
                    --i;  /* back out byte ch */
                    break;
                } else {
                    /* function call for error cases */
                    utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
                    ++numSubstitutions;
                    *(pDest++)=(UChar)subchar;
                }
            }
        } while(--count > 0);
    }

    /* Careful loop: check the source and destination limits per character. */
    while(i < srcLength && (pDest < pDestLimit)) {
        ch = (uint8_t)src[i++];
        if(U8_IS_SINGLE(ch)){
            *pDest++=(UChar)ch;
        } else {
            if(ch >= 0xe0) {
                if( /* handle U+0000..U+FFFF inline */
                    ch <= 0xef &&
                    (i+1) < srcLength &&
                    (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f &&
                    (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f
                ) {
                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
                    *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
                    i += 2;
                    continue;
                }
            } else {
                if( /* handle U+0000..U+07FF inline */
                    ch >= 0xc0 &&
                    i < srcLength &&
                    (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f
                ) {
                    *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
                    ++i;
                    continue;
                }
            }

            if(subchar < 0) {
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            } else {
                /* function call for error cases */
                utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
                ++numSubstitutions;
                if(subchar<=0xFFFF) {
                    *(pDest++)=(UChar)subchar;
                } else {
                    *(pDest++)=U16_LEAD(subchar);
                    if(pDest<pDestLimit) {
                        *(pDest++)=U16_TRAIL(subchar);
                    } else {
                        /* no room for the trail surrogate: count it and pre-flight the rest */
                        reqLength++;
                        break;
                    }
                }
            }
        }
    }

    /* Pre-flight the rest of the string. */
    while(i < srcLength) {
        ch = (uint8_t)src[i++];
        if(U8_IS_SINGLE(ch)) {
            reqLength++;
        } else {
            if(ch >= 0xe0) {
                if( /* handle U+0000..U+FFFF inline */
                    ch <= 0xef &&
                    (i+1) < srcLength &&
                    (uint8_t)(src[i] - 0x80) <= 0x3f &&
                    (uint8_t)(src[i+1] - 0x80) <= 0x3f
                ) {
                    reqLength++;
                    i += 2;
                    continue;
                }
            } else {
                if( /* handle U+0000..U+07FF inline */
                    ch >= 0xc0 &&
                    i < srcLength &&
                    (uint8_t)(src[i] - 0x80) <= 0x3f
                ) {
                    reqLength++;
                    ++i;
                    continue;
                }
            }

            if(subchar < 0) {
                *pErrorCode = U_INVALID_CHAR_FOUND;
                return NULL;
            } else {
                /* function call for error cases */
                utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1);
                ++numSubstitutions;
                /*
                 * Count what the conversion loops would have written: the
                 * substitution character, which takes two UChars when it is
                 * supplementary. (ch still holds the offending lead byte
                 * here, so it must not be used for the length.)
                 */
                reqLength+=U16_LENGTH(subchar);
            }
        }
    }

    if(pNumSubstitutions!=NULL) {
        *pNumSubstitutions=numSubstitutions;
    }

    /* UChars actually written plus UChars counted during pre-flighting */
    reqLength+=(int32_t)(pDest - dest);
    if(pDestLength) {
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
    return dest;
}
 13112e5b6d6dSopenharmony_ci 13122e5b6d6dSopenharmony_ciU_CAPI char* U_EXPORT2 13132e5b6d6dSopenharmony_ciu_strToJavaModifiedUTF8( 13142e5b6d6dSopenharmony_ci char *dest, 13152e5b6d6dSopenharmony_ci int32_t destCapacity, 13162e5b6d6dSopenharmony_ci int32_t *pDestLength, 13172e5b6d6dSopenharmony_ci const UChar *src, 13182e5b6d6dSopenharmony_ci int32_t srcLength, 13192e5b6d6dSopenharmony_ci UErrorCode *pErrorCode) { 13202e5b6d6dSopenharmony_ci int32_t reqLength=0; 13212e5b6d6dSopenharmony_ci uint32_t ch=0; 13222e5b6d6dSopenharmony_ci uint8_t *pDest = (uint8_t *)dest; 13232e5b6d6dSopenharmony_ci uint8_t *pDestLimit = pDest + destCapacity; 13242e5b6d6dSopenharmony_ci const UChar *pSrcLimit; 13252e5b6d6dSopenharmony_ci int32_t count; 13262e5b6d6dSopenharmony_ci 13272e5b6d6dSopenharmony_ci /* args check */ 13282e5b6d6dSopenharmony_ci if(U_FAILURE(*pErrorCode)){ 13292e5b6d6dSopenharmony_ci return NULL; 13302e5b6d6dSopenharmony_ci } 13312e5b6d6dSopenharmony_ci if( (src==NULL && srcLength!=0) || srcLength < -1 || 13322e5b6d6dSopenharmony_ci (dest==NULL && destCapacity!=0) || destCapacity<0 13332e5b6d6dSopenharmony_ci ) { 13342e5b6d6dSopenharmony_ci *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 13352e5b6d6dSopenharmony_ci return NULL; 13362e5b6d6dSopenharmony_ci } 13372e5b6d6dSopenharmony_ci 13382e5b6d6dSopenharmony_ci if(srcLength==-1) { 13392e5b6d6dSopenharmony_ci /* Convert NUL-terminated ASCII, then find the string length.
*/ 13402e5b6d6dSopenharmony_ci while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) { 13412e5b6d6dSopenharmony_ci *pDest++ = (uint8_t)ch; 13422e5b6d6dSopenharmony_ci ++src; 13432e5b6d6dSopenharmony_ci } 13442e5b6d6dSopenharmony_ci if(ch == 0) { 13452e5b6d6dSopenharmony_ci reqLength=(int32_t)(pDest - (uint8_t *)dest); 13462e5b6d6dSopenharmony_ci if(pDestLength) { 13472e5b6d6dSopenharmony_ci *pDestLength = reqLength; 13482e5b6d6dSopenharmony_ci } 13492e5b6d6dSopenharmony_ci 13502e5b6d6dSopenharmony_ci /* Terminate the buffer */ 13512e5b6d6dSopenharmony_ci u_terminateChars(dest, destCapacity, reqLength, pErrorCode); 13522e5b6d6dSopenharmony_ci return dest; 13532e5b6d6dSopenharmony_ci } 13542e5b6d6dSopenharmony_ci srcLength = u_strlen(src); 13552e5b6d6dSopenharmony_ci } 13562e5b6d6dSopenharmony_ci 13572e5b6d6dSopenharmony_ci /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 13582e5b6d6dSopenharmony_ci pSrcLimit = (src!=NULL)?(src+srcLength):NULL; 13592e5b6d6dSopenharmony_ci for(;;) { 13602e5b6d6dSopenharmony_ci count = (int32_t)(pDestLimit - pDest); 13612e5b6d6dSopenharmony_ci srcLength = (int32_t)(pSrcLimit - src); 13622e5b6d6dSopenharmony_ci if(count >= srcLength && srcLength > 0 && *src <= 0x7f) { 13632e5b6d6dSopenharmony_ci /* fast ASCII loop */ 13642e5b6d6dSopenharmony_ci const UChar *prevSrc = src; 13652e5b6d6dSopenharmony_ci int32_t delta; 13662e5b6d6dSopenharmony_ci while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) { 13672e5b6d6dSopenharmony_ci *pDest++=(uint8_t)ch; 13682e5b6d6dSopenharmony_ci ++src; 13692e5b6d6dSopenharmony_ci } 13702e5b6d6dSopenharmony_ci delta = (int32_t)(src - prevSrc); 13712e5b6d6dSopenharmony_ci count -= delta; 13722e5b6d6dSopenharmony_ci srcLength -= delta; 13732e5b6d6dSopenharmony_ci } 13742e5b6d6dSopenharmony_ci /* 13752e5b6d6dSopenharmony_ci * Each iteration of the inner loop progresses by at most 3 UTF-8 13762e5b6d6dSopenharmony_ci * bytes and one UChar. 
13772e5b6d6dSopenharmony_ci */ 13782e5b6d6dSopenharmony_ci count /= 3; 13792e5b6d6dSopenharmony_ci if(count > srcLength) { 13802e5b6d6dSopenharmony_ci count = srcLength; /* min(remaining dest/3, remaining src) */ 13812e5b6d6dSopenharmony_ci } 13822e5b6d6dSopenharmony_ci if(count < 3) { 13832e5b6d6dSopenharmony_ci /* 13842e5b6d6dSopenharmony_ci * Too much overhead if we get near the end of the string, 13852e5b6d6dSopenharmony_ci * continue with the next loop. 13862e5b6d6dSopenharmony_ci */ 13872e5b6d6dSopenharmony_ci break; 13882e5b6d6dSopenharmony_ci } 13892e5b6d6dSopenharmony_ci do { 13902e5b6d6dSopenharmony_ci ch=*src++; 13912e5b6d6dSopenharmony_ci if(ch <= 0x7f && ch != 0) { 13922e5b6d6dSopenharmony_ci *pDest++ = (uint8_t)ch; 13932e5b6d6dSopenharmony_ci } else if(ch <= 0x7ff) { 13942e5b6d6dSopenharmony_ci *pDest++=(uint8_t)((ch>>6)|0xc0); 13952e5b6d6dSopenharmony_ci *pDest++=(uint8_t)((ch&0x3f)|0x80); 13962e5b6d6dSopenharmony_ci } else { 13972e5b6d6dSopenharmony_ci *pDest++=(uint8_t)((ch>>12)|0xe0); 13982e5b6d6dSopenharmony_ci *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 13992e5b6d6dSopenharmony_ci *pDest++=(uint8_t)((ch&0x3f)|0x80); 14002e5b6d6dSopenharmony_ci } 14012e5b6d6dSopenharmony_ci } while(--count > 0); 14022e5b6d6dSopenharmony_ci } 14032e5b6d6dSopenharmony_ci 14042e5b6d6dSopenharmony_ci while(src<pSrcLimit) { 14052e5b6d6dSopenharmony_ci ch=*src++; 14062e5b6d6dSopenharmony_ci if(ch <= 0x7f && ch != 0) { 14072e5b6d6dSopenharmony_ci if(pDest<pDestLimit) { 14082e5b6d6dSopenharmony_ci *pDest++ = (uint8_t)ch; 14092e5b6d6dSopenharmony_ci } else { 14102e5b6d6dSopenharmony_ci reqLength = 1; 14112e5b6d6dSopenharmony_ci break; 14122e5b6d6dSopenharmony_ci } 14132e5b6d6dSopenharmony_ci } else if(ch <= 0x7ff) { 14142e5b6d6dSopenharmony_ci if((pDestLimit - pDest) >= 2) { 14152e5b6d6dSopenharmony_ci *pDest++=(uint8_t)((ch>>6)|0xc0); 14162e5b6d6dSopenharmony_ci *pDest++=(uint8_t)((ch&0x3f)|0x80); 14172e5b6d6dSopenharmony_ci } else { 14182e5b6d6dSopenharmony_ci reqLength = 
2; 14192e5b6d6dSopenharmony_ci break; 14202e5b6d6dSopenharmony_ci } 14212e5b6d6dSopenharmony_ci } else { 14222e5b6d6dSopenharmony_ci if((pDestLimit - pDest) >= 3) { 14232e5b6d6dSopenharmony_ci *pDest++=(uint8_t)((ch>>12)|0xe0); 14242e5b6d6dSopenharmony_ci *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 14252e5b6d6dSopenharmony_ci *pDest++=(uint8_t)((ch&0x3f)|0x80); 14262e5b6d6dSopenharmony_ci } else { 14272e5b6d6dSopenharmony_ci reqLength = 3; 14282e5b6d6dSopenharmony_ci break; 14292e5b6d6dSopenharmony_ci } 14302e5b6d6dSopenharmony_ci } 14312e5b6d6dSopenharmony_ci } 14322e5b6d6dSopenharmony_ci while(src<pSrcLimit) { 14332e5b6d6dSopenharmony_ci ch=*src++; 14342e5b6d6dSopenharmony_ci if(ch <= 0x7f && ch != 0) { 14352e5b6d6dSopenharmony_ci ++reqLength; 14362e5b6d6dSopenharmony_ci } else if(ch<=0x7ff) { 14372e5b6d6dSopenharmony_ci reqLength+=2; 14382e5b6d6dSopenharmony_ci } else { 14392e5b6d6dSopenharmony_ci reqLength+=3; 14402e5b6d6dSopenharmony_ci } 14412e5b6d6dSopenharmony_ci } 14422e5b6d6dSopenharmony_ci 14432e5b6d6dSopenharmony_ci reqLength+=(int32_t)(pDest - (uint8_t *)dest); 14442e5b6d6dSopenharmony_ci if(pDestLength){ 14452e5b6d6dSopenharmony_ci *pDestLength = reqLength; 14462e5b6d6dSopenharmony_ci } 14472e5b6d6dSopenharmony_ci 14482e5b6d6dSopenharmony_ci /* Terminate the buffer */ 14492e5b6d6dSopenharmony_ci u_terminateChars(dest, destCapacity, reqLength, pErrorCode); 14502e5b6d6dSopenharmony_ci return dest; 14512e5b6d6dSopenharmony_ci} 1452