13f4cbf05Sopenharmony_ci/* 23f4cbf05Sopenharmony_ci * Copyright (c) 2021 Huawei Device Co., Ltd. 33f4cbf05Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 43f4cbf05Sopenharmony_ci * you may not use this file except in compliance with the License. 53f4cbf05Sopenharmony_ci * You may obtain a copy of the License at 63f4cbf05Sopenharmony_ci * 73f4cbf05Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 83f4cbf05Sopenharmony_ci * 93f4cbf05Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 103f4cbf05Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 113f4cbf05Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 123f4cbf05Sopenharmony_ci * See the License for the specific language governing permissions and 133f4cbf05Sopenharmony_ci * limitations under the License. 143f4cbf05Sopenharmony_ci */ 153f4cbf05Sopenharmony_ci 163f4cbf05Sopenharmony_ci#include "unicode_ex.h" 173f4cbf05Sopenharmony_ci 183f4cbf05Sopenharmony_ci#include <climits> 193f4cbf05Sopenharmony_ci#include <cstdio> 203f4cbf05Sopenharmony_ci#include <cstdlib> 213f4cbf05Sopenharmony_ci 223f4cbf05Sopenharmony_ci#include "utils_log.h" 233f4cbf05Sopenharmony_ciusing namespace std; 243f4cbf05Sopenharmony_ci/***************************************UTF8 and UTF16 unicode********************************************** 253f4cbf05Sopenharmony_ciUTF8 263f4cbf05Sopenharmony_ciUnicode utf8 273f4cbf05Sopenharmony_ciU + 0000~U + 007F 0??????? 283f4cbf05Sopenharmony_ciU + 0080~U + 07FF 110????? 10?????? 293f4cbf05Sopenharmony_ciU + 0800~U + FFFF 1110???? 10?????? 10?????? 303f4cbf05Sopenharmony_ciU + 10000~U + 10FFFF 11110??? 10?????? 10?????? 10?????? 313f4cbf05Sopenharmony_ci 323f4cbf05Sopenharmony_ciUTF16 333f4cbf05Sopenharmony_ciUnicode utf16 code 343f4cbf05Sopenharmony_ci 353f4cbf05Sopenharmony_ciU + 000~U + FFFF 2 Byte save, same with Unicode 363f4cbf05Sopenharmony_ciU + 10000~U + 10FFFF 4 Byte save Unicode 0x10000 373f4cbf05Sopenharmony_ci**************************************UTF8 and UTF16 unicode**********************************************/ 383f4cbf05Sopenharmony_cinamespace OHOS { 393f4cbf05Sopenharmony_cinamespace { 403f4cbf05Sopenharmony_ciconstexpr char32_t ONE_BYTE_UTF8 = 0x00000080; 413f4cbf05Sopenharmony_ciconstexpr char32_t TWO_BYTES_UTF8 = 0x00000800; 423f4cbf05Sopenharmony_ciconstexpr char32_t THREE_BYTES_UTF8 = 0x00010000; 433f4cbf05Sopenharmony_ci 443f4cbf05Sopenharmony_ci 453f4cbf05Sopenharmony_ciconstexpr char32_t UNICODE_RESERVED_START = 0x0000D800; 463f4cbf05Sopenharmony_ciconstexpr char32_t UNICODE_RESERVED_END = 0x0000DFFF; 473f4cbf05Sopenharmony_ciconstexpr char32_t UNICODE_MAX_NUM = 0x0010FFFF; 483f4cbf05Sopenharmony_ciconstexpr unsigned int UTF8_OFFSET = 6; 493f4cbf05Sopenharmony_ci 503f4cbf05Sopenharmony_ciconstexpr char32_t UTF8_BYTE_MASK = 0x000000BF; 513f4cbf05Sopenharmony_ciconstexpr char32_t UTF8_BYTE_MARK = 0x00000080; 523f4cbf05Sopenharmony_ciconstexpr char32_t UTF8_FIRST_BYTE_MARK[] = { 533f4cbf05Sopenharmony_ci 0x00000000, 0x00000000, 0x000000C0, 0x000000E0, 0x000000F0 543f4cbf05Sopenharmony_ci}; 553f4cbf05Sopenharmony_ci} 563f4cbf05Sopenharmony_ci 573f4cbf05Sopenharmony_ci#define UTF8_LENGTH_INVALID 0 583f4cbf05Sopenharmony_ci#define UTF8_LENGTH_1 1 593f4cbf05Sopenharmony_ci#define UTF8_LENGTH_2 2 603f4cbf05Sopenharmony_ci#define UTF8_LENGTH_3 3 613f4cbf05Sopenharmony_ci#define UTF8_LENGTH_4 4 623f4cbf05Sopenharmony_ci#define UTF8_LEN_MASK 3 633f4cbf05Sopenharmony_ci#define UTF8_FIRST_BYTE_INDEX 0 643f4cbf05Sopenharmony_ci#define UTF8_SECOND_BYTE_INDEX 1 653f4cbf05Sopenharmony_ci#define UTF8_THIRD_BYTE_INDEX 2 663f4cbf05Sopenharmony_ci#define UTF8_FORTH_BYTE_INDEX 3 673f4cbf05Sopenharmony_ci#define UTF8_SHIFT_WIDTH 6 683f4cbf05Sopenharmony_ci#define STR16_TO_STR8_SHIFT_WIDTH 10 693f4cbf05Sopenharmony_ci#define UTF16_SHIFT_WIDTH 10 703f4cbf05Sopenharmony_ci#define UTF32_BYTE_SIZE_1 1 713f4cbf05Sopenharmony_ci#define UTF32_BYTE_SIZE_2 2 723f4cbf05Sopenharmony_ci#define UTF32_BYTE_SIZE_3 3 733f4cbf05Sopenharmony_ci#define UTF32_BYTE_SIZE_4 4 743f4cbf05Sopenharmony_ci 753f4cbf05Sopenharmony_ci// inner func and dstP is not nullptr 763f4cbf05Sopenharmony_civoid Utf32CodePointToUtf8(uint8_t* dstP, char32_t srcChar, size_t bytes) 773f4cbf05Sopenharmony_ci{ 783f4cbf05Sopenharmony_ci dstP += bytes; 793f4cbf05Sopenharmony_ci if (bytes >= UTF32_BYTE_SIZE_4) { 803f4cbf05Sopenharmony_ci *--dstP = static_cast<uint8_t>((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); 813f4cbf05Sopenharmony_ci srcChar >>= UTF8_OFFSET; 823f4cbf05Sopenharmony_ci } 833f4cbf05Sopenharmony_ci 843f4cbf05Sopenharmony_ci if (bytes >= UTF32_BYTE_SIZE_3) { 853f4cbf05Sopenharmony_ci *--dstP = static_cast<uint8_t>((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); 863f4cbf05Sopenharmony_ci srcChar >>= UTF8_OFFSET; 873f4cbf05Sopenharmony_ci } 883f4cbf05Sopenharmony_ci 893f4cbf05Sopenharmony_ci if (bytes >= UTF32_BYTE_SIZE_2) { 903f4cbf05Sopenharmony_ci *--dstP = static_cast<uint8_t>((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); 913f4cbf05Sopenharmony_ci srcChar >>= UTF8_OFFSET; 923f4cbf05Sopenharmony_ci } 933f4cbf05Sopenharmony_ci 943f4cbf05Sopenharmony_ci if (bytes >= UTF32_BYTE_SIZE_1) { 953f4cbf05Sopenharmony_ci *--dstP = static_cast<uint8_t>(srcChar | UTF8_FIRST_BYTE_MARK[bytes]); 963f4cbf05Sopenharmony_ci } 973f4cbf05Sopenharmony_ci} 983f4cbf05Sopenharmony_ci 993f4cbf05Sopenharmony_cisize_t Utf32CodePointUtf8Length(char32_t srcChar) 1003f4cbf05Sopenharmony_ci{ 1013f4cbf05Sopenharmony_ci if (srcChar < ONE_BYTE_UTF8) { 1023f4cbf05Sopenharmony_ci return UTF8_LENGTH_1; 1033f4cbf05Sopenharmony_ci } else if (srcChar < TWO_BYTES_UTF8) { 1043f4cbf05Sopenharmony_ci return UTF8_LENGTH_2; 1053f4cbf05Sopenharmony_ci } else if (srcChar < THREE_BYTES_UTF8) { 1063f4cbf05Sopenharmony_ci if ((srcChar < UNICODE_RESERVED_START) || (srcChar > UNICODE_RESERVED_END)) { 1073f4cbf05Sopenharmony_ci return UTF8_LENGTH_3; 1083f4cbf05Sopenharmony_ci } else { 1093f4cbf05Sopenharmony_ci // Surrogates are invalid UTF-32 characters. 1103f4cbf05Sopenharmony_ci return UTF8_LENGTH_INVALID; 1113f4cbf05Sopenharmony_ci } 1123f4cbf05Sopenharmony_ci } else if (srcChar <= UNICODE_MAX_NUM) { 1133f4cbf05Sopenharmony_ci // Max code point for Unicode is 0x0010FFFF. 1143f4cbf05Sopenharmony_ci return UTF8_LENGTH_4; 1153f4cbf05Sopenharmony_ci } else { 1163f4cbf05Sopenharmony_ci // Invalid UTF-32 character. 1173f4cbf05Sopenharmony_ci return UTF8_LENGTH_INVALID; 1183f4cbf05Sopenharmony_ci } 1193f4cbf05Sopenharmony_ci} 1203f4cbf05Sopenharmony_ci 1213f4cbf05Sopenharmony_ci// get the length of utf8 from utf16 1223f4cbf05Sopenharmony_ciint Utf16ToUtf8Length(const char16_t* str16, size_t str16Len) 1233f4cbf05Sopenharmony_ci{ 1243f4cbf05Sopenharmony_ci if (str16 == nullptr || str16Len == 0) { 1253f4cbf05Sopenharmony_ci return -1; 1263f4cbf05Sopenharmony_ci } 1273f4cbf05Sopenharmony_ci 1283f4cbf05Sopenharmony_ci const char16_t* const str16End = str16 + str16Len; 1293f4cbf05Sopenharmony_ci int utf8Len = 0; 1303f4cbf05Sopenharmony_ci while (str16 < str16End) { 1313f4cbf05Sopenharmony_ci int charLen = 0; 1323f4cbf05Sopenharmony_ci if (((*str16 & 0xFC00) == 0xD800) && ((str16 + 1) < str16End) 1333f4cbf05Sopenharmony_ci && ((*(str16 + 1) & 0xFC00) == 0xDC00)) { 1343f4cbf05Sopenharmony_ci // surrogate pairs are always 4 bytes. 1353f4cbf05Sopenharmony_ci charLen = 4; 1363f4cbf05Sopenharmony_ci // str16 advance 2 bytes 1373f4cbf05Sopenharmony_ci str16 += 2; 1383f4cbf05Sopenharmony_ci } else { 1393f4cbf05Sopenharmony_ci charLen = Utf32CodePointUtf8Length(static_cast<char32_t>(*str16++)); 1403f4cbf05Sopenharmony_ci } 1413f4cbf05Sopenharmony_ci 1423f4cbf05Sopenharmony_ci if (utf8Len > (INT_MAX - charLen)) { 1433f4cbf05Sopenharmony_ci return -1; 1443f4cbf05Sopenharmony_ci } 1453f4cbf05Sopenharmony_ci utf8Len += charLen; 1463f4cbf05Sopenharmony_ci } 1473f4cbf05Sopenharmony_ci return utf8Len; 1483f4cbf05Sopenharmony_ci} 1493f4cbf05Sopenharmony_ci 1503f4cbf05Sopenharmony_ci// inner function, utf8Str and utf16Str is not nullptr 1513f4cbf05Sopenharmony_civoid StrncpyStr16ToStr8(const char16_t* utf16Str, size_t str16Len, char* utf8Str, size_t str8Len) 1523f4cbf05Sopenharmony_ci{ 1533f4cbf05Sopenharmony_ci const char16_t* curUtf16 = utf16Str; 1543f4cbf05Sopenharmony_ci const char16_t* const endUtf16 = utf16Str + str16Len; 1553f4cbf05Sopenharmony_ci char* cur = utf8Str; 1563f4cbf05Sopenharmony_ci while (curUtf16 < endUtf16) { 1573f4cbf05Sopenharmony_ci char32_t utf32; 1583f4cbf05Sopenharmony_ci // surrogate pairs 1593f4cbf05Sopenharmony_ci if (((*curUtf16 & 0xFC00) == 0xD800) && ((curUtf16 + 1) < endUtf16) 1603f4cbf05Sopenharmony_ci && (((*(curUtf16 + 1) & 0xFC00)) == 0xDC00)) { 1613f4cbf05Sopenharmony_ci utf32 = (*curUtf16++ - 0xD800) << STR16_TO_STR8_SHIFT_WIDTH; 1623f4cbf05Sopenharmony_ci utf32 |= *curUtf16++ - 0xDC00; 1633f4cbf05Sopenharmony_ci utf32 += 0x10000; 1643f4cbf05Sopenharmony_ci } else { 1653f4cbf05Sopenharmony_ci utf32 = static_cast<char32_t>(*curUtf16++); 1663f4cbf05Sopenharmony_ci } 1673f4cbf05Sopenharmony_ci const size_t len = Utf32CodePointUtf8Length(utf32); 1683f4cbf05Sopenharmony_ci if (str8Len < len) { 1693f4cbf05Sopenharmony_ci break; 1703f4cbf05Sopenharmony_ci } 1713f4cbf05Sopenharmony_ci 1723f4cbf05Sopenharmony_ci Utf32CodePointToUtf8(reinterpret_cast<uint8_t*>(cur), utf32, len); 1733f4cbf05Sopenharmony_ci cur += len; 1743f4cbf05Sopenharmony_ci str8Len -= len; 1753f4cbf05Sopenharmony_ci } 1763f4cbf05Sopenharmony_ci *cur = '\0'; 1773f4cbf05Sopenharmony_ci} 1783f4cbf05Sopenharmony_ci 1793f4cbf05Sopenharmony_ci// inner function and str16 is not null 1803f4cbf05Sopenharmony_cichar* Char16ToChar8(const char16_t* str16, size_t str16Len) 1813f4cbf05Sopenharmony_ci{ 1823f4cbf05Sopenharmony_ci char* str8 = nullptr; 1833f4cbf05Sopenharmony_ci int utf8Len = Utf16ToUtf8Length(str16, str16Len); 1843f4cbf05Sopenharmony_ci if (utf8Len < 0 || utf8Len >= INT_MAX) { 1853f4cbf05Sopenharmony_ci return nullptr; 1863f4cbf05Sopenharmony_ci } 1873f4cbf05Sopenharmony_ci 1883f4cbf05Sopenharmony_ci // Allow for closing '\0' 1893f4cbf05Sopenharmony_ci utf8Len += 1; 1903f4cbf05Sopenharmony_ci str8 = reinterpret_cast<char*>(calloc(utf8Len, sizeof(char))); 1913f4cbf05Sopenharmony_ci if (str8 == nullptr) { 1923f4cbf05Sopenharmony_ci return nullptr; 1933f4cbf05Sopenharmony_ci } 1943f4cbf05Sopenharmony_ci 1953f4cbf05Sopenharmony_ci StrncpyStr16ToStr8(str16, str16Len, str8, utf8Len); 1963f4cbf05Sopenharmony_ci return str8; 1973f4cbf05Sopenharmony_ci} 1983f4cbf05Sopenharmony_ci 1993f4cbf05Sopenharmony_cibool String16ToString8(const u16string& str16, string& str8) 2003f4cbf05Sopenharmony_ci{ 2013f4cbf05Sopenharmony_ci size_t str16Len = str16.length(); 2023f4cbf05Sopenharmony_ci if (str16Len < 1) { 2033f4cbf05Sopenharmony_ci return false; 2043f4cbf05Sopenharmony_ci } 2053f4cbf05Sopenharmony_ci 2063f4cbf05Sopenharmony_ci char* str8Temp = Char16ToChar8(str16.c_str(), str16Len); 2073f4cbf05Sopenharmony_ci if (str8Temp == nullptr) { 2083f4cbf05Sopenharmony_ci UTILS_LOGD("Str16 to str8 failed, because str8Temp is nullptr!"); 2093f4cbf05Sopenharmony_ci return false; 2103f4cbf05Sopenharmony_ci } 2113f4cbf05Sopenharmony_ci 2123f4cbf05Sopenharmony_ci str8 = str8Temp; 2133f4cbf05Sopenharmony_ci free(str8Temp); 2143f4cbf05Sopenharmony_ci str8Temp = nullptr; 2153f4cbf05Sopenharmony_ci return true; 2163f4cbf05Sopenharmony_ci} 2173f4cbf05Sopenharmony_ci 2183f4cbf05Sopenharmony_ci/** 2193f4cbf05Sopenharmony_ci* return 1-4 by first byte 2203f4cbf05Sopenharmony_ci* 1111xxxx : 4 2213f4cbf05Sopenharmony_ci* 1110xxxx : 3 2223f4cbf05Sopenharmony_ci* 110xxxxx : 2 2233f4cbf05Sopenharmony_ci* 10xxxxxx : 1 2243f4cbf05Sopenharmony_ci* 0xxxxxxx : 1 2253f4cbf05Sopenharmony_ci*/ 2263f4cbf05Sopenharmony_cistatic inline size_t Utf8CodePointLen(uint8_t ch) 2273f4cbf05Sopenharmony_ci{ 2283f4cbf05Sopenharmony_ci return ((0xe5000000 >> ((ch >> UTF8_LEN_MASK) & 0x1e)) & UTF8_LEN_MASK) + 1; 2293f4cbf05Sopenharmony_ci} 2303f4cbf05Sopenharmony_ci 2313f4cbf05Sopenharmony_cistatic inline void Utf8ShiftAndMask(uint32_t* codePoint, const uint8_t byte) 2323f4cbf05Sopenharmony_ci{ 2333f4cbf05Sopenharmony_ci *codePoint <<= UTF8_SHIFT_WIDTH; 2343f4cbf05Sopenharmony_ci *codePoint |= 0x3F & byte; 2353f4cbf05Sopenharmony_ci} 2363f4cbf05Sopenharmony_ci 2373f4cbf05Sopenharmony_ciuint32_t Utf8ToUtf32CodePoint(const char* src, size_t length) 2383f4cbf05Sopenharmony_ci{ 2393f4cbf05Sopenharmony_ci uint32_t unicode = 0; 2403f4cbf05Sopenharmony_ci 2413f4cbf05Sopenharmony_ci switch (length) { 2423f4cbf05Sopenharmony_ci case UTF8_LENGTH_1: 2433f4cbf05Sopenharmony_ci return src[UTF8_FIRST_BYTE_INDEX]; 2443f4cbf05Sopenharmony_ci case UTF8_LENGTH_2: 2453f4cbf05Sopenharmony_ci unicode = src[UTF8_FIRST_BYTE_INDEX] & 0x1f; 2463f4cbf05Sopenharmony_ci Utf8ShiftAndMask(&unicode, src[UTF8_SECOND_BYTE_INDEX]); 2473f4cbf05Sopenharmony_ci return unicode; 2483f4cbf05Sopenharmony_ci case UTF8_LENGTH_3: 2493f4cbf05Sopenharmony_ci unicode = src[UTF8_FIRST_BYTE_INDEX] & 0x0f; 2503f4cbf05Sopenharmony_ci Utf8ShiftAndMask(&unicode, src[UTF8_SECOND_BYTE_INDEX]); 2513f4cbf05Sopenharmony_ci Utf8ShiftAndMask(&unicode, src[UTF8_THIRD_BYTE_INDEX]); 2523f4cbf05Sopenharmony_ci return unicode; 2533f4cbf05Sopenharmony_ci case UTF8_LENGTH_4: 2543f4cbf05Sopenharmony_ci unicode = src[UTF8_FIRST_BYTE_INDEX] & 0x07; 2553f4cbf05Sopenharmony_ci Utf8ShiftAndMask(&unicode, src[UTF8_SECOND_BYTE_INDEX]); 2563f4cbf05Sopenharmony_ci Utf8ShiftAndMask(&unicode, src[UTF8_THIRD_BYTE_INDEX]); 2573f4cbf05Sopenharmony_ci Utf8ShiftAndMask(&unicode, src[UTF8_FORTH_BYTE_INDEX]); 2583f4cbf05Sopenharmony_ci return unicode; 2593f4cbf05Sopenharmony_ci default: 2603f4cbf05Sopenharmony_ci return 0xffff; 2613f4cbf05Sopenharmony_ci } 2623f4cbf05Sopenharmony_ci} 2633f4cbf05Sopenharmony_ci 2643f4cbf05Sopenharmony_ciint Utf8ToUtf16Length(const char* str8, size_t str8Len) 2653f4cbf05Sopenharmony_ci{ 2663f4cbf05Sopenharmony_ci const char* const str8end = str8 + str8Len; 2673f4cbf05Sopenharmony_ci int utf16len = 0; 2683f4cbf05Sopenharmony_ci while (str8 < str8end) { 2693f4cbf05Sopenharmony_ci utf16len++; 2703f4cbf05Sopenharmony_ci size_t u8charlen = Utf8CodePointLen(*str8); 2713f4cbf05Sopenharmony_ci if (str8 + u8charlen - 1 >= str8end) { 2723f4cbf05Sopenharmony_ci UTILS_LOGE("Get str16 length failed because str8 unicode is illegal!"); 2733f4cbf05Sopenharmony_ci return -1; 2743f4cbf05Sopenharmony_ci } 2753f4cbf05Sopenharmony_ci uint32_t codepoint = Utf8ToUtf32CodePoint(str8, u8charlen); 2763f4cbf05Sopenharmony_ci if (codepoint > 0xFFFF) { 2773f4cbf05Sopenharmony_ci utf16len++; // this will be a surrogate pair in utf16 2783f4cbf05Sopenharmony_ci } 2793f4cbf05Sopenharmony_ci str8 += u8charlen; 2803f4cbf05Sopenharmony_ci } 2813f4cbf05Sopenharmony_ci if (str8 != str8end) { 2823f4cbf05Sopenharmony_ci UTILS_LOGE("Get str16 length failed because str8length is illegal!"); 2833f4cbf05Sopenharmony_ci return -1; 2843f4cbf05Sopenharmony_ci } 2853f4cbf05Sopenharmony_ci return utf16len; 2863f4cbf05Sopenharmony_ci} 2873f4cbf05Sopenharmony_ci 2883f4cbf05Sopenharmony_cichar16_t* Utf8ToUtf16(const char* utf8Str, size_t u8len, char16_t* u16str, size_t u16len) 2893f4cbf05Sopenharmony_ci{ 2903f4cbf05Sopenharmony_ci if (u16len == 0) { 2913f4cbf05Sopenharmony_ci return u16str; 2923f4cbf05Sopenharmony_ci } 2933f4cbf05Sopenharmony_ci const char* const u8end = utf8Str + u8len; 2943f4cbf05Sopenharmony_ci const char* u8cur = utf8Str; 2953f4cbf05Sopenharmony_ci const char16_t* const u16end = u16str + u16len; 2963f4cbf05Sopenharmony_ci char16_t* u16cur = u16str; 2973f4cbf05Sopenharmony_ci 2983f4cbf05Sopenharmony_ci while ((u8cur < u8end) && (u16cur < u16end)) { 2993f4cbf05Sopenharmony_ci size_t len = Utf8CodePointLen(*u8cur); 3003f4cbf05Sopenharmony_ci uint32_t codepoint = Utf8ToUtf32CodePoint(u8cur, len); 3013f4cbf05Sopenharmony_ci // Convert the UTF32 codepoint to one or more UTF16 codepoints 3023f4cbf05Sopenharmony_ci if (codepoint <= 0xFFFF) { 3033f4cbf05Sopenharmony_ci // Single UTF16 character 3043f4cbf05Sopenharmony_ci *u16cur++ = static_cast<char16_t>(codepoint); 3053f4cbf05Sopenharmony_ci } else { 3063f4cbf05Sopenharmony_ci // Multiple UTF16 characters with surrogates 3073f4cbf05Sopenharmony_ci codepoint = codepoint - 0x10000; 3083f4cbf05Sopenharmony_ci *u16cur++ = static_cast<char16_t>((codepoint >> UTF16_SHIFT_WIDTH) + 0xD800); 3093f4cbf05Sopenharmony_ci if (u16cur >= u16end) { 3103f4cbf05Sopenharmony_ci // Ooops... not enough room for this surrogate pair. 3113f4cbf05Sopenharmony_ci return u16cur - 1; 3123f4cbf05Sopenharmony_ci } 3133f4cbf05Sopenharmony_ci *u16cur++ = static_cast<char16_t>((codepoint & 0x3FF) + 0xDC00); 3143f4cbf05Sopenharmony_ci } 3153f4cbf05Sopenharmony_ci 3163f4cbf05Sopenharmony_ci u8cur += len; 3173f4cbf05Sopenharmony_ci } 3183f4cbf05Sopenharmony_ci return u16cur; 3193f4cbf05Sopenharmony_ci} 3203f4cbf05Sopenharmony_ci 3213f4cbf05Sopenharmony_civoid StrncpyStr8ToStr16(const char* utf8Str, size_t u8len, char16_t* u16str, size_t u16len) 3223f4cbf05Sopenharmony_ci{ 3233f4cbf05Sopenharmony_ci char16_t* result = Utf8ToUtf16(utf8Str, u8len, u16str, u16len - 1); 3243f4cbf05Sopenharmony_ci *result = 0; 3253f4cbf05Sopenharmony_ci return; 3263f4cbf05Sopenharmony_ci} 3273f4cbf05Sopenharmony_ci 3283f4cbf05Sopenharmony_ci// inner function and str8 is not null 3293f4cbf05Sopenharmony_cichar16_t* Char8ToChar16(const char* str8, size_t str8Len) 3303f4cbf05Sopenharmony_ci{ 3313f4cbf05Sopenharmony_ci char16_t* str16 = nullptr; 3323f4cbf05Sopenharmony_ci int utf16Len = Utf8ToUtf16Length(str8, str8Len); 3333f4cbf05Sopenharmony_ci if (utf16Len < 0) { 3343f4cbf05Sopenharmony_ci UTILS_LOGE("Get str16 length failed,length is: %{public}d", utf16Len); 3353f4cbf05Sopenharmony_ci return nullptr; 3363f4cbf05Sopenharmony_ci } 3373f4cbf05Sopenharmony_ci 3383f4cbf05Sopenharmony_ci // Allow for closing 0 3393f4cbf05Sopenharmony_ci utf16Len = utf16Len + 1; 3403f4cbf05Sopenharmony_ci str16 = reinterpret_cast<char16_t*>(calloc(utf16Len, sizeof(char16_t))); 3413f4cbf05Sopenharmony_ci if (str16 == nullptr) { 3423f4cbf05Sopenharmony_ci UTILS_LOGE("Str16 malloc memory failed!"); 3433f4cbf05Sopenharmony_ci return nullptr; 3443f4cbf05Sopenharmony_ci } 3453f4cbf05Sopenharmony_ci 3463f4cbf05Sopenharmony_ci StrncpyStr8ToStr16(str8, str8Len, str16, utf16Len); 3473f4cbf05Sopenharmony_ci return str16; 3483f4cbf05Sopenharmony_ci} 3493f4cbf05Sopenharmony_ci 3503f4cbf05Sopenharmony_cibool String8ToString16(const string& str8, u16string& str16) 3513f4cbf05Sopenharmony_ci{ 3523f4cbf05Sopenharmony_ci size_t str8len = str8.length(); 3533f4cbf05Sopenharmony_ci if (str8len < 1) { 3543f4cbf05Sopenharmony_ci return false; 3553f4cbf05Sopenharmony_ci } 3563f4cbf05Sopenharmony_ci 3573f4cbf05Sopenharmony_ci char16_t* str16Temp = Char8ToChar16(str8.c_str(), str8len); 3583f4cbf05Sopenharmony_ci if (str16Temp == nullptr) { 3593f4cbf05Sopenharmony_ci UTILS_LOGD("str8 to str16 failed, str16Temp is nullptr!"); 3603f4cbf05Sopenharmony_ci return false; 3613f4cbf05Sopenharmony_ci } 3623f4cbf05Sopenharmony_ci 3633f4cbf05Sopenharmony_ci str16 = str16Temp; 3643f4cbf05Sopenharmony_ci free(str16Temp); 3653f4cbf05Sopenharmony_ci str16Temp = nullptr; 3663f4cbf05Sopenharmony_ci return true; 3673f4cbf05Sopenharmony_ci} 3683f4cbf05Sopenharmony_ci} // namespace OHOS 369