13f4cbf05Sopenharmony_ci/*
23f4cbf05Sopenharmony_ci * Copyright (c) 2021 Huawei Device Co., Ltd.
33f4cbf05Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License");
43f4cbf05Sopenharmony_ci * you may not use this file except in compliance with the License.
53f4cbf05Sopenharmony_ci * You may obtain a copy of the License at
63f4cbf05Sopenharmony_ci *
73f4cbf05Sopenharmony_ci *     http://www.apache.org/licenses/LICENSE-2.0
83f4cbf05Sopenharmony_ci *
93f4cbf05Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software
103f4cbf05Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS,
113f4cbf05Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
123f4cbf05Sopenharmony_ci * See the License for the specific language governing permissions and
133f4cbf05Sopenharmony_ci * limitations under the License.
143f4cbf05Sopenharmony_ci */
153f4cbf05Sopenharmony_ci
163f4cbf05Sopenharmony_ci#include "unicode_ex.h"
173f4cbf05Sopenharmony_ci
183f4cbf05Sopenharmony_ci#include <climits>
193f4cbf05Sopenharmony_ci#include <cstdio>
203f4cbf05Sopenharmony_ci#include <cstdlib>
213f4cbf05Sopenharmony_ci
223f4cbf05Sopenharmony_ci#include "utils_log.h"
233f4cbf05Sopenharmony_ciusing namespace std;
/***************************************UTF8 and UTF16 unicode**********************************************
UTF-8
Unicode code point range             UTF-8 byte sequence
U+0000 ~ U+007F                      0???????
U+0080 ~ U+07FF                      110????? 10??????
U+0800 ~ U+FFFF                      1110???? 10?????? 10??????
U+10000 ~ U+10FFFF                   11110??? 10?????? 10?????? 10??????

UTF-16
Unicode code point range             UTF-16 encoding
U+0000 ~ U+FFFF                      one 2-byte code unit holding the code point value itself
U+10000 ~ U+10FFFF                   4 bytes: a surrogate pair encoding (code point - 0x10000)
**************************************UTF8 and UTF16 unicode**********************************************/
383f4cbf05Sopenharmony_cinamespace OHOS {
namespace {
// Exclusive upper bounds of the code point ranges that need 1, 2 and 3 UTF-8 bytes.
constexpr char32_t ONE_BYTE_UTF8 = 0x80;
constexpr char32_t TWO_BYTES_UTF8 = 0x800;
constexpr char32_t THREE_BYTES_UTF8 = 0x10000;

// Inclusive bounds of the UTF-16 surrogate range and the largest Unicode code point.
constexpr char32_t UNICODE_RESERVED_START = 0xD800;
constexpr char32_t UNICODE_RESERVED_END = 0xDFFF;
constexpr char32_t UNICODE_MAX_NUM = 0x10FFFF;

// Number of payload bits carried by each UTF-8 continuation byte.
constexpr unsigned int UTF8_OFFSET = 6;

// (value | UTF8_BYTE_MARK) & UTF8_BYTE_MASK yields a continuation byte of the form 10xxxxxx.
constexpr char32_t UTF8_BYTE_MASK = 0xBF;
constexpr char32_t UTF8_BYTE_MARK = 0x80;

// Prefix bits of the first byte of a sequence, indexed by the sequence length in bytes (0..4).
constexpr char32_t UTF8_FIRST_BYTE_MARK[] = {
    0x00, 0x00, 0xC0, 0xE0, 0xF0
};
}
563f4cbf05Sopenharmony_ci
// Byte lengths of a UTF-8 sequence; 0 marks a code point that cannot be encoded.
#define UTF8_LENGTH_INVALID 0
#define UTF8_LENGTH_1 1
#define UTF8_LENGTH_2 2
#define UTF8_LENGTH_3 3
#define UTF8_LENGTH_4 4

// Byte counts compared against when emitting a code point as UTF-8.
#define UTF32_BYTE_SIZE_1 1
#define UTF32_BYTE_SIZE_2 2
#define UTF32_BYTE_SIZE_3 3
#define UTF32_BYTE_SIZE_4 4

// Positions of the individual bytes inside one UTF-8 sequence.
#define UTF8_FIRST_BYTE_INDEX 0
#define UTF8_SECOND_BYTE_INDEX 1
#define UTF8_THIRD_BYTE_INDEX 2
#define UTF8_FORTH_BYTE_INDEX 3

// Constant used by the first-byte length lookup.
#define UTF8_LEN_MASK 3

// Shift widths: 6 payload bits per UTF-8 continuation byte, 10 payload bits per UTF-16 surrogate.
#define UTF8_SHIFT_WIDTH 6
#define STR16_TO_STR8_SHIFT_WIDTH 10
#define UTF16_SHIFT_WIDTH 10
743f4cbf05Sopenharmony_ci
753f4cbf05Sopenharmony_ci// inner func and dstP is not nullptr
763f4cbf05Sopenharmony_civoid Utf32CodePointToUtf8(uint8_t* dstP, char32_t srcChar, size_t bytes)
773f4cbf05Sopenharmony_ci{
783f4cbf05Sopenharmony_ci    dstP += bytes;
793f4cbf05Sopenharmony_ci    if (bytes >= UTF32_BYTE_SIZE_4) {
803f4cbf05Sopenharmony_ci        *--dstP = static_cast<uint8_t>((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
813f4cbf05Sopenharmony_ci        srcChar >>= UTF8_OFFSET;
823f4cbf05Sopenharmony_ci    }
833f4cbf05Sopenharmony_ci
843f4cbf05Sopenharmony_ci    if (bytes >= UTF32_BYTE_SIZE_3) {
853f4cbf05Sopenharmony_ci        *--dstP = static_cast<uint8_t>((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
863f4cbf05Sopenharmony_ci        srcChar >>= UTF8_OFFSET;
873f4cbf05Sopenharmony_ci    }
883f4cbf05Sopenharmony_ci
893f4cbf05Sopenharmony_ci    if (bytes >= UTF32_BYTE_SIZE_2) {
903f4cbf05Sopenharmony_ci        *--dstP = static_cast<uint8_t>((srcChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
913f4cbf05Sopenharmony_ci        srcChar >>= UTF8_OFFSET;
923f4cbf05Sopenharmony_ci    }
933f4cbf05Sopenharmony_ci
943f4cbf05Sopenharmony_ci    if (bytes >= UTF32_BYTE_SIZE_1) {
953f4cbf05Sopenharmony_ci        *--dstP = static_cast<uint8_t>(srcChar | UTF8_FIRST_BYTE_MARK[bytes]);
963f4cbf05Sopenharmony_ci    }
973f4cbf05Sopenharmony_ci}
983f4cbf05Sopenharmony_ci
993f4cbf05Sopenharmony_cisize_t Utf32CodePointUtf8Length(char32_t srcChar)
1003f4cbf05Sopenharmony_ci{
1013f4cbf05Sopenharmony_ci    if (srcChar < ONE_BYTE_UTF8) {
1023f4cbf05Sopenharmony_ci        return UTF8_LENGTH_1;
1033f4cbf05Sopenharmony_ci    } else if (srcChar < TWO_BYTES_UTF8) {
1043f4cbf05Sopenharmony_ci        return UTF8_LENGTH_2;
1053f4cbf05Sopenharmony_ci    } else if (srcChar < THREE_BYTES_UTF8) {
1063f4cbf05Sopenharmony_ci        if ((srcChar < UNICODE_RESERVED_START) || (srcChar > UNICODE_RESERVED_END)) {
1073f4cbf05Sopenharmony_ci            return UTF8_LENGTH_3;
1083f4cbf05Sopenharmony_ci        } else {
1093f4cbf05Sopenharmony_ci            // Surrogates are invalid UTF-32 characters.
1103f4cbf05Sopenharmony_ci            return UTF8_LENGTH_INVALID;
1113f4cbf05Sopenharmony_ci        }
1123f4cbf05Sopenharmony_ci    } else if (srcChar <= UNICODE_MAX_NUM) {
1133f4cbf05Sopenharmony_ci        // Max code point for Unicode is 0x0010FFFF.
1143f4cbf05Sopenharmony_ci        return UTF8_LENGTH_4;
1153f4cbf05Sopenharmony_ci    } else {
1163f4cbf05Sopenharmony_ci        // Invalid UTF-32 character.
1173f4cbf05Sopenharmony_ci        return UTF8_LENGTH_INVALID;
1183f4cbf05Sopenharmony_ci    }
1193f4cbf05Sopenharmony_ci}
1203f4cbf05Sopenharmony_ci
1213f4cbf05Sopenharmony_ci// get the length of utf8 from utf16
1223f4cbf05Sopenharmony_ciint Utf16ToUtf8Length(const char16_t* str16, size_t str16Len)
1233f4cbf05Sopenharmony_ci{
1243f4cbf05Sopenharmony_ci    if (str16 == nullptr || str16Len == 0) {
1253f4cbf05Sopenharmony_ci        return -1;
1263f4cbf05Sopenharmony_ci    }
1273f4cbf05Sopenharmony_ci
1283f4cbf05Sopenharmony_ci    const char16_t* const str16End = str16 + str16Len;
1293f4cbf05Sopenharmony_ci    int utf8Len = 0;
1303f4cbf05Sopenharmony_ci    while (str16 < str16End) {
1313f4cbf05Sopenharmony_ci        int charLen = 0;
1323f4cbf05Sopenharmony_ci        if (((*str16 & 0xFC00) == 0xD800) && ((str16 + 1) < str16End)
1333f4cbf05Sopenharmony_ci            && ((*(str16 + 1) & 0xFC00) == 0xDC00)) {
1343f4cbf05Sopenharmony_ci            // surrogate pairs are always 4 bytes.
1353f4cbf05Sopenharmony_ci            charLen = 4;
1363f4cbf05Sopenharmony_ci            // str16 advance 2 bytes
1373f4cbf05Sopenharmony_ci            str16 += 2;
1383f4cbf05Sopenharmony_ci        } else {
1393f4cbf05Sopenharmony_ci            charLen = Utf32CodePointUtf8Length(static_cast<char32_t>(*str16++));
1403f4cbf05Sopenharmony_ci        }
1413f4cbf05Sopenharmony_ci
1423f4cbf05Sopenharmony_ci        if (utf8Len > (INT_MAX - charLen)) {
1433f4cbf05Sopenharmony_ci            return -1;
1443f4cbf05Sopenharmony_ci        }
1453f4cbf05Sopenharmony_ci        utf8Len += charLen;
1463f4cbf05Sopenharmony_ci    }
1473f4cbf05Sopenharmony_ci    return utf8Len;
1483f4cbf05Sopenharmony_ci}
1493f4cbf05Sopenharmony_ci
1503f4cbf05Sopenharmony_ci// inner function, utf8Str and utf16Str is not nullptr
1513f4cbf05Sopenharmony_civoid StrncpyStr16ToStr8(const char16_t* utf16Str, size_t str16Len, char* utf8Str, size_t str8Len)
1523f4cbf05Sopenharmony_ci{
1533f4cbf05Sopenharmony_ci    const char16_t* curUtf16 = utf16Str;
1543f4cbf05Sopenharmony_ci    const char16_t* const endUtf16 = utf16Str + str16Len;
1553f4cbf05Sopenharmony_ci    char* cur = utf8Str;
1563f4cbf05Sopenharmony_ci    while (curUtf16 < endUtf16) {
1573f4cbf05Sopenharmony_ci        char32_t utf32;
1583f4cbf05Sopenharmony_ci        // surrogate pairs
1593f4cbf05Sopenharmony_ci        if (((*curUtf16 & 0xFC00) == 0xD800) && ((curUtf16 + 1) < endUtf16)
1603f4cbf05Sopenharmony_ci            && (((*(curUtf16 + 1) & 0xFC00)) == 0xDC00)) {
1613f4cbf05Sopenharmony_ci            utf32 = (*curUtf16++ - 0xD800) << STR16_TO_STR8_SHIFT_WIDTH;
1623f4cbf05Sopenharmony_ci            utf32 |= *curUtf16++ - 0xDC00;
1633f4cbf05Sopenharmony_ci            utf32 += 0x10000;
1643f4cbf05Sopenharmony_ci        } else {
1653f4cbf05Sopenharmony_ci            utf32 = static_cast<char32_t>(*curUtf16++);
1663f4cbf05Sopenharmony_ci        }
1673f4cbf05Sopenharmony_ci        const size_t len = Utf32CodePointUtf8Length(utf32);
1683f4cbf05Sopenharmony_ci        if (str8Len < len) {
1693f4cbf05Sopenharmony_ci            break;
1703f4cbf05Sopenharmony_ci        }
1713f4cbf05Sopenharmony_ci
1723f4cbf05Sopenharmony_ci        Utf32CodePointToUtf8(reinterpret_cast<uint8_t*>(cur), utf32, len);
1733f4cbf05Sopenharmony_ci        cur += len;
1743f4cbf05Sopenharmony_ci        str8Len -= len;
1753f4cbf05Sopenharmony_ci    }
1763f4cbf05Sopenharmony_ci    *cur = '\0';
1773f4cbf05Sopenharmony_ci}
1783f4cbf05Sopenharmony_ci
1793f4cbf05Sopenharmony_ci// inner function and str16 is not null
1803f4cbf05Sopenharmony_cichar* Char16ToChar8(const char16_t* str16, size_t str16Len)
1813f4cbf05Sopenharmony_ci{
1823f4cbf05Sopenharmony_ci    char* str8 = nullptr;
1833f4cbf05Sopenharmony_ci    int utf8Len = Utf16ToUtf8Length(str16, str16Len);
1843f4cbf05Sopenharmony_ci    if (utf8Len < 0 || utf8Len >= INT_MAX) {
1853f4cbf05Sopenharmony_ci        return nullptr;
1863f4cbf05Sopenharmony_ci    }
1873f4cbf05Sopenharmony_ci
1883f4cbf05Sopenharmony_ci    // Allow for closing '\0'
1893f4cbf05Sopenharmony_ci    utf8Len += 1;
1903f4cbf05Sopenharmony_ci    str8 = reinterpret_cast<char*>(calloc(utf8Len, sizeof(char)));
1913f4cbf05Sopenharmony_ci    if (str8 == nullptr) {
1923f4cbf05Sopenharmony_ci        return nullptr;
1933f4cbf05Sopenharmony_ci    }
1943f4cbf05Sopenharmony_ci
1953f4cbf05Sopenharmony_ci    StrncpyStr16ToStr8(str16, str16Len, str8, utf8Len);
1963f4cbf05Sopenharmony_ci    return str8;
1973f4cbf05Sopenharmony_ci}
1983f4cbf05Sopenharmony_ci
1993f4cbf05Sopenharmony_cibool String16ToString8(const u16string& str16, string& str8)
2003f4cbf05Sopenharmony_ci{
2013f4cbf05Sopenharmony_ci    size_t str16Len = str16.length();
2023f4cbf05Sopenharmony_ci    if (str16Len < 1) {
2033f4cbf05Sopenharmony_ci        return false;
2043f4cbf05Sopenharmony_ci    }
2053f4cbf05Sopenharmony_ci
2063f4cbf05Sopenharmony_ci    char* str8Temp = Char16ToChar8(str16.c_str(), str16Len);
2073f4cbf05Sopenharmony_ci    if (str8Temp == nullptr) {
2083f4cbf05Sopenharmony_ci        UTILS_LOGD("Str16 to str8 failed, because str8Temp is nullptr!");
2093f4cbf05Sopenharmony_ci        return false;
2103f4cbf05Sopenharmony_ci    }
2113f4cbf05Sopenharmony_ci
2123f4cbf05Sopenharmony_ci    str8 = str8Temp;
2133f4cbf05Sopenharmony_ci    free(str8Temp);
2143f4cbf05Sopenharmony_ci    str8Temp = nullptr;
2153f4cbf05Sopenharmony_ci    return true;
2163f4cbf05Sopenharmony_ci}
2173f4cbf05Sopenharmony_ci
/**
* Returns the length in bytes (1-4) of a UTF-8 sequence, judged only by the
* high bits of its first byte:
* 1111xxxx : 4
* 1110xxxx : 3
* 110xxxxx : 2
* 10xxxxxx : 1
* 0xxxxxxx : 1
*/
static inline size_t Utf8CodePointLen(uint8_t ch)
{
    constexpr uint8_t fourBytePrefix = 0xF0;
    constexpr uint8_t threeBytePrefix = 0xE0;
    constexpr uint8_t twoBytePrefix = 0xC0;

    // Test the longest prefix first; every shorter prefix is a subset of its bits.
    if ((ch & fourBytePrefix) == fourBytePrefix) {
        return 4;
    }
    if ((ch & threeBytePrefix) == threeBytePrefix) {
        return 3;
    }
    if ((ch & twoBytePrefix) == twoBytePrefix) {
        return 2;
    }
    return 1;
}
2303f4cbf05Sopenharmony_ci
2313f4cbf05Sopenharmony_cistatic inline void Utf8ShiftAndMask(uint32_t* codePoint, const uint8_t byte)
2323f4cbf05Sopenharmony_ci{
2333f4cbf05Sopenharmony_ci    *codePoint <<= UTF8_SHIFT_WIDTH;
2343f4cbf05Sopenharmony_ci    *codePoint |= 0x3F & byte;
2353f4cbf05Sopenharmony_ci}
2363f4cbf05Sopenharmony_ci
2373f4cbf05Sopenharmony_ciuint32_t Utf8ToUtf32CodePoint(const char* src, size_t length)
2383f4cbf05Sopenharmony_ci{
2393f4cbf05Sopenharmony_ci    uint32_t unicode = 0;
2403f4cbf05Sopenharmony_ci
2413f4cbf05Sopenharmony_ci    switch (length) {
2423f4cbf05Sopenharmony_ci        case UTF8_LENGTH_1:
2433f4cbf05Sopenharmony_ci            return src[UTF8_FIRST_BYTE_INDEX];
2443f4cbf05Sopenharmony_ci        case UTF8_LENGTH_2:
2453f4cbf05Sopenharmony_ci            unicode = src[UTF8_FIRST_BYTE_INDEX] & 0x1f;
2463f4cbf05Sopenharmony_ci            Utf8ShiftAndMask(&unicode, src[UTF8_SECOND_BYTE_INDEX]);
2473f4cbf05Sopenharmony_ci            return unicode;
2483f4cbf05Sopenharmony_ci        case UTF8_LENGTH_3:
2493f4cbf05Sopenharmony_ci            unicode = src[UTF8_FIRST_BYTE_INDEX] & 0x0f;
2503f4cbf05Sopenharmony_ci            Utf8ShiftAndMask(&unicode, src[UTF8_SECOND_BYTE_INDEX]);
2513f4cbf05Sopenharmony_ci            Utf8ShiftAndMask(&unicode, src[UTF8_THIRD_BYTE_INDEX]);
2523f4cbf05Sopenharmony_ci            return unicode;
2533f4cbf05Sopenharmony_ci        case UTF8_LENGTH_4:
2543f4cbf05Sopenharmony_ci            unicode = src[UTF8_FIRST_BYTE_INDEX] & 0x07;
2553f4cbf05Sopenharmony_ci            Utf8ShiftAndMask(&unicode, src[UTF8_SECOND_BYTE_INDEX]);
2563f4cbf05Sopenharmony_ci            Utf8ShiftAndMask(&unicode, src[UTF8_THIRD_BYTE_INDEX]);
2573f4cbf05Sopenharmony_ci            Utf8ShiftAndMask(&unicode, src[UTF8_FORTH_BYTE_INDEX]);
2583f4cbf05Sopenharmony_ci            return unicode;
2593f4cbf05Sopenharmony_ci        default:
2603f4cbf05Sopenharmony_ci            return 0xffff;
2613f4cbf05Sopenharmony_ci    }
2623f4cbf05Sopenharmony_ci}
2633f4cbf05Sopenharmony_ci
2643f4cbf05Sopenharmony_ciint Utf8ToUtf16Length(const char* str8, size_t str8Len)
2653f4cbf05Sopenharmony_ci{
2663f4cbf05Sopenharmony_ci    const char* const str8end = str8 + str8Len;
2673f4cbf05Sopenharmony_ci    int utf16len = 0;
2683f4cbf05Sopenharmony_ci    while (str8 < str8end) {
2693f4cbf05Sopenharmony_ci        utf16len++;
2703f4cbf05Sopenharmony_ci        size_t u8charlen = Utf8CodePointLen(*str8);
2713f4cbf05Sopenharmony_ci        if (str8 + u8charlen - 1 >= str8end) {
2723f4cbf05Sopenharmony_ci            UTILS_LOGE("Get str16 length failed because str8 unicode is illegal!");
2733f4cbf05Sopenharmony_ci            return -1;
2743f4cbf05Sopenharmony_ci        }
2753f4cbf05Sopenharmony_ci        uint32_t codepoint = Utf8ToUtf32CodePoint(str8, u8charlen);
2763f4cbf05Sopenharmony_ci        if (codepoint > 0xFFFF) {
2773f4cbf05Sopenharmony_ci            utf16len++; // this will be a surrogate pair in utf16
2783f4cbf05Sopenharmony_ci        }
2793f4cbf05Sopenharmony_ci        str8 += u8charlen;
2803f4cbf05Sopenharmony_ci    }
2813f4cbf05Sopenharmony_ci    if (str8 != str8end) {
2823f4cbf05Sopenharmony_ci        UTILS_LOGE("Get str16 length failed because str8length is illegal!");
2833f4cbf05Sopenharmony_ci        return -1;
2843f4cbf05Sopenharmony_ci    }
2853f4cbf05Sopenharmony_ci    return utf16len;
2863f4cbf05Sopenharmony_ci}
2873f4cbf05Sopenharmony_ci
2883f4cbf05Sopenharmony_cichar16_t* Utf8ToUtf16(const char* utf8Str, size_t u8len, char16_t* u16str, size_t u16len)
2893f4cbf05Sopenharmony_ci{
2903f4cbf05Sopenharmony_ci    if (u16len == 0) {
2913f4cbf05Sopenharmony_ci        return u16str;
2923f4cbf05Sopenharmony_ci    }
2933f4cbf05Sopenharmony_ci    const char* const u8end = utf8Str + u8len;
2943f4cbf05Sopenharmony_ci    const char* u8cur = utf8Str;
2953f4cbf05Sopenharmony_ci    const char16_t* const u16end = u16str + u16len;
2963f4cbf05Sopenharmony_ci    char16_t* u16cur = u16str;
2973f4cbf05Sopenharmony_ci
2983f4cbf05Sopenharmony_ci    while ((u8cur < u8end) && (u16cur < u16end)) {
2993f4cbf05Sopenharmony_ci        size_t len = Utf8CodePointLen(*u8cur);
3003f4cbf05Sopenharmony_ci        uint32_t codepoint = Utf8ToUtf32CodePoint(u8cur, len);
3013f4cbf05Sopenharmony_ci        // Convert the UTF32 codepoint to one or more UTF16 codepoints
3023f4cbf05Sopenharmony_ci        if (codepoint <= 0xFFFF) {
3033f4cbf05Sopenharmony_ci            // Single UTF16 character
3043f4cbf05Sopenharmony_ci            *u16cur++ = static_cast<char16_t>(codepoint);
3053f4cbf05Sopenharmony_ci        } else {
3063f4cbf05Sopenharmony_ci            // Multiple UTF16 characters with surrogates
3073f4cbf05Sopenharmony_ci            codepoint = codepoint - 0x10000;
3083f4cbf05Sopenharmony_ci            *u16cur++ = static_cast<char16_t>((codepoint >> UTF16_SHIFT_WIDTH) + 0xD800);
3093f4cbf05Sopenharmony_ci            if (u16cur >= u16end) {
3103f4cbf05Sopenharmony_ci                // Ooops...  not enough room for this surrogate pair.
3113f4cbf05Sopenharmony_ci                return u16cur - 1;
3123f4cbf05Sopenharmony_ci            }
3133f4cbf05Sopenharmony_ci            *u16cur++ = static_cast<char16_t>((codepoint & 0x3FF) + 0xDC00);
3143f4cbf05Sopenharmony_ci        }
3153f4cbf05Sopenharmony_ci
3163f4cbf05Sopenharmony_ci        u8cur += len;
3173f4cbf05Sopenharmony_ci    }
3183f4cbf05Sopenharmony_ci    return u16cur;
3193f4cbf05Sopenharmony_ci}
3203f4cbf05Sopenharmony_ci
3213f4cbf05Sopenharmony_civoid StrncpyStr8ToStr16(const char* utf8Str, size_t u8len, char16_t* u16str, size_t u16len)
3223f4cbf05Sopenharmony_ci{
3233f4cbf05Sopenharmony_ci    char16_t* result = Utf8ToUtf16(utf8Str, u8len, u16str, u16len - 1);
3243f4cbf05Sopenharmony_ci    *result = 0;
3253f4cbf05Sopenharmony_ci    return;
3263f4cbf05Sopenharmony_ci}
3273f4cbf05Sopenharmony_ci
3283f4cbf05Sopenharmony_ci// inner function and str8 is not null
3293f4cbf05Sopenharmony_cichar16_t* Char8ToChar16(const char* str8, size_t str8Len)
3303f4cbf05Sopenharmony_ci{
3313f4cbf05Sopenharmony_ci    char16_t* str16 = nullptr;
3323f4cbf05Sopenharmony_ci    int utf16Len = Utf8ToUtf16Length(str8, str8Len);
3333f4cbf05Sopenharmony_ci    if (utf16Len < 0) {
3343f4cbf05Sopenharmony_ci        UTILS_LOGE("Get str16 length failed,length is: %{public}d", utf16Len);
3353f4cbf05Sopenharmony_ci        return nullptr;
3363f4cbf05Sopenharmony_ci    }
3373f4cbf05Sopenharmony_ci
3383f4cbf05Sopenharmony_ci    // Allow for closing 0
3393f4cbf05Sopenharmony_ci    utf16Len = utf16Len + 1;
3403f4cbf05Sopenharmony_ci    str16 = reinterpret_cast<char16_t*>(calloc(utf16Len, sizeof(char16_t)));
3413f4cbf05Sopenharmony_ci    if (str16 == nullptr) {
3423f4cbf05Sopenharmony_ci        UTILS_LOGE("Str16 malloc memory failed!");
3433f4cbf05Sopenharmony_ci        return nullptr;
3443f4cbf05Sopenharmony_ci    }
3453f4cbf05Sopenharmony_ci
3463f4cbf05Sopenharmony_ci    StrncpyStr8ToStr16(str8, str8Len, str16, utf16Len);
3473f4cbf05Sopenharmony_ci    return str16;
3483f4cbf05Sopenharmony_ci}
3493f4cbf05Sopenharmony_ci
3503f4cbf05Sopenharmony_cibool String8ToString16(const string& str8, u16string& str16)
3513f4cbf05Sopenharmony_ci{
3523f4cbf05Sopenharmony_ci    size_t str8len = str8.length();
3533f4cbf05Sopenharmony_ci    if (str8len < 1) {
3543f4cbf05Sopenharmony_ci        return false;
3553f4cbf05Sopenharmony_ci    }
3563f4cbf05Sopenharmony_ci
3573f4cbf05Sopenharmony_ci    char16_t* str16Temp = Char8ToChar16(str8.c_str(), str8len);
3583f4cbf05Sopenharmony_ci    if (str16Temp == nullptr) {
3593f4cbf05Sopenharmony_ci        UTILS_LOGD("str8 to str16 failed, str16Temp is nullptr!");
3603f4cbf05Sopenharmony_ci        return false;
3613f4cbf05Sopenharmony_ci    }
3623f4cbf05Sopenharmony_ci
3633f4cbf05Sopenharmony_ci    str16 = str16Temp;
3643f4cbf05Sopenharmony_ci    free(str16Temp);
3653f4cbf05Sopenharmony_ci    str16Temp = nullptr;
3663f4cbf05Sopenharmony_ci    return true;
3673f4cbf05Sopenharmony_ci}
3683f4cbf05Sopenharmony_ci} // namespace OHOS
369