1/*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "ecmascript/base/utf_helper.h"
17#include "ecmascript/tests/test_helper.h"
18
19using namespace panda::ecmascript;
20using namespace panda::ecmascript::base;
21using namespace panda::ecmascript::base::utf_helper;
22
23namespace panda::test {
24class UtfHelperTest : public BaseTestWithScope<false> {
25};
26
/*
* @tc.name: CombineTwoU16
* @tc.desc: Given the lead surrogate and the trail surrogate of a UTF16-encoded surrogate pair,
*           return the corresponding Unicode code point value.
* @tc.type: FUNC
*/
33HWTEST_F_L0(UtfHelperTest, CombineTwoU16)
34{
35    uint16_t leadSur = 0xD800;
36    uint16_t trailSur = 0xDC00;
37    uint32_t codePoint = static_cast<uint32_t>(((leadSur - 0xD800U) << 10)|(trailSur - 0xDc00U)) + 0x10000U;
38    uint32_t utfHelperCodePoint = CombineTwoU16(leadSur, trailSur);
39    EXPECT_EQ(codePoint, utfHelperCodePoint);
40    EXPECT_EQ(codePoint, static_cast<uint32_t>(0x10000));
41    trailSur = 0xDFFF;
42    codePoint = static_cast<uint32_t>(((leadSur - 0xD800U) << 10) | (trailSur - 0xDC00U))+ 0x10000U;
43    utfHelperCodePoint = CombineTwoU16(leadSur, trailSur);
44    EXPECT_EQ(codePoint, utfHelperCodePoint);
45    EXPECT_EQ(codePoint, static_cast<uint32_t>(0x103FF));
46    leadSur = 0xDBFF;
47    codePoint = static_cast<uint32_t>(((leadSur - 0xD800U) << 10) | (trailSur - 0xDC00U)) + 0x10000U;
48    utfHelperCodePoint = CombineTwoU16(leadSur, trailSur);
49    EXPECT_EQ(codePoint, utfHelperCodePoint);
50    EXPECT_EQ(codePoint, static_cast<uint32_t>(0x10FFFF));
51    trailSur = 0xDC00;
52    codePoint = static_cast<uint32_t>(((leadSur - 0xD800U) << 10) | (trailSur - 0xDC00U)) + 0x10000U;
53    utfHelperCodePoint = CombineTwoU16(leadSur, trailSur);
54    EXPECT_EQ(codePoint, utfHelperCodePoint);
55    EXPECT_EQ(codePoint, static_cast<uint32_t>(0x10FC00));
56    leadSur = 0xD950;
57    trailSur = 0xDF21;
58    codePoint = static_cast<uint32_t>(((leadSur - 0xD800U)<< 10) | (trailSur - 0xDC00U)) + 0x10000U;
59    utfHelperCodePoint = CombineTwoU16(leadSur, trailSur);
60    EXPECT_EQ(codePoint, utfHelperCodePoint);
61    EXPECT_EQ(codePoint, static_cast<uint32_t>(0x64321));
62}
63
/*
* @tc.name: UTF16Decode
* @tc.desc: Given the lead surrogate and the trail surrogate of a UTF16-encoded surrogate pair,
*           decode them into the corresponding Unicode code point value and return it.
* @tc.type: FUNC
*/
70HWTEST_F_L0(UtfHelperTest, UTF16Decode)
71{
72    uint16_t lead = 0xD950;
73    uint16_t trail = 0xDF21;
74    EXPECT_TRUE(lead >= DECODE_LEAD_LOW && lead <= DECODE_LEAD_HIGH);
75    EXPECT_TRUE(trail >= DECODE_TRAIL_LOW && trail <= DECODE_TRAIL_HIGH);
76    uint32_t codePoint = utf_helper::UTF16Decode(lead, trail);
77    EXPECT_EQ(codePoint, 0x64321U);
78    lead = 0xD85D;
79    trail = 0xDFCC;
80    EXPECT_TRUE(lead >= DECODE_LEAD_LOW && lead <= DECODE_LEAD_HIGH);
81    EXPECT_TRUE(trail >= DECODE_TRAIL_LOW && trail <= DECODE_TRAIL_HIGH);
82    codePoint = utf_helper::UTF16Decode(lead, trail);
83    EXPECT_EQ(codePoint, 0x277CCU);
84}
85
86/*
87 * @tc.name: IsValidUTF8
88 * @tc.desc: Judge whether an input group of symbols is a valid UTF8 coding sequence.
89 * @tc.type: FUNC
90 */
91HWTEST_F_L0(UtfHelperTest, IsValidUTF8)
92{
93    // 0xxxxxxx, min:0, max:127
94    const std::vector<uint8_t> utfDataOneBitVaild1 = {0x00};
95    const std::vector<uint8_t> utfDataOneBitVaild2 = {BIT_MASK_1 - 0x01};
96    const std::vector<uint8_t> utfDataOneBitInvaild = {BIT_MASK_1};
97    EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataOneBitVaild1));
98    EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataOneBitVaild2));
99    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataOneBitInvaild));
100    // 110xxxxx 10xxxxxx, min:128, max:2047
101    const std::vector<uint8_t> utfDataTwoBitVaild1 = {BIT_MASK_2 + 0x02, BIT_MASK_1};
102    const std::vector<uint8_t> utfDataTwoBitVaild2 = {BIT_MASK_3 - 0x01, BIT_MASK_2 - 0x01};
103    const std::vector<uint8_t> utfDataTwoBitInvaild1 = {BIT_MASK_2, BIT_MASK_2};
104    const std::vector<uint8_t> utfDataTwoBitInvaild2 = {BIT_MASK_3, BIT_MASK_1};
105    const std::vector<uint8_t> utfDataTwoBitInvaild3 = {BIT_MASK_2, BIT_MASK_1};
106    EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataTwoBitVaild1));
107    EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataTwoBitVaild2));
108    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataTwoBitInvaild1));
109    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataTwoBitInvaild2));
110    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataTwoBitInvaild3));
111    // 1110xxxx 10xxxxxx 10xxxxxx, min:2048, max:65535
112    const std::vector<uint8_t> utfDataThreeBitVaild1 = {BIT_MASK_3, BIT_MASK_1 + 0x20, BIT_MASK_1};
113    const std::vector<uint8_t> utfDataThreeBitVaild2 = {BIT_MASK_4 - 0x01, BIT_MASK_2 - 0x01, BIT_MASK_2 - 0x01};
114    const std::vector<uint8_t> utfDataThreeBitVaild3 = {BIT_MASK_3 + 0x01, BIT_MASK_1, BIT_MASK_1};
115    const std::vector<uint8_t> utfDataThreeBitInvaild1 = {BIT_MASK_3, BIT_MASK_1, BIT_MASK_2};
116    const std::vector<uint8_t> utfDataThreeBitInvaild2 = {BIT_MASK_3, BIT_MASK_2, BIT_MASK_1};
117    const std::vector<uint8_t> utfDataThreeBitInvaild3 = {BIT_MASK_4, BIT_MASK_1, BIT_MASK_1};
118    const std::vector<uint8_t> utfDataThreeBitInvaild4 = {BIT_MASK_4, BIT_MASK_2, BIT_MASK_2};
119    const std::vector<uint8_t> utfDataThreeBitInvaild5 = {BIT_MASK_3, BIT_MASK_1, BIT_MASK_1};
120    EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataThreeBitVaild1));
121    EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataThreeBitVaild2));
122    EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataThreeBitVaild3));
123    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild1));
124    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild2));
125    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild3));
126    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild4));
127    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataThreeBitInvaild5));
128    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx, min:65536, max:1114111(0x10FFFF)
129    const std::vector<uint8_t> utfDataFourBitVaild1 = {BIT_MASK_4, BIT_MASK_1 + 0x10, BIT_MASK_1, BIT_MASK_1};
130    const std::vector<uint8_t> utfDataFourBitVaild3 = {BIT_MASK_4 + 0x01, BIT_MASK_1, BIT_MASK_1, BIT_MASK_1};
131    const std::vector<uint8_t> utfDataFourBitInvaild1 = {BIT_MASK_4, BIT_MASK_1, BIT_MASK_1, BIT_MASK_2};
132    const std::vector<uint8_t> utfDataFourBitInvaild2 = {BIT_MASK_4, BIT_MASK_1, BIT_MASK_2, BIT_MASK_1};
133    const std::vector<uint8_t> utfDataFourBitInvaild3 = {BIT_MASK_4, BIT_MASK_2, BIT_MASK_1, BIT_MASK_1};
134    const std::vector<uint8_t> utfDataFourBitInvaild4 = {BIT_MASK_5, BIT_MASK_1, BIT_MASK_1, BIT_MASK_1};
135    const std::vector<uint8_t> utfDataFourBitInvaild5 = {BIT_MASK_5, BIT_MASK_2, BIT_MASK_2, BIT_MASK_2};
136    const std::vector<uint8_t> utfDataFourBitInvaild6 = {BIT_MASK_4, BIT_MASK_1, BIT_MASK_1, BIT_MASK_1};
137    const std::vector<uint8_t> utfDataFourBitInvaild7 =
138        {BIT_MASK_5 - 0x01, BIT_MASK_2 - 0x01, BIT_MASK_2 - 0x01, BIT_MASK_2 - 0x01};
139    EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataFourBitVaild1));
140    EXPECT_TRUE(utf_helper::IsValidUTF8(utfDataFourBitVaild3));
141    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild1));
142    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild2));
143    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild3));
144    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild4));
145    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild5));
146    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild6));
147    EXPECT_FALSE(utf_helper::IsValidUTF8(utfDataFourBitInvaild7));
148}
149
150/*
151* @tc.name: ConvertUtf16ToUtf8
152* @tc.desc: Converts a UTF16 encoding sequence encoding a character into a UTF8 encoding sequence,
153*           and returns the sequence and the byte length of the sequence. The parameter "modify"
154*           indicates whether to perform special conversion for 0.
155* @tc.type: FUNC
156*/
157HWTEST_F_L0(UtfHelperTest, ConvertUtf16ToUtf8_001)
158{
159    // codePoint lie in [0,0x7F]--->UTF-8(length:1)
160    {
161        uint16_t utf16Data0 = 0x00;
162        uint16_t utf16Data1 = 0x00;
163        Utf8Char utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
164        Utf8Char utf8CharTemp = {0, {0x00U}};
165        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
166        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
167    }
168
169    // special case for \u0000 ==> Co80- 1100'0000 1000'0000
170    {
171        uint16_t utf16Data0 = 0x00;
172        uint16_t utf16Data1 = 0x00;
173        Utf8Char utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, true);
174        Utf8Char utf8CharTemp = {2, {UTF8_2B_FIRST, UTF8_2B_SECOND}};
175        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
176        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
177        utf16Data0 = 0x7F;
178        utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
179        utf8CharTemp = {1, {0x7F}};
180        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
181        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
182
183        // codePoint lie in [0x80,0x7FF]--> UTF-8(length:2)
184        utf16Data0 = 0x80;
185        utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
186        utf8CharTemp = {2, {UTF8_2B_FIRST + 0x02U, UTF8_2B_SECOND}};
187        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
188        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
189        utf16Data0 = 0x7FF;
190        utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
191        utf8CharTemp = {2, {BIT_MASK_3 - 0x01, BIT_MASK_2 - 0x01}};
192        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
193        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
194    }
195
196    // codePoint lie in [0xD800,0xDFFF]--> UTF-8(length:3)
197    {
198        uint16_t utf16Data0 = 0xD800;
199        uint16_t utf16Data1 = 0x00;
200        Utf8Char utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
201        Utf8Char utf8CharTemp = {3, {UTF8_3B_FIRST | static_cast<uint8_t>(0xD800 >> 12),
202                            UTF8_3B_SECOND | (static_cast<uint8_t>(0xD800 >> 6) & utf::MASK_6BIT),
203                            UTF8_3B_THIRD | (static_cast<uint8_t>(0xD800) & utf::MASK_6BIT)}};
204        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
205        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
206        utf16Data0 = 0xDFFF;
207        utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
208        utf8CharTemp = {3, {UTF8_3B_FIRST | static_cast<uint8_t>(0xDFFF >> 12),
209                            UTF8_3B_SECOND | (static_cast<uint8_t>(0xDFFF >> 6) & utf::MASK_6BIT),
210                            UTF8_3B_THIRD | (static_cast<uint8_t>(0xDFFF) & utf::MASK_6BIT)}};
211        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
212        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
213    }
214}
215
216HWTEST_F_L0(UtfHelperTest, ConvertUtf16ToUtf8_002)
217{
218    // codePoint lie in [0x800,0xD7FF]&&[0xE000,0xFFFF]-->UTF-8(length:3)
219    uint16_t utf16Data0 = 0x800;
220    uint16_t utf16Data1 = 0x00;
221    Utf8Char utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
222    Utf8Char utf8CharTemp = {3, {UTF8_3B_FIRST | static_cast<uint8_t>(0x800 >> 12),
223                                 UTF8_3B_SECOND | (static_cast<uint8_t>(0x800 >> 6) & utf::MASK_6BIT),
224                                 UTF8_3B_THIRD | (static_cast<uint8_t>(0x800) & utf::MASK_6BIT)}};
225    EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
226    EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
227    utf16Data0 = 0xD7FF;
228    utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
229    utf8CharTemp = {3, {UTF8_3B_FIRST | static_cast<uint8_t>(0xD7FF>>12),
230                        UTF8_3B_SECOND | (static_cast<uint8_t>(0xD7FF >> 6) & utf::MASK_6BIT),
231                        UTF8_3B_THIRD | (static_cast<uint8_t>(0xD7FF) & utf::MASK_6BIT)}};
232    EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
233    EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
234    utf16Data0 = 0xE000;
235    utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
236    utf8CharTemp = {3, {UTF8_3B_FIRST | static_cast<uint8_t>(0xE000 >> 12),
237                        UTF8_3B_SECOND | (static_cast<uint8_t>(0xE000 >> 6)& utf::MASK_6BIT),
238                        UTF8_3B_THIRD | (static_cast<uint8_t>(0xE000) & utf::MASK_6BIT)}};
239    EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
240    EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
241    utf16Data0 = 0xFFFF;
242    utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
243    utf8CharTemp = {3, {UTF8_3B_FIRST | static_cast<uint8_t>(0xFFFF >> 12),
244                        UTF8_3B_SECOND | (static_cast<uint8_t>(0xFFFF >> 6)& utf::MASK_6BIT),
245                        UTF8_3B_THIRD | (static_cast<uint8_t>(0xFFFF) & utf::MASK_6BIT)}};
246    EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
247    EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
248}
249
250HWTEST_F_L0(UtfHelperTest, ConvertUtf16ToUtf8_003)
251{
252    // codePoint lie in [0x10000,0x10FFFF] --> UTF-8(length:4)
253    {
254        uint16_t utf16Data0 = 0xD800;
255        uint16_t utf16Data1 = 0xDC00;
256        Utf8Char utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
257        uint32_t codePoint = CombineTwoU16(utf16Data0, utf16Data1);
258        Utf8Char utf8CharTemp = {4, {static_cast<uint8_t>((codePoint >> 18) | UTF8_4B_FIRST),
259                            static_cast<uint8_t>(((codePoint >> 12) & utf::MASK_6BIT) | utf::MASK1),
260                            static_cast<uint8_t>(((codePoint >> 6) & utf::MASK_6BIT) | utf::MASK1),
261                            static_cast<uint8_t>((codePoint & utf::MASK_6BIT) | utf::MASK1)}};
262        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
263        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
264    }
265
266    // 0xD950 0xDF21 --> 0x64321 --> 0xf1 0xa4 0x8c 0xa1
267    {
268        uint16_t utf16Data0 = 0xD950;
269        uint16_t utf16Data1 = 0xDF21;
270        Utf8Char utf8Char = ConvertUtf16ToUtf8(utf16Data0, utf16Data1, false);
271        uint32_t codePoint = CombineTwoU16(utf16Data0, utf16Data1);
272        Utf8Char utf8CharTemp = {4, {static_cast<uint8_t>((codePoint >> 18) | UTF8_4B_FIRST),
273                            static_cast<uint8_t>(((codePoint >> 12)& utf::MASK_6BIT)| utf::MASK1),
274                            static_cast<uint8_t>(((codePoint >> 6)& utf::MASK_6BIT) | utf::MASK1),
275        static_cast<uint8_t>((codePoint & utf::MASK_6BIT) | utf::MASK1)}};
276        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
277        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
278        utf8CharTemp = {4, {0xf1, 0xa4, 0x8c, 0xa1}};
279        EXPECT_EQ(utf8Char.n, utf8CharTemp.n);
280        EXPECT_EQ(utf8Char.ch, utf8CharTemp.ch);
281    }
282}
283
/*
* @tc.name: Utf16ToUtf8Size
* @tc.desc: Enter a string of UTF16 coded sequences and return the length of the sequence converted into UTF8 coded
*           sequences. "length" indicates the length of the input UTF16 sequence, and "modify" indicates whether
*           to perform special conversion for 0 (U+0000).
* @tc.type: FUNC
*/
291HWTEST_F_L0(UtfHelperTest, Utf16ToUtf8Size_001)
292{
293    // when utf16 data length is only 1 and code in 0xd800-0xdfff, means that is a single code point, it needs to be
294    // represented by three UTF8 code.
295    uint32_t length = 0;
296    uint16_t utf16Value1[1] = {0xD800};
297    const uint16_t *utf16ValuePtr1 = utf16Value1;
298    length = Utf16ToUtf8Size(utf16ValuePtr1, 1, false);
299    EXPECT_EQ(length - 1, UtfLength::THREE);
300    length = 1;
301    uint16_t utf16Value2[1] = {0xDFFF};
302    const uint16_t *utf16ValuePtr2 = utf16Value2;
303    length = Utf16ToUtf8Size(utf16ValuePtr2, 1, false);
304    EXPECT_EQ(length - 1, UtfLength::THREE);
305
306    // special case for U+0000 => c0 80
307    uint16_t utf16Value3[1] = {0x00};
308    const uint16_t *utf16ValuePtr3 = utf16Value3;
309    length = Utf16ToUtf8Size(utf16ValuePtr3, 1, false);
310    EXPECT_EQ(length - 1, 0U);
311    length = Utf16ToUtf8Size(utf16ValuePtr3, 1, true);
312    EXPECT_EQ(length - 1, 2U);
313
314    // if isGetBufferSize is true, special case for U+0000 => 00
315    uint16_t utf16Value12[1] = {0x00};
316    const uint16_t *utf16ValuePtr12 = utf16Value12;
317    length = Utf16ToUtf8Size(utf16ValuePtr12, 1, false, true);
318    EXPECT_EQ(length - 1, 1U);
319    length = Utf16ToUtf8Size(utf16ValuePtr12, 1, true, true);
320    EXPECT_EQ(length - 1, 1U);
321
322    // code point lie in [0x00, 0x7F], it needs to be represented by one UTF8 code.
323    uint16_t utf16Value4[1] = {0x00};
324    uint16_t utf16Value5[1] = {0x7F};
325    const uint16_t *utf16ValuePtr4 = utf16Value4;
326    const uint16_t *utf16ValuePtr5 = utf16Value5;
327    length = Utf16ToUtf8Size(utf16ValuePtr4, 1, false);
328    EXPECT_EQ(length - 1, 0U);
329    length = Utf16ToUtf8Size(utf16ValuePtr5, 1, false);
330    EXPECT_EQ(length - 1, 1U);
331
332    // code point lie in [0x80, 0x7FF], it needs to be represented by two UTF8 code,
333    uint16_t utf16Value6[1] = {0x80};
334    uint16_t utf16Value7[1] = {0x7FF};
335    const uint16_t *utf16ValuePtr6 = utf16Value6;
336    const uint16_t *utf16ValuePtr7 = utf16Value7;
337    length = Utf16ToUtf8Size(utf16ValuePtr6, 1, false);
338    EXPECT_EQ(length - 1, 2U);
339    length = Utf16ToUtf8Size(utf16ValuePtr7, 1, false);
340    EXPECT_EQ(length - 1, 2U);
341    // code point lie in [0x800, 0xD7FF] or [0xDCoo, 0xFFFF], it needs to be represented by three UTF8 code.
342    uint16_t utf16Value8[1] = {0x800};
343    uint16_t utf16Value9[1] = {0xD7FF};
344    uint16_t utf16Value10[1] = {0xDC00};
345    uint16_t utf16Value11[1] = {0xFFFF};
346    const uint16_t *utf16ValuePtr8 = utf16Value8;
347    const uint16_t *utf16ValuePtr9 = utf16Value9;
348    const uint16_t *utf16ValuePtr10 = utf16Value10;
349    const uint16_t *utf16ValuePtr11 = utf16Value11;
350    length = Utf16ToUtf8Size(utf16ValuePtr8, 1, false);
351    EXPECT_EQ(length - 1, 3U);
352    length = Utf16ToUtf8Size(utf16ValuePtr9, 1, false);
353    EXPECT_EQ(length - 1, 3U);
354    length = Utf16ToUtf8Size(utf16ValuePtr10, 1, false);
355    EXPECT_EQ(length-1, 3U);
356    length = Utf16ToUtf8Size(utf16ValuePtr11, 1, false);
357    EXPECT_EQ(length - 1, 3U);
358}
359
360HWTEST_F_L0(UtfHelperTest, Utf16ToUtf8Size_002)
361{
362    // The trail value is valid, located in [0xDc00, 0xDFFF].It needs to be represented by four UTF8 code.
363    uint16_t utf16Value12[2] = {0xD800, 0xDc00};
364    uint16_t utf16Value13[2] = {0xD800, 0xDFFF};
365    uint16_t utf16Value14[2] = {0xDBFF, 0xDC00};
366    uint16_t utf16Value15[2] = {0xDBFF, 0xDFFF};
367    const uint16_t *utf16ValuePtr12 = utf16Value12;
368    const uint16_t *utf16ValuePtr13 = utf16Value13;
369    const uint16_t *utf16ValuePtr14 = utf16Value14;
370    const uint16_t *utf16ValuePtr15 = utf16Value15;
371    uint32_t length = Utf16ToUtf8Size(utf16ValuePtr12, 2, false);
372    EXPECT_EQ(length - 1, 4U);
373    length = Utf16ToUtf8Size(utf16ValuePtr13, 2, false);
374    EXPECT_EQ(length- 1, 4U);
375    length = Utf16ToUtf8Size(utf16ValuePtr14, 2, false);
376    EXPECT_EQ(length - 1, 4U);
377    length = Utf16ToUtf8Size(utf16ValuePtr15, 2, false);
378    EXPECT_EQ(length - 1, 4U);
379
380    // The trail value of Bad sequence is invalid, not located in [0xDC00, 0xDFFF].
381    // Need to return 6 bytes length
382    uint16_t utf16Value16[2] = {0xD800, 0xDBFF};
383    uint16_t utf16Value17[2] = {0xDC00, 0xDFFF};
384    const uint16_t *utf16ValuePtr16 = utf16Value16;
385    const uint16_t *utf16ValuePtr17 = utf16Value17;
386    length = Utf16ToUtf8Size(utf16ValuePtr16, 2, false);
387    EXPECT_EQ(length- 1, 6U);
388    length = Utf16ToUtf8Size(utf16ValuePtr17, 2, false);
389    EXPECT_EQ(length-1, 6U);
390
391    // 0(or 2)+ 1+ 2 + 3 + 4 = 10(or 12)
392    uint16_t utf16Value18[6] = {0x00, 0x7F, 0x80, 0x800, 0xD800, 0xDC00};
393    const uint16_t *utf16ValuePtr18 = utf16Value18;
394    length = Utf16ToUtf8Size(utf16ValuePtr18, 6, false);
395    EXPECT_EQ(length - 1, 10U);
396    length = Utf16ToUtf8Size(utf16ValuePtr18, 6, true);
397    EXPECT_EQ(length - 1, 12U);
398}
399
/*
* @tc.name: ConvertUtf8ToUtf16Pair
* @tc.desc: Converts a UTF8 encoding sequence encoding a character into a UTF16 encoding sequence, and returns the
*           sequence and the byte length of the UTF8 encoding sequence that was read. The parameter "combine"
*           identifies whether, for code points in the supplementary planes, to return the Unicode value itself
*           or the surrogate pair packed into a single 32-bit value.
* @tc.type: FUNC
*/
407HWTEST_F_L0(UtfHelperTest, ConvertUtf8ToUtf16Pair)
408{
409    // code point lie in [0x00, 0x7F], the length of utf8 code element byte is 1
410    uint8_t utf8Value1[1] = {0x00};
411    uint8_t utf8Value2[1] = {UTF8_1B_MAX};
412    const uint8_t *utf8ValuePtr1 = utf8Value1;
413    const uint8_t *utf8ValuePtr2 = utf8Value2;
414    std::pair<uint32_t, size_t> utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr1);
415    std::pair<uint32_t, size_t> utf16Value = {utf8Value1[0], 1};
416    EXPECT_EQ(utf16Res, utf16Value);
417    utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr2);
418    utf16Value = {utf8Value2[0], 1};
419    EXPECT_EQ(utf16Res, utf16Value);
420    // code point lie in [0x80, 0x7FF], the length of utf8 code element byte is 2
421    uint8_t utf8Value3[2] = {0xc2, 0x80}; // 0x80
422    uint8_t utf8Value4[2] = {0xDF, 0xBF}; // 0x7FF
423    const uint8_t *utf8ValuePtr3 = utf8Value3;
424    const uint8_t *utf8ValuePtr4 = utf8Value4;
425    utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr3);
426    utf16Value = {0x80, 2};
427    EXPECT_EQ(utf16Res, utf16Value);
428    utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr4);
429    utf16Value = {0x7FF, 2};
430    EXPECT_EQ(utf16Res, utf16Value);
431
432    // code point lie in [0x800, 0xD7FF] or [0xDC00,0xFFFF], the length of utf8 code element byte is 3.
433    // when code point lie in [0xD800, 0xDBFF], due to the use of UCS-2, it corresponds to 3 utf8 symbols.
434    uint8_t utf8Value5[3] = {0xE0, 0xA0, 0x80}; // 0x800
435    uint8_t utf8Value6[3] = {0xEF, 0xBF, 0xBF}; // 0xFFFF
436    const uint8_t *utf8ValuePtr5 = utf8Value5;
437    const uint8_t *utf8ValuePtr6 = utf8Value6;
438    utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr5);
439    utf16Value = {0x800, 3};
440    EXPECT_EQ(utf16Res, utf16Value);
441    utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr6);
442    utf16Value = {0xFFFF, 3};
443    EXPECT_EQ(utf16Res, utf16Value);
444    // code point lie in [0x10000, 0x10FFFF], the length of utf8 code element byte is 4.
445    uint8_t utf8Value9[4] = {0xF0, 0x90, 0x80, 0x80}; // 0x10000
446    uint8_t utf8Value10[4] = {0xF4, 0x8F, 0xBF, 0xBF}; // 0x10FFFF
447    const uint8_t *utf8ValuePtr9 = utf8Value9;
448    const uint8_t *utf8ValuePtr10 = utf8Value10;
449    utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr9);
450    utf16Value = {0xD800 << 16 | 0xDC00U, 4};
451    EXPECT_EQ(utf16Res, utf16Value);
452    utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr10);
453    utf16Value = {0xDBFF << 16 | 0xDFFF, 4};
454    EXPECT_EQ(utf16Res, utf16Value);
455    utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr9, true);
456    utf16Value = {0x10000, 4};
457    EXPECT_EQ(utf16Res, utf16Value);
458    utf16Res = ConvertUtf8ToUtf16Pair(utf8ValuePtr10, true);
459    utf16Value = {0x10FFFF, 4};
460    EXPECT_EQ(utf16Res, utf16Value);
461}
462
463/*
464* @tc.name: Utf8ToUtf16Size
465* @tc.desc: Enter a string of UTF8 coded sequences and return the length of the sequence converted into UTF16 coded
466*           sequences.
467* @tc.type: FUNC
468*/
469HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size)
470{
471    // when code point lie in (0x00, 0xFFFF], the required utf16 code element length is 1.
472    uint8_t utf8ValueOneByteMin[1] = {0x00};
473    uint8_t utf8ValueOneByteMax[4] = {0xEF, 0xBF, 0xBF, 0x00}; // 0xFFFF
474    const uint8_t *utf8ValueOneByteMinPtr = utf8ValueOneByteMin;
475    const uint8_t *utf8ValueOneByteMaxPtr = utf8ValueOneByteMax;
476    size_t length = Utf8ToUtf16Size(utf8ValueOneByteMinPtr, sizeof(utf8ValueOneByteMin));
477    EXPECT_EQ(length, 1U);
478    length = Utf8ToUtf16Size(utf8ValueOneByteMaxPtr, sizeof(utf8ValueOneByteMax));
479    EXPECT_EQ(length, 2U);
480    // when code point lie in [0x10000, 0x10FFFF], the required utf16 code element length is 2.
481    const uint8_t utf8ValueTwoBytesMin[5] = {0xF0, 0x90, 0x80, 0x80, 0x00}; // 0x10000
482    const uint8_t utf8ValueTwoBytesMax[5] = {0xF4, 0x8F, 0xBF, 0xBF, 0x00}; // 0x10FFFF
483    const uint8_t *utf8ValueTwoBytesMinPtr = utf8ValueTwoBytesMin;
484    const uint8_t *utf8ValueTwoBytesMaxPtr = utf8ValueTwoBytesMax;
485    length = Utf8ToUtf16Size(utf8ValueTwoBytesMinPtr, sizeof(utf8ValueTwoBytesMin));
486    EXPECT_EQ(length, 3U);
487    length = Utf8ToUtf16Size(utf8ValueTwoBytesMaxPtr, sizeof(utf8ValueTwoBytesMax));
488    EXPECT_EQ(length, 3U);
489    uint8_t utf8Value[12] = {
490        0xEF, 0xBF, 0xBF, 0xF0,
491        0x90, 0x80, 0x80, 0xF4,
492        0x8F, 0xBF, 0xBF, 0x00};
493    const uint8_t *utf8ValuePtr = utf8Value;
494    length = Utf8ToUtf16Size(utf8ValuePtr, sizeof(utf8Value));
495    EXPECT_EQ(length, 6U);
496}
497
498static void ConvertRegionUtf16ToUtf8Test(bool isDebugger = false)
499{
500    size_t utf16Len = 8;
501    size_t utf8Len = 100;
502    size_t start = 0;
503    size_t utf8Pos = 0;
504    bool modify = false;
505    uint16_t utf16Value[8] = {
506        0x00, // 0 or 2 (special case for \u0000 ==> C080 - 1100'0000 1000'0000)
507        0x7F, // 1(0x00, 0x7F]
508        0x7FF, // 2 [0x80, 0x7FF]
509        0x800, // 3 [0x800, 0xD7FF]
510        0xD800, // 3 [0xD800, 0xDFFF]
511        0xFFFF, // 3 [0xE000, 0xFFFF]
512        0xD800, 0xDFFF}; // 4 [0x10000, 0x10FFFF]
513    const uint16_t *utf16ValuePtr = utf16Value;
514    uint8_t *utf8Out = (uint8_t*)malloc(utf8Len);
515    if (isDebugger) {
516        utf8Pos = DebuggerConvertRegionUtf16ToUtf8(utf16ValuePtr, utf8Out, utf16Len, utf8Len, start, modify);
517    } else {
518        utf8Pos = ConvertRegionUtf16ToUtf8(utf16ValuePtr, utf8Out, utf16Len, utf8Len, start, modify);
519    }
520    // 0 + 1 + 2 +(3 *3)+ 4= 16
521    EXPECT_EQ(utf8Pos, 16U);
522    // 2 + 1 + 2 +(3 * 3)+ 4 = 18
523    modify = true;
524    if (isDebugger) {
525        utf8Pos = DebuggerConvertRegionUtf16ToUtf8(utf16ValuePtr, utf8Out, utf16Len, utf8Len, start, modify);
526    } else {
527        utf8Pos = ConvertRegionUtf16ToUtf8(utf16ValuePtr, utf8Out, utf16Len, utf8Len, start, modify);
528    }
529    EXPECT_EQ(utf8Pos, 18U);
530    free(utf8Out);
531}
532
/*
* @tc.name: ConvertRegionUtf16ToUtf8
* @tc.desc: Input a UTF16-encoded sequence (the length is "utf16Len"), convert part of the sequence into a
*           UTF8-encoded sequence, and save it to "utf8Out" (the maximum length is "utf8Len"). The start parameter
*           indicates the start position of the conversion. The "modify" parameter indicates whether to perform
*           special processing for 0.
* @tc.type: FUNC
*/
540HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf16ToUtf8)
541{
542    ConvertRegionUtf16ToUtf8Test();
543}
544
545HWTEST_F_L0(UtfHelperTest, DebuggerConvertRegionUtf16ToUtf8)
546{
547    ConvertRegionUtf16ToUtf8Test(true);
548}
549
/*
* @tc.name: ConvertRegionUtf8ToUtf16
* @tc.desc: Input a UTF8-encoded sequence, convert part of the sequence into a UTF16-encoded sequence, and save it to
*           "utf16Out" (the maximum length is "utf16Len"). The start parameter indicates the start position of the
*           conversion.
* @tc.type: FUNC
*/
557HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16)
558{
559    size_t utf16Len = 100;
560    uint8_t utf8Value[10] = {
561        0x7F, // 1-length UTF16 encoding
562        0xDF, 0xBF, // 1-length UTF16 encoding
563        0xEF, 0xBF, 0xBF, // 1-length UTF16 encoding
564        0xF4, 0x8F, 0xBF, 0xBF}; // 2-length UTF16 encoding
565    const uint8_t *utf8ValuePtr = utf8Value;
566    uint16_t *utf16Out = (uint16_t*)malloc(utf16Len);
567    size_t outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out, sizeof(utf8Value), utf16Len);
568    // 1 + 1 + 1 + 2 = 5s
569    EXPECT_EQ(outPos, 5U);
570    // 1 + 2 = 3
571    utf8ValuePtr = utf8Value + 3;
572    outPos = ConvertRegionUtf8ToUtf16(utf8ValuePtr, utf16Out, sizeof(utf8Value) - 3, utf16Len);
573    EXPECT_EQ(outPos, 3U);
574}
575
576/*
577* @tc.name: ConvertUtf8ToUnicodeChar
578* @tc.desc: Converts a UTF8 encoding sequence encoding a character into a unicode point, and returns the
579*           unicode point and the byte length of the utf8 encoding sequence.
580* @tc.type: FUNC
581*/
582HWTEST_F_L0(UtfHelperTest, ConvertUtf8ToUnicodeChar)
583{
584    std::pair<int32_t, size_t> invalidValue = {INVALID_UTF8, 0};
585    // utf-8 is one byte, code point lie in [0x00, 0x7F]
586    uint8_t utf8Value1[1] = {0x00}; // 0x00
587    uint8_t utf8Value2[1] = {0x7F}; // 0x7F
588    const uint8_t *utf8ValuePtr1 = utf8Value1;
589    const uint8_t *utf8ValuePtr2 = utf8Value2;
590    std::pair<int32_t, size_t> unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr1, UtfLength::ONE);
591    std::pair<int32_t, size_t> unicodeValue = {0x00, 1};
592    EXPECT_EQ(unicodeRes, unicodeValue);
593    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr2, UtfLength::ONE);
594    unicodeValue = {0x7F, 1};
595    EXPECT_EQ(unicodeRes, unicodeValue);
596    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr2, 0);
597    EXPECT_EQ(unicodeRes, invalidValue);
598
599    // utf-8 is two bytes, code point lie in [0x80, 0x7FF]
600    uint8_t utf8Value3[2] = {0xC2, 0x80}; // 0x80
601    uint8_t utf8Value4[2] = {0xDF, 0xBF}; // 0x7FF
602    const uint8_t *utf8ValuePtr3 = utf8Value3;
603    const uint8_t *utf8ValuePtr4 = utf8Value4;
604    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr3, UtfLength::TWO);
605    unicodeValue = {0x80, 2};
606    EXPECT_EQ(unicodeRes, unicodeValue);
607    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr4, UtfLength::TWO);
608    unicodeValue = {0x7FF, 2};
609    EXPECT_EQ(unicodeRes, unicodeValue);
610    uint8_t utf8Value5[2] = {0xD0, 0x00}; // invalid
611    const uint8_t *utf8ValuePtr5 = utf8Value5;
612    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr5, UtfLength::TWO);
613    EXPECT_EQ(unicodeRes, invalidValue);
614    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr4, UtfLength::ONE);
615    EXPECT_EQ(unicodeRes, invalidValue);
616
617    // utf-8 is three bytes, code point lie in [0x800, 0xFFFF]
618    uint8_t utf8Value6[3] = {0xE0, 0xA0, 0x80}; // 0x800
619    uint8_t utf8Value7[3] = {0xED, 0x9F, 0xBF}; // 0xD7FF
620    const uint8_t *utf8ValuePtr6 = utf8Value6;
621    const uint8_t *utf8ValuePtr7 = utf8Value7;
622    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr6, UtfLength::THREE);
623    unicodeValue = {0x800, 3};
624    EXPECT_EQ(unicodeRes, unicodeValue);
625    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr7, UtfLength::THREE);
626    unicodeValue = {0xD7FF, 3};
627    EXPECT_EQ(unicodeRes, unicodeValue);
628    uint8_t utf8Value8[3] = {0xEB, 0x80, 0x40}; // invalid
629    const uint8_t *utf8ValuePtr8 = utf8Value8;
630    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr8, UtfLength::THREE);
631    EXPECT_EQ(unicodeRes, invalidValue);
632    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr7, UtfLength::TWO);
633    EXPECT_EQ(unicodeRes, invalidValue);
634
635    // utf-8 is four bytes, code point lie in [0x10000, 0x10FFFF].
636    uint8_t utf8Value9[4] = {0xF0, 0x90, 0x80, 0x80}; // 0x10000
637    uint8_t utf8Value10[4] = {0xF4, 0x8F, 0xBF, 0xBF}; // 0x10FFFF
638    const uint8_t *utf8ValuePtr9 = utf8Value9;
639    const uint8_t *utf8ValuePtr10 = utf8Value10;
640    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr9, UtfLength::FOUR);
641    unicodeValue = {0x10000, 4};
642    EXPECT_EQ(unicodeRes, unicodeValue);
643    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr10, UtfLength::FOUR);
644    unicodeValue = {0x10FFFF, 4};
645    EXPECT_EQ(unicodeRes, unicodeValue);
646    uint8_t utf8Value11[4] = {0xF4, 0x80, 0x80, 0x40}; // invalid
647    const uint8_t *utf8ValuePtr11 = utf8Value11;
648    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr11, UtfLength::FOUR);
649    EXPECT_EQ(unicodeRes, invalidValue);
650    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr10, UtfLength::THREE);
651    EXPECT_EQ(unicodeRes, invalidValue);
652
653    // other exception
654    uint8_t utf8Value12[2] = {0x90, 0x00}; // invalid
655    const uint8_t *utf8ValuePtr12 = utf8Value12;
656    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr12, UtfLength::FOUR);
657    EXPECT_EQ(unicodeRes, invalidValue);
658    uint8_t utf8Value13[2] = {0xF8, 0x00}; // invalid
659    const uint8_t *utf8ValuePtr13 = utf8Value13;
660    unicodeRes = ConvertUtf8ToUnicodeChar(utf8ValuePtr13, UtfLength::FOUR);
661    EXPECT_EQ(unicodeRes, invalidValue);
662}
663
664/*
665* @tc.name: Utf8ToUtf16Size
666* @tc.desc: Test single byte characters
667* @tc.type: FUNC
668*/
669HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_001) {
670    std::string utf8 = "Hello";
671    std::vector<uint16_t> expected_utf16 = {0x0, 0x0, 0x0, 0x0, 0x0}; // "Hello"
672    std::vector<uint16_t> utf16(10);
673    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
674    utf16.resize(converted);
675    EXPECT_EQ(utf16, expected_utf16);
676}
677
678/*
679* @tc.name: Utf8ToUtf16Size
680* @tc.desc: Test includes Chinese characters
681* @tc.type: FUNC
682*/
683HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_002) {
684    std::string utf8 = "你好,世界!";
685    std::vector<uint16_t> expected_utf16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; // "你好,世界!"
686    std::vector<uint16_t> utf16(10);
687    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
688    utf16.resize(converted);
689    EXPECT_EQ(utf16, expected_utf16);
690}
691
692/*
693* @tc.name: Utf8ToUtf16Size
694* @tc.desc: Test empty string
695* @tc.type: FUNC
696*/
697HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_003) {
698    std::string utf8 = "";
699    std::vector<uint16_t> expected_utf16 = {}; // empty
700    std::vector<uint16_t> utf16(10);
701    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
702    utf16.resize(converted);
703    EXPECT_EQ(utf16, expected_utf16);
704}
705
706/*
707* @tc.name: Utf8ToUtf16Size
708* @tc.desc: Test section conversion
709* @tc.type: FUNC
710*/
711HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_004) {
712    std::string utf8 = "Hello, 你好";
713    std::vector<uint16_t> expected_utf16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; // "Hello, 你"
714    std::vector<uint16_t> utf16(10);
715    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
716    utf16.resize(converted);
717    EXPECT_EQ(utf16, expected_utf16);
718}
719
720/*
721* @tc.name: Utf8ToUtf16Size
722* @tc.desc: Test buffer length limit
723* @tc.type: FUNC
724*/
725HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_005) {
726    std::string utf8 = "你好,世界!";
727    std::vector<uint16_t> expected_utf16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; // "你好"
728    std::vector<uint16_t> utf16(2); // Limit buffer length
729    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
730    utf16.resize(converted);
731    EXPECT_EQ(utf16, expected_utf16);
732}
733
734/*
735* @tc.name: Utf8ToUtf16Size
736* @tc.desc: Test for incorrect UTF-8 data
737* @tc.type: FUNC
738*/
739HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_006) {
740    std::string utf8 = "\xF0\x28\x8C\x28";
741    std::vector<uint16_t> expected_utf16 = {0x0, 0x0};
742    std::vector<uint16_t> utf16(10);
743    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
744    utf16.resize(converted);
745    EXPECT_EQ(utf16, expected_utf16);
746}
747
748/*
749* @tc.name: Utf8ToUtf16Size
750* @tc.desc: Test single byte UTF-8 characters
751* @tc.type: FUNC
752*/
753HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_007) {
754    std::string utf8 = "ABC"; // All are single byte characters
755    std::vector<uint16_t> expected_utf16 = {0x0, 0x0, 0x0}; // ASCII characters: A, B, C
756    std::vector<uint16_t> utf16(10);
757    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
758    utf16.resize(converted);
759    EXPECT_EQ(utf16, expected_utf16);
760}
761
762/*
763* @tc.name: Utf8ToUtf16Size
764* @tc.desc: Testing Double Byte UTF-8 Characters
765* @tc.type: FUNC
766*/
767HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_008) {
768    std::string utf8 = "\xC2\xA2\xC3\xBC"; // They are ¢ and ü, respectively
769    std::vector<uint16_t> expected_utf16 = {0x0, 0x0}; // Unicode .
770    std::vector<uint16_t> utf16(10);
771    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
772    utf16.resize(converted);
773    EXPECT_EQ(utf16, expected_utf16);
774}
775
776/*
777* @tc.name: Utf8ToUtf16Size
778* @tc.desc: Test three byte UTF-8 characters
779* @tc.type: FUNC
780*/
781HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_009) {
782    std::string utf8 = "\xE2\x82\xAC"; // euro: €
783    std::vector<uint16_t> expected_utf16 = {0x0}; // Unicode .
784    std::vector<uint16_t> utf16(10);
785    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
786    utf16.resize(converted);
787    EXPECT_EQ(utf16, expected_utf16);
788}
789
790/*
791* @tc.name: Utf8ToUtf16Size
792* @tc.desc: Test four byte UTF-8 characters and proxy pairs
793* @tc.type: FUNC
794*/
795HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_010) {
796    std::string utf8 = "\xF0\x9F\x98\x8E"; // Emoji �
797    std::vector<uint16_t> expected_utf16 = {0x0, 0x0}; // surrogates
798    std::vector<uint16_t> utf16(10);
799    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
800    utf16.resize(converted);
801    EXPECT_EQ(utf16, expected_utf16);
802}
803
804/*
805* @tc.name: Utf8ToUtf16Size
806* @tc.desc: Test UTF-8 data containing zero bytes
807* @tc.type: FUNC
808*/
809HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_011) {
810    std::string utf8 = "Hello\0World", utf8Nul = utf8 + '\0' + "World"; // Clearly including zero bytes
811    std::vector<uint16_t> expected_utf16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
812    std::vector<uint16_t> utf16(15);
813    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8Nul.data()), utf8Nul.size());
814    utf16.resize(converted);
815    EXPECT_EQ(utf16, expected_utf16);
816}
817
818/*
819* @tc.name: Utf8ToUtf16Size
820* @tc.desc: Test continuous illegal sequences
821* @tc.type: FUNC
822*/
823HWTEST_F_L0(UtfHelperTest, Utf8ToUtf16Size_012) {
824    std::string utf8 = "\xC0\x80\xC0\x80"; // Continuous illegal sequence
825    std::vector<uint16_t> expected_utf16 = {0x0, 0x0};
826    std::vector<uint16_t> utf16(10);
827    size_t converted = Utf8ToUtf16Size(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.size());
828    utf16.resize(converted);
829    EXPECT_EQ(utf16, expected_utf16);
830}
831
832/*
833* @tc.name: ConvertRegionUtf8ToUtf16
834* @tc.desc: Test single byte characters
835* @tc.type: FUNC
836*/
837HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_001) {
838    std::string utf8 = "Hello";
839    std::vector<uint16_t> expected_utf16 = {0x0048, 0x0065, 0x006C, 0x006C, 0x006F}; // "Hello"
840    std::vector<uint16_t> utf16(10);
841    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
842                                                utf16.data(), utf8.size(), utf16.size());
843    utf16.resize(converted);
844    EXPECT_EQ(utf16, expected_utf16);
845}
846
847/*
848* @tc.name: ConvertRegionUtf8ToUtf16
849* @tc.desc: Test includes Chinese characters
850* @tc.type: FUNC
851*/
852HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_002) {
853    std::string utf8 = "你好,世界!";
854    std::vector<uint16_t> expected_utf16 = {0x4F60, 0x597D, 0xFF0C, 0x4E16, 0x754C, 0xFF01}; // "你好,世界!"
855    std::vector<uint16_t> utf16(10);
856    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
857                                                utf16.data(), utf8.size(), utf16.size());
858    utf16.resize(converted);
859    EXPECT_EQ(utf16, expected_utf16);
860}
861
862/*
863* @tc.name: ConvertRegionUtf8ToUtf16
864* @tc.desc: Test empty string
865* @tc.type: FUNC
866*/
867HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_003) {
868    std::string utf8 = "";
869    std::vector<uint16_t> expected_utf16 = {}; // Empty
870    std::vector<uint16_t> utf16(10);
871    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
872                                                utf16.data(), utf8.size(), utf16.size());
873    utf16.resize(converted);
874    EXPECT_EQ(utf16, expected_utf16);
875}
876
877/*
878* @tc.name: ConvertRegionUtf8ToUtf16
879* @tc.desc: Test section conversion
880* @tc.type: FUNC
881*/
882HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_004) {
883    std::string utf8 = "Hello, 你好";
884    std::vector<uint16_t> expected_utf16 = {0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002C, 0x20, 0x4F60};
885    std::vector<uint16_t> utf16(10);
886    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
887                                                utf16.data(), 10, utf16.size()); // Only process the first 9 bytes
888    utf16.resize(converted);
889    EXPECT_EQ(utf16, expected_utf16);
890}
891
892/*
893* @tc.name: ConvertRegionUtf8ToUtf16
894* @tc.desc: Test buffer length limit
895* @tc.type: FUNC
896*/
897HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_005) {
898    std::string utf8 = "你好,世界!";
899    std::vector<uint16_t> expected_utf16 = {0x4F60, 0x597D}; // "你好"
900    std::vector<uint16_t> utf16(2); // Limit buffer length
901    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
902                                                utf16.data(), utf8.size(), utf16.size());
903    utf16.resize(converted);
904    EXPECT_EQ(utf16, expected_utf16);
905}
906
907/*
908* @tc.name: ConvertRegionUtf8ToUtf16
909* @tc.desc: Test for incorrect UTF-8 data
910* @tc.type: FUNC
911*/
912HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_006) {
913    std::string utf8 = "\xF0\x28\x8C\x28";
914    std::vector<uint16_t> expected_utf16 = {}; // Expected empty output, handling erroneous data
915    std::vector<uint16_t> utf16(10);
916    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
917                                                utf16.data(), utf8.size(), utf16.size());
918    utf16.resize(converted);
919    EXPECT_NE(utf16, expected_utf16);
920}
921
922/*
923* @tc.name: ConvertRegionUtf8ToUtf16
924* @tc.desc: Test single byte UTF-8 characters
925* @tc.type: FUNC
926*/
927HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_007) {
928    std::string utf8 = "ABC"; // All are single byte characters
929    std::vector<uint16_t> expected_utf16 = {0x0041, 0x0042, 0x0043}; // ASCII characters: A, B, C
930    std::vector<uint16_t> utf16(10);
931    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
932                                                utf16.data(), utf8.size(), utf16.size());
933    utf16.resize(converted);
934    EXPECT_EQ(utf16, expected_utf16);
935}
936
937/*
938* @tc.name: ConvertRegionUtf8ToUtf16
939* @tc.desc: Testing Double Byte UTF-8 Characters
940* @tc.type: FUNC
941*/
942HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_008) {
943    std::string utf8 = "\xC2\xA2\xC3\xBC"; // They are ¢ and ü, respectively
944    std::vector<uint16_t> expected_utf16 = {0x00A2, 0x00FC}; // Unicode .
945    std::vector<uint16_t> utf16(10);
946    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
947                                                utf16.data(), utf8.size(), utf16.size());
948    utf16.resize(converted);
949    EXPECT_EQ(utf16, expected_utf16);
950}
951
952/*
953* @tc.name: ConvertRegionUtf8ToUtf16
954* @tc.desc: Test three byte UTF-8 characters
955* @tc.type: FUNC
956*/
957HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_009) {
958    std::string utf8 = "\xE2\x82\xAC"; // euro €
959    std::vector<uint16_t> expected_utf16 = {0x20AC}; // Unicode .
960    std::vector<uint16_t> utf16(10);
961    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
962                                                utf16.data(), utf8.size(), utf16.size());
963    utf16.resize(converted);
964    EXPECT_EQ(utf16, expected_utf16);
965}
966
967/*
968* @tc.name: ConvertRegionUtf8ToUtf16
969* @tc.desc: Test four byte UTF-8 characters and proxy pairs
970* @tc.type: FUNC
971*/
972HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_010) {
973    std::string utf8 = "\xF0\x9F\x98\x8E"; // Emoji �
974    std::vector<uint16_t> expected_utf16 = {0xD83D, 0xDE0E}; // surrogates
975    std::vector<uint16_t> utf16(10);
976    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
977                                                utf16.data(), utf8.size(), utf16.size());
978    utf16.resize(converted);
979    EXPECT_EQ(utf16, expected_utf16);
980}
981
982/*
983* @tc.name: ConvertRegionUtf8ToUtf16
984* @tc.desc: Test UTF-8 data containing zero bytes
985* @tc.type: FUNC
986*/
987HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_011) {
988    std::string utf8 = "Hello\0World", utf8Nul = utf8 + '\0' + "World"; // Clearly including zero bytes
989    std::vector<uint16_t> expected_utf16 = {0x0048, 0x0065, 0x006C, 0x006C, 0x006F,
990        0x0000, 0x0057, 0x006F, 0x0072, 0x006C, 0x0064}; // Including NULL characters
991    std::vector<uint16_t> utf16(15);
992    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8Nul.data()),
993                                                utf16.data(), utf8Nul.size(), utf16.size());
994    utf16.resize(converted);
995    EXPECT_EQ(utf16, expected_utf16);
996}
997
998/*
999* @tc.name: ConvertRegionUtf8ToUtf16
1000* @tc.desc: Test continuous illegal sequences
1001* @tc.type: FUNC
1002*/
1003HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_012) {
1004    std::string utf8 = "\xC0\x80\xC0\x80"; // Continuous illegal sequence
1005    std::vector<uint16_t> expected_utf16 = {};
1006    std::vector<uint16_t> utf16(10);
1007    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
1008                                                utf16.data(), utf8.size(), utf16.size());
1009    utf16.resize(converted);
1010    EXPECT_NE(utf16, expected_utf16);
1011}
1012
1013/*
1014* @tc.name: ConvertRegionUtf8ToUtf16
1015* @tc.desc: Test four byte UTF-8 characters and proxy pairs
1016* @tc.type: FUNC
1017*/
1018HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_013) {
1019    std::string utf8 = "\xF0\x9F\x98\x8E"; // Emoji �
1020    std::vector<uint16_t> expected_utf16 = {0xD83D, 0xDE0E}; // surrogates
1021    std::vector<uint16_t> utf16(0);
1022    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
1023                                                utf16.data(), utf8.size(), utf16.size());
1024    utf16.resize(converted);
1025    EXPECT_EQ(converted, 0);
1026}
1027/*
1028* @tc.name: ConvertRegionUtf8ToUtf16
1029* @tc.desc: Test four byte UTF-8 characters and proxy pairs
1030* @tc.type: FUNC
1031*/
1032HWTEST_F_L0(UtfHelperTest, ConvertRegionUtf8ToUtf16_014) {
1033    std::string utf8 = "\xF0\x9F\x98\x8E"; // Emoji �
1034    std::vector<uint16_t> expected_utf16 = {0xD83D, 0xDE0E}; // surrogates
1035    std::vector<uint16_t> utf16(1);
1036    size_t converted = ConvertRegionUtf8ToUtf16(reinterpret_cast<const uint8_t*>(utf8.data()),
1037                                                utf16.data(), utf8.size(), utf16.size());
1038    utf16.resize(converted);
1039    EXPECT_EQ(converted, 0);
1040}
} // namespace panda::test
1042