1/*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "utils/utf.h"
17
18#include <cstddef>
19#include <cstdint>
20
21#include <vector>
22
23#include <gtest/gtest.h>
24
25namespace panda::utf::test {
26
27HWTEST(Utf, ConvertMUtf8ToUtf16_1, testing::ext::TestSize.Level0)
28{
29    // 2-byte mutf-8 U+0000
30    {
31        const std::vector<uint8_t> in {0xc0, 0x80, 0x00};
32        const std::vector<uint16_t> res {0x0};
33        std::vector<uint16_t> out(res.size());
34        ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
35        EXPECT_EQ(out, res);
36    }
37
38    // 1-byte mutf-8: 0xxxxxxx
39    {
40        const std::vector<uint8_t> in {0x7f, 0x00};
41        const std::vector<uint16_t> res {0x7f};
42        std::vector<uint16_t> out(res.size());
43        ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
44        EXPECT_EQ(out, res);
45    }
46
47    // 2-byte mutf-8: 110xxxxx 10xxxxxx
48    {
49        const std::vector<uint8_t> in {0xc2, 0xa7, 0x33, 0x00};
50        const std::vector<uint16_t> res {0xa7, 0x33};
51        std::vector<uint16_t> out(res.size());
52        ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
53        EXPECT_EQ(out, res);
54    }
55
56    // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
57    {
58        const std::vector<uint8_t> in {0xef, 0xbf, 0x83, 0x33, 0x00};
59        const std::vector<uint16_t> res {0xffc3, 0x33};
60        std::vector<uint16_t> out(res.size());
61        ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
62        EXPECT_EQ(out, res);
63    }
64}
65
66// double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
67HWTEST(Utf, ConvertMUtf8ToUtf16_2, testing::ext::TestSize.Level0)
68{
69    {
70        const std::vector<uint8_t> in {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x00};
71        const std::vector<uint16_t> res {0xd801, 0xdc37};
72        std::vector<uint16_t> out(res.size());
73        ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
74        EXPECT_EQ(out, res);
75    }
76
77    {
78        const std::vector<uint8_t> in {0x5b, 0x61, 0x62, 0x63, 0xed, 0xa3, 0x92, 0x5d, 0x00};
79        const std::vector<uint16_t> res {0x5b, 0x61, 0x62, 0x63, 0xd8d2, 0x5d};
80        std::vector<uint16_t> out(res.size());
81        ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
82        EXPECT_EQ(out, res);
83    }
84
85    {
86        const std::vector<uint8_t> in {0xF0, 0x9F, 0x91, 0xB3, 0x00};
87        const std::vector<uint16_t> res {0xD83D, 0xDC73};
88        std::vector<uint16_t> out(res.size());
89        ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data());
90        EXPECT_EQ(out, res);
91    }
92}
93
94/**
95 * @tc.name: MUtf8ToUtf16Size
96 * @tc.desc: Verify the MUtf8ToUtf16Size function.
97 * @tc.type: FUNC
98 * @tc.require:
99 */
100HWTEST(Utf, MUtf8ToUtf16Size, testing::ext::TestSize.Level0)
101{
102    // 2-byte mutf-8 U+0000
103    {
104        const std::vector<uint8_t> in {0xc0, 0x80};
105        size_t res = MUtf8ToUtf16Size(in.data(), in.size());
106        EXPECT_EQ(res, 1U);
107    }
108
109    // 1-byte mutf-8: 0xxxxxxx
110    {
111        const std::vector<uint8_t> in {0x7f};
112        size_t res = MUtf8ToUtf16Size(in.data(), in.size());
113        EXPECT_EQ(res, 1U);
114    }
115
116    // 2-byte mutf-8: 110xxxxx 10xxxxxx
117    {
118        const std::vector<uint8_t> in {0xc2, 0xa7, 0x33};
119        size_t res = MUtf8ToUtf16Size(in.data(), in.size());
120        EXPECT_EQ(res, 2U);
121    }
122
123    // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
124    {
125        const std::vector<uint8_t> in {0xef, 0xbf, 0x83, 0x33};
126        size_t res = MUtf8ToUtf16Size(in.data(), in.size());
127        EXPECT_EQ(res, 2U);
128    }
129
130    // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
131    {
132        const std::vector<uint8_t> in {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7};
133        size_t res = MUtf8ToUtf16Size(in.data(), in.size());
134        EXPECT_EQ(res, 2U);
135    }
136}
137
138HWTEST(Utf, Utf16ToMUtf8Size, testing::ext::TestSize.Level0)
139{
140    // 2-byte mutf-8 U+0000
141    {
142        const std::vector<uint16_t> in {0x0};
143        size_t res = Utf16ToMUtf8Size(in.data(), in.size());
144        EXPECT_EQ(res, 3U);
145    }
146
147    // 1-byte mutf-8: 0xxxxxxx
148    {
149        const std::vector<uint16_t> in {0x7f};
150        size_t res = Utf16ToMUtf8Size(in.data(), in.size());
151        EXPECT_EQ(res, 2U);
152    }
153
154    {
155        const std::vector<uint16_t> in {0x7f};
156        size_t res = Utf16ToMUtf8Size(in.data(), in.size());
157        EXPECT_EQ(res, 2U);
158    }
159
160    // 2-byte mutf-8: 110xxxxx 10xxxxxx
161    {
162        const std::vector<uint16_t> in {0xa7, 0x33};
163        size_t res = Utf16ToMUtf8Size(in.data(), in.size());
164        EXPECT_EQ(res, 4U);
165    }
166
167    // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
168    {
169        const std::vector<uint16_t> in {0xffc3, 0x33};
170        size_t res = Utf16ToMUtf8Size(in.data(), in.size());
171        EXPECT_EQ(res, 5U);
172    }
173
174    // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
175    {
176        const std::vector<uint16_t> in {0xd801, 0xdc37};
177        size_t res = Utf16ToMUtf8Size(in.data(), in.size());
178        EXPECT_EQ(res, 5U);
179    }
180}
181
182/**
183 * @tc.name: ConvertRegionMUtf8ToUtf16
184 * @tc.desc: Verify the ConvertRegionMUtf8ToUtf16 function.
185 * @tc.type: FUNC
186 * @tc.require:
187 */
188HWTEST(Utf, ConvertRegionMUtf8ToUtf16, testing::ext::TestSize.Level0)
189{
190    // 2-byte mutf-8 U+0000
191    {
192        const std::vector<uint8_t> in {0xc0, 0x80, 0x00};
193        const std::vector<uint16_t> res {0x0};
194        std::vector<uint16_t> out(res.size());
195        ConvertRegionMUtf8ToUtf16(in.data(), out.data(), utf::Mutf8Size(in.data()), res.size() * sizeof(uint16_t), 0);
196        EXPECT_EQ(out, res);
197    }
198
199    // 1-byte mutf-8: 0xxxxxxx
200    {
201        const std::vector<uint8_t> in {0x7f, 0x00};
202        const std::vector<uint16_t> res {0x7f};
203        std::vector<uint16_t> out(res.size());
204        ConvertRegionMUtf8ToUtf16(in.data(), out.data(), utf::Mutf8Size(in.data()), res.size() * sizeof(uint16_t), 0);
205        EXPECT_EQ(out, res);
206    }
207
208    // 2-byte mutf-8: 110xxxxx 10xxxxxx
209    {
210        const std::vector<uint8_t> in {0xc2, 0xa7, 0x33, 0x00};
211        const std::vector<uint16_t> res {0xa7, 0x33};
212        std::vector<uint16_t> out(res.size());
213        ConvertRegionMUtf8ToUtf16(in.data(), out.data(), utf::Mutf8Size(in.data()), res.size() * sizeof(uint16_t), 0);
214        EXPECT_EQ(out, res);
215    }
216
217    // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
218    {
219        const std::vector<uint8_t> in {0xef, 0xbf, 0x83, 0x33, 0x00};
220        const std::vector<uint16_t> res {0xffc3, 0x33};
221        std::vector<uint16_t> out(res.size());
222        ConvertRegionMUtf8ToUtf16(in.data(), out.data(), utf::Mutf8Size(in.data()), res.size() * sizeof(uint16_t), 0);
223        EXPECT_EQ(out, res);
224    }
225
226    // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
227    {
228        const std::vector<uint8_t> in {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x00};
229        const std::vector<uint16_t> res1 {0xd801, 0xdc37};
230        const std::vector<uint16_t> res2 {0xdc37};
231        std::vector<uint16_t> out1(res1.size());
232        std::vector<uint16_t> out2(res2.size());
233        size_t in_len1 = utf::Mutf8Size(in.data());
234        size_t in_len2 = utf::Mutf8Size(in.data() + 3);
235        ConvertRegionMUtf8ToUtf16(in.data(), out1.data(), in_len1, res1.size() * sizeof(uint16_t), 0);
236        ConvertRegionMUtf8ToUtf16(in.data(), out2.data(), in_len1, res2.size() * sizeof(uint16_t), in_len1 - in_len2);
237        EXPECT_EQ(out1, res1);
238        EXPECT_EQ(out2, res2);
239    }
240}
241
242HWTEST(Utf, ConvertRegionUtf16ToMUtf8_1, testing::ext::TestSize.Level0)
243{
244    // 2-byte mutf-8 U+0000
245    {
246        const std::vector<uint16_t> in {0x0};
247        const std::vector<uint8_t> res {0xc0, 0x80, 0x00};
248        std::vector<uint8_t> out(res.size());
249        size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
250        EXPECT_EQ(sz, 2U);
251        out[out.size() - 1] = '\0';
252        EXPECT_EQ(out, res);
253    }
254
255    // 1-byte mutf-8: 0xxxxxxx
256    {
257        const std::vector<uint16_t> in {0x7f};
258        const std::vector<uint8_t> res {0x7f, 0x00};
259        std::vector<uint8_t> out(res.size());
260        size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
261        EXPECT_EQ(sz, 1U);
262        out[out.size() - 1] = '\0';
263        EXPECT_EQ(out, res);
264    }
265
266    // 2-byte mutf-8: 110xxxxx 10xxxxxx
267    {
268        const std::vector<uint16_t> in {0xa7, 0x33};
269        const std::vector<uint8_t> res {0xc2, 0xa7, 0x33, 0x00};
270        std::vector<uint8_t> out(res.size());
271        size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
272        EXPECT_EQ(sz, 3U);
273        out[out.size() - 1] = '\0';
274        EXPECT_EQ(out, res);
275    }
276
277    // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
278    {
279        const std::vector<uint16_t> in {0xffc3, 0x33};
280        const std::vector<uint8_t> res {0xef, 0xbf, 0x83, 0x33, 0x00};
281        std::vector<uint8_t> out(res.size());
282        size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
283        EXPECT_EQ(sz, 4U);
284        out[out.size() - 1] = '\0';
285        EXPECT_EQ(out, res);
286    }
287}
288
289HWTEST(Utf, ConvertRegionUtf16ToMUtf8_2, testing::ext::TestSize.Level0)
290{
291    // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
292    // utf-16 data in 0xd800-0xdfff
293    {
294        const std::vector<uint16_t> in {0xd834, 0x33};
295        const std::vector<uint8_t> res {0xed, 0xa0, 0xb4, 0x33, 0x00};
296        std::vector<uint8_t> out(res.size());
297        size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
298        EXPECT_EQ(sz, 4U);
299        out[out.size() - 1] = '\0';
300        EXPECT_EQ(out, res);
301    }
302
303    // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx
304    // utf-16 data in 0xd800-0xdfff
305    {
306        const std::vector<uint16_t> in {0xdf06, 0x33};
307        const std::vector<uint8_t> res {0xed, 0xbc, 0x86, 0x33, 0x00};
308        std::vector<uint8_t> out(res.size());
309        size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
310        EXPECT_EQ(sz, 4U);
311        out[out.size() - 1] = '\0';
312        EXPECT_EQ(out, res);
313    }
314
315    // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx
316    {
317        const std::vector<uint16_t> in {0xd801, 0xdc37};
318        const std::vector<uint8_t> res {0xf0, 0x90, 0x90, 0xb7, 0x00};
319        std::vector<uint8_t> out(res.size());
320        size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0);
321        EXPECT_EQ(sz, 4U);
322        out[out.size() - 1] = '\0';
323        EXPECT_EQ(out, res);
324    }
325}
326
327// 1-byte utf-8: 0xxxxxxx
328HWTEST(Utf, CompareMUtf8ToMUtf8_1, testing::ext::TestSize.Level0)
329{
330    {
331        const std::vector<uint8_t> v1 {0x00};
332        const std::vector<uint8_t> v2 {0x7f, 0x00};
333        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
334    }
335
336    {
337        const std::vector<uint8_t> v1 {0x02, 0x00};
338        const std::vector<uint8_t> v2 {0x00};
339        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
340    }
341
342    {
343        const std::vector<uint8_t> v1 {0x7f, 0x00};
344        const std::vector<uint8_t> v2 {0x7f, 0x00};
345        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0);
346    }
347
348    {
349        const std::vector<uint8_t> v1 {0x01, 0x7f, 0x00};
350        const std::vector<uint8_t> v2 {0x01, 0x70, 0x00};
351        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
352    }
353
354    {
355        const std::vector<uint8_t> v1 {0x01, 0x71, 0x00};
356        const std::vector<uint8_t> v2 {0x01, 0x73, 0x00};
357        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
358    }
359}
360
361// 2-byte utf-8: 110xxxxx 10xxxxxx
362HWTEST(Utf, CompareMUtf8ToMUtf8_2, testing::ext::TestSize.Level0)
363{
364    {
365        const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00};
366        const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00};
367        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0);
368    }
369
370    {
371        const std::vector<uint8_t> v1 {0xdf, 0xb1, 0x03, 0x00};
372        const std::vector<uint8_t> v2 {0xd1, 0xb2, 0x03, 0x00};
373        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
374    }
375
376    {
377        const std::vector<uint8_t> v1 {0xd1, 0xbf, 0x03, 0x00};
378        const std::vector<uint8_t> v2 {0xdf, 0xb0, 0x03, 0x00};
379        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
380    }
381}
382
383// 3-byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
384HWTEST(Utf, CompareMUtf8ToMUtf8_3, testing::ext::TestSize.Level0)
385{
386    {
387        const std::vector<uint8_t> v1 {0xef, 0xbf, 0x03, 0x04, 0x00};
388        const std::vector<uint8_t> v2 {0xef, 0xbf, 0x03, 0x04, 0x00};
389        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0);
390    }
391
392    {
393        const std::vector<uint8_t> v1 {0xef, 0xb2, 0x03, 0x04, 0x00};
394        const std::vector<uint8_t> v2 {0xe0, 0xbf, 0x03, 0x04, 0x00};
395        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
396    }
397
398    {
399        const std::vector<uint8_t> v1 {0xef, 0xb0, 0x03, 0x04, 0x00};
400        const std::vector<uint8_t> v2 {0xef, 0xbf, 0x05, 0x04, 0x00};
401        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
402    }
403}
404
405// 4-byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
406HWTEST(Utf, CompareMUtf8ToMUtf8_4, testing::ext::TestSize.Level0)
407{
408    {
409        const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
410        const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
411        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0);
412    }
413
414    {
415        const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x0a, 0x05, 0x00};
416        const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
417        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0);
418    }
419
420    {
421        const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
422        const std::vector<uint8_t> v2 {0xf8, 0xbf, 0xbf, 0x04, 0x05, 0x00};
423        EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0);
424    }
425}
426
427// 1-byte utf-8: 0xxxxxxx
428HWTEST(Utf, CompareUtf8ToUtf8_1, testing::ext::TestSize.Level0)
429{
430    {
431        const std::vector<uint8_t> v1 {0x00};
432        const std::vector<uint8_t> v2 {0x7f, 0x00};
433        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
434    }
435
436    {
437        const std::vector<uint8_t> v1 {0x02, 0x00};
438        const std::vector<uint8_t> v2 {0x00};
439        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
440    }
441
442    {
443        const std::vector<uint8_t> v1 {0x7f, 0x00};
444        const std::vector<uint8_t> v2 {0x7f, 0x00};
445        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0);
446    }
447
448    {
449        const std::vector<uint8_t> v1 {0x01, 0x7f, 0x00};
450        const std::vector<uint8_t> v2 {0x01, 0x70, 0x00};
451        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
452    }
453
454    {
455        const std::vector<uint8_t> v1 {0x01, 0x71, 0x00};
456        const std::vector<uint8_t> v2 {0x01, 0x73, 0x00};
457        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
458    }
459}
460
461// 2-byte utf-8: 110xxxxx 10xxxxxx
462HWTEST(Utf, CompareUtf8ToUtf8_2, testing::ext::TestSize.Level0)
463{
464    {
465        const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00};
466        const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00};
467        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0);
468    }
469
470    {
471        const std::vector<uint8_t> v1 {0xdf, 0xb1, 0x03, 0x00};
472        const std::vector<uint8_t> v2 {0xd1, 0xb2, 0x03, 0x00};
473        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
474    }
475
476    {
477        const std::vector<uint8_t> v1 {0xd1, 0xbf, 0x03, 0x00};
478        const std::vector<uint8_t> v2 {0xdf, 0xb0, 0x03, 0x00};
479        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
480    }
481}
482
483// 3-byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx
484HWTEST(Utf, CompareUtf8ToUtf8_3, testing::ext::TestSize.Level0)
485{
486    {
487        const std::vector<uint8_t> v1 {0xef, 0xbf, 0x03, 0x04, 0x00};
488        const std::vector<uint8_t> v2 {0xef, 0xbf, 0x03, 0x04, 0x00};
489        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0);
490    }
491
492    {
493        const std::vector<uint8_t> v1 {0xef, 0xb2, 0x03, 0x04, 0x00};
494        const std::vector<uint8_t> v2 {0xe0, 0xbf, 0x03, 0x04, 0x00};
495        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
496    }
497
498    {
499        const std::vector<uint8_t> v1 {0xef, 0xb0, 0x03, 0x04, 0x00};
500        const std::vector<uint8_t> v2 {0xef, 0xbf, 0x05, 0x04, 0x00};
501        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
502    }
503}
504
505// 4-byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
506HWTEST(Utf, CompareUtf8ToUtf8_4, testing::ext::TestSize.Level0)
507{
508    {
509        const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
510        const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
511        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0);
512    }
513
514    {
515        const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x0a, 0x05, 0x00};
516        const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
517        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0);
518    }
519
520    {
521        const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00};
522        const std::vector<uint8_t> v2 {0xf8, 0xbf, 0xbf, 0x04, 0x05, 0x00};
523        EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0);
524    }
525}
526
527HWTEST(Utf, IsMUtf8OnlySingleBytes, testing::ext::TestSize.Level0)
528{
529    const std::vector<uint8_t> v1 {0x02, 0x00};
530    EXPECT_TRUE(IsMUtf8OnlySingleBytes(v1.data()));
531
532    const std::vector<uint8_t> v2 {0x90, 0x00};
533    EXPECT_FALSE(IsMUtf8OnlySingleBytes(v2.data()));
534}
535
536HWTEST(Utf, IsValidModifiedUTF8, testing::ext::TestSize.Level0)
537{
538    const std::vector<uint8_t> v1 {0x31, 0x00};
539    EXPECT_TRUE(IsValidModifiedUTF8(v1.data()));
540
541    const std::vector<uint8_t> v2 {0x9f, 0x00};
542    EXPECT_FALSE(IsValidModifiedUTF8(v2.data()));
543
544    const std::vector<uint8_t> v3 {0xf7, 0x00};
545    EXPECT_FALSE(IsValidModifiedUTF8(v3.data()));
546
547    const std::vector<uint8_t> v4 {0xe0, 0x00};
548    EXPECT_FALSE(IsValidModifiedUTF8(v4.data()));
549
550    const std::vector<uint8_t> v5 {0xd4, 0x00};
551    EXPECT_FALSE(IsValidModifiedUTF8(v5.data()));
552
553    const std::vector<uint8_t> v6 {0x11, 0x31, 0x00};
554    EXPECT_TRUE(IsValidModifiedUTF8(v6.data()));
555
556    const std::vector<uint8_t> v7 {0xf8, 0x00};
557    EXPECT_FALSE(IsValidModifiedUTF8(v7.data()));
558}
559
560HWTEST(Utf, ConvertMUtf8ToUtf16Pair, testing::ext::TestSize.Level0)
561{
562    const uint8_t data = 0x11;
563    std::pair<uint32_t, size_t> p1 = ConvertMUtf8ToUtf16Pair(&data, 2U);
564    ASSERT_EQ(17U, p1.first);
565    ASSERT_EQ(1U, p1.second);
566
567    std::pair<uint32_t, size_t> p2 = ConvertMUtf8ToUtf16Pair(&data, 3U);
568    ASSERT_EQ(17U, p2.first);
569    ASSERT_EQ(1U, p2.second);
570}
571
572HWTEST(Utf, IsEqualTest, testing::ext::TestSize.Level0)
573{
574    {
575        const std::vector<uint8_t> v1 {0x7f, 0x00};
576        const std::vector<uint8_t> v2 {0x7f, 0x00};
577        Span<const uint8_t> utf8_1(v1.data(), v1.size());
578        Span<const uint8_t> utf8_2(v2.data(), v2.size());
579        ASSERT_TRUE(IsEqual(utf8_1, utf8_2));
580    }
581
582    {
583        const std::vector<uint8_t> v1 {0x7f, 0x7f, 0x00};
584        const std::vector<uint8_t> v2 {0x7f, 0x00};
585        Span<const uint8_t> utf8_1(v1.data(), v1.size());
586        Span<const uint8_t> utf8_2(v2.data(), v2.size());
587        ASSERT_FALSE(IsEqual(utf8_1, utf8_2));
588    }
589
590    {
591        const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00};
592        const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00};
593        EXPECT_TRUE(IsEqual(v1.data(), v2.data()));
594    }
595}
596
597}  // namespace panda::utf::test
598