1/* 2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "utils/utf.h" 17 18#include <cstddef> 19#include <cstdint> 20 21#include <vector> 22 23#include <gtest/gtest.h> 24 25namespace panda::utf::test { 26 27HWTEST(Utf, ConvertMUtf8ToUtf16_1, testing::ext::TestSize.Level0) 28{ 29 // 2-byte mutf-8 U+0000 30 { 31 const std::vector<uint8_t> in {0xc0, 0x80, 0x00}; 32 const std::vector<uint16_t> res {0x0}; 33 std::vector<uint16_t> out(res.size()); 34 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 35 EXPECT_EQ(out, res); 36 } 37 38 // 1-byte mutf-8: 0xxxxxxx 39 { 40 const std::vector<uint8_t> in {0x7f, 0x00}; 41 const std::vector<uint16_t> res {0x7f}; 42 std::vector<uint16_t> out(res.size()); 43 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 44 EXPECT_EQ(out, res); 45 } 46 47 // 2-byte mutf-8: 110xxxxx 10xxxxxx 48 { 49 const std::vector<uint8_t> in {0xc2, 0xa7, 0x33, 0x00}; 50 const std::vector<uint16_t> res {0xa7, 0x33}; 51 std::vector<uint16_t> out(res.size()); 52 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 53 EXPECT_EQ(out, res); 54 } 55 56 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 57 { 58 const std::vector<uint8_t> in {0xef, 0xbf, 0x83, 0x33, 0x00}; 59 const std::vector<uint16_t> res {0xffc3, 0x33}; 60 std::vector<uint16_t> out(res.size()); 61 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 62 EXPECT_EQ(out, res); 63 } 64} 65 66// double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx 67HWTEST(Utf, ConvertMUtf8ToUtf16_2, testing::ext::TestSize.Level0) 68{ 69 { 70 const std::vector<uint8_t> in {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x00}; 71 const std::vector<uint16_t> res {0xd801, 0xdc37}; 72 std::vector<uint16_t> out(res.size()); 73 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 74 EXPECT_EQ(out, res); 75 } 76 77 { 78 const std::vector<uint8_t> in {0x5b, 0x61, 0x62, 0x63, 0xed, 0xa3, 0x92, 0x5d, 0x00}; 79 const std::vector<uint16_t> res {0x5b, 0x61, 0x62, 0x63, 0xd8d2, 0x5d}; 80 std::vector<uint16_t> out(res.size()); 81 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 82 EXPECT_EQ(out, res); 83 } 84 85 { 86 const std::vector<uint8_t> in {0xF0, 0x9F, 0x91, 0xB3, 0x00}; 87 const std::vector<uint16_t> res {0xD83D, 0xDC73}; 88 std::vector<uint16_t> out(res.size()); 89 ConvertMUtf8ToUtf16(in.data(), utf::Mutf8Size(in.data()), out.data()); 90 EXPECT_EQ(out, res); 91 } 92} 93 94/** 95 * @tc.name: MUtf8ToUtf16Size 96 * @tc.desc: Verify the MUtf8ToUtf16Size function. 97 * @tc.type: FUNC 98 * @tc.require: 99 */ 100HWTEST(Utf, MUtf8ToUtf16Size, testing::ext::TestSize.Level0) 101{ 102 // 2-byte mutf-8 U+0000 103 { 104 const std::vector<uint8_t> in {0xc0, 0x80}; 105 size_t res = MUtf8ToUtf16Size(in.data(), in.size()); 106 EXPECT_EQ(res, 1U); 107 } 108 109 // 1-byte mutf-8: 0xxxxxxx 110 { 111 const std::vector<uint8_t> in {0x7f}; 112 size_t res = MUtf8ToUtf16Size(in.data(), in.size()); 113 EXPECT_EQ(res, 1U); 114 } 115 116 // 2-byte mutf-8: 110xxxxx 10xxxxxx 117 { 118 const std::vector<uint8_t> in {0xc2, 0xa7, 0x33}; 119 size_t res = MUtf8ToUtf16Size(in.data(), in.size()); 120 EXPECT_EQ(res, 2U); 121 } 122 123 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 124 { 125 const std::vector<uint8_t> in {0xef, 0xbf, 0x83, 0x33}; 126 size_t res = MUtf8ToUtf16Size(in.data(), in.size()); 127 EXPECT_EQ(res, 2U); 128 } 129 130 // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx 131 { 132 const std::vector<uint8_t> in {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7}; 133 size_t res = MUtf8ToUtf16Size(in.data(), in.size()); 134 EXPECT_EQ(res, 2U); 135 } 136} 137 138HWTEST(Utf, Utf16ToMUtf8Size, testing::ext::TestSize.Level0) 139{ 140 // 2-byte mutf-8 U+0000 141 { 142 const std::vector<uint16_t> in {0x0}; 143 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 144 EXPECT_EQ(res, 3U); 145 } 146 147 // 1-byte mutf-8: 0xxxxxxx 148 { 149 const std::vector<uint16_t> in {0x7f}; 150 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 151 EXPECT_EQ(res, 2U); 152 } 153 154 { 155 const std::vector<uint16_t> in {0x7f}; 156 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 157 EXPECT_EQ(res, 2U); 158 } 159 160 // 2-byte mutf-8: 110xxxxx 10xxxxxx 161 { 162 const std::vector<uint16_t> in {0xa7, 0x33}; 163 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 164 EXPECT_EQ(res, 4U); 165 } 166 167 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 168 { 169 const std::vector<uint16_t> in {0xffc3, 0x33}; 170 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 171 EXPECT_EQ(res, 5U); 172 } 173 174 // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx 175 { 176 const std::vector<uint16_t> in {0xd801, 0xdc37}; 177 size_t res = Utf16ToMUtf8Size(in.data(), in.size()); 178 EXPECT_EQ(res, 5U); 179 } 180} 181 182/** 183 * @tc.name: ConvertRegionMUtf8ToUtf16 184 * @tc.desc: Verify the ConvertRegionMUtf8ToUtf16 function. 185 * @tc.type: FUNC 186 * @tc.require: 187 */ 188HWTEST(Utf, ConvertRegionMUtf8ToUtf16, testing::ext::TestSize.Level0) 189{ 190 // 2-byte mutf-8 U+0000 191 { 192 const std::vector<uint8_t> in {0xc0, 0x80, 0x00}; 193 const std::vector<uint16_t> res {0x0}; 194 std::vector<uint16_t> out(res.size()); 195 ConvertRegionMUtf8ToUtf16(in.data(), out.data(), utf::Mutf8Size(in.data()), res.size() * sizeof(uint16_t), 0); 196 EXPECT_EQ(out, res); 197 } 198 199 // 1-byte mutf-8: 0xxxxxxx 200 { 201 const std::vector<uint8_t> in {0x7f, 0x00}; 202 const std::vector<uint16_t> res {0x7f}; 203 std::vector<uint16_t> out(res.size()); 204 ConvertRegionMUtf8ToUtf16(in.data(), out.data(), utf::Mutf8Size(in.data()), res.size() * sizeof(uint16_t), 0); 205 EXPECT_EQ(out, res); 206 } 207 208 // 2-byte mutf-8: 110xxxxx 10xxxxxx 209 { 210 const std::vector<uint8_t> in {0xc2, 0xa7, 0x33, 0x00}; 211 const std::vector<uint16_t> res {0xa7, 0x33}; 212 std::vector<uint16_t> out(res.size()); 213 ConvertRegionMUtf8ToUtf16(in.data(), out.data(), utf::Mutf8Size(in.data()), res.size() * sizeof(uint16_t), 0); 214 EXPECT_EQ(out, res); 215 } 216 217 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 218 { 219 const std::vector<uint8_t> in {0xef, 0xbf, 0x83, 0x33, 0x00}; 220 const std::vector<uint16_t> res {0xffc3, 0x33}; 221 std::vector<uint16_t> out(res.size()); 222 ConvertRegionMUtf8ToUtf16(in.data(), out.data(), utf::Mutf8Size(in.data()), res.size() * sizeof(uint16_t), 0); 223 EXPECT_EQ(out, res); 224 } 225 226 // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx 227 { 228 const std::vector<uint8_t> in {0xed, 0xa0, 0x81, 0xed, 0xb0, 0xb7, 0x00}; 229 const std::vector<uint16_t> res1 {0xd801, 0xdc37}; 230 const std::vector<uint16_t> res2 {0xdc37}; 231 std::vector<uint16_t> out1(res1.size()); 232 std::vector<uint16_t> out2(res2.size()); 233 size_t in_len1 = utf::Mutf8Size(in.data()); 234 size_t in_len2 = utf::Mutf8Size(in.data() + 3); 235 ConvertRegionMUtf8ToUtf16(in.data(), out1.data(), in_len1, res1.size() * sizeof(uint16_t), 0); 236 ConvertRegionMUtf8ToUtf16(in.data(), out2.data(), in_len1, res2.size() * sizeof(uint16_t), in_len1 - in_len2); 237 EXPECT_EQ(out1, res1); 238 EXPECT_EQ(out2, res2); 239 } 240} 241 242HWTEST(Utf, ConvertRegionUtf16ToMUtf8_1, testing::ext::TestSize.Level0) 243{ 244 // 2-byte mutf-8 U+0000 245 { 246 const std::vector<uint16_t> in {0x0}; 247 const std::vector<uint8_t> res {0xc0, 0x80, 0x00}; 248 std::vector<uint8_t> out(res.size()); 249 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 250 EXPECT_EQ(sz, 2U); 251 out[out.size() - 1] = '\0'; 252 EXPECT_EQ(out, res); 253 } 254 255 // 1-byte mutf-8: 0xxxxxxx 256 { 257 const std::vector<uint16_t> in {0x7f}; 258 const std::vector<uint8_t> res {0x7f, 0x00}; 259 std::vector<uint8_t> out(res.size()); 260 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 261 EXPECT_EQ(sz, 1U); 262 out[out.size() - 1] = '\0'; 263 EXPECT_EQ(out, res); 264 } 265 266 // 2-byte mutf-8: 110xxxxx 10xxxxxx 267 { 268 const std::vector<uint16_t> in {0xa7, 0x33}; 269 const std::vector<uint8_t> res {0xc2, 0xa7, 0x33, 0x00}; 270 std::vector<uint8_t> out(res.size()); 271 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 272 EXPECT_EQ(sz, 3U); 273 out[out.size() - 1] = '\0'; 274 EXPECT_EQ(out, res); 275 } 276 277 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 278 { 279 const std::vector<uint16_t> in {0xffc3, 0x33}; 280 const std::vector<uint8_t> res {0xef, 0xbf, 0x83, 0x33, 0x00}; 281 std::vector<uint8_t> out(res.size()); 282 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 283 EXPECT_EQ(sz, 4U); 284 out[out.size() - 1] = '\0'; 285 EXPECT_EQ(out, res); 286 } 287} 288 289HWTEST(Utf, ConvertRegionUtf16ToMUtf8_2, testing::ext::TestSize.Level0) 290{ 291 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 292 // utf-16 data in 0xd800-0xdfff 293 { 294 const std::vector<uint16_t> in {0xd834, 0x33}; 295 const std::vector<uint8_t> res {0xed, 0xa0, 0xb4, 0x33, 0x00}; 296 std::vector<uint8_t> out(res.size()); 297 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 298 EXPECT_EQ(sz, 4U); 299 out[out.size() - 1] = '\0'; 300 EXPECT_EQ(out, res); 301 } 302 303 // 3-byte mutf-8: 1110xxxx 10xxxxxx 10xxxxxx 304 // utf-16 data in 0xd800-0xdfff 305 { 306 const std::vector<uint16_t> in {0xdf06, 0x33}; 307 const std::vector<uint8_t> res {0xed, 0xbc, 0x86, 0x33, 0x00}; 308 std::vector<uint8_t> out(res.size()); 309 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 310 EXPECT_EQ(sz, 4U); 311 out[out.size() - 1] = '\0'; 312 EXPECT_EQ(out, res); 313 } 314 315 // double 3-byte mutf-8: 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx 316 { 317 const std::vector<uint16_t> in {0xd801, 0xdc37}; 318 const std::vector<uint8_t> res {0xf0, 0x90, 0x90, 0xb7, 0x00}; 319 std::vector<uint8_t> out(res.size()); 320 size_t sz = ConvertRegionUtf16ToMUtf8(in.data(), out.data(), in.size(), out.size() - 1, 0); 321 EXPECT_EQ(sz, 4U); 322 out[out.size() - 1] = '\0'; 323 EXPECT_EQ(out, res); 324 } 325} 326 327// 1-byte utf-8: 0xxxxxxx 328HWTEST(Utf, CompareMUtf8ToMUtf8_1, testing::ext::TestSize.Level0) 329{ 330 { 331 const std::vector<uint8_t> v1 {0x00}; 332 const std::vector<uint8_t> v2 {0x7f, 0x00}; 333 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 334 } 335 336 { 337 const std::vector<uint8_t> v1 {0x02, 0x00}; 338 const std::vector<uint8_t> v2 {0x00}; 339 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 340 } 341 342 { 343 const std::vector<uint8_t> v1 {0x7f, 0x00}; 344 const std::vector<uint8_t> v2 {0x7f, 0x00}; 345 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0); 346 } 347 348 { 349 const std::vector<uint8_t> v1 {0x01, 0x7f, 0x00}; 350 const std::vector<uint8_t> v2 {0x01, 0x70, 0x00}; 351 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 352 } 353 354 { 355 const std::vector<uint8_t> v1 {0x01, 0x71, 0x00}; 356 const std::vector<uint8_t> v2 {0x01, 0x73, 0x00}; 357 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 358 } 359} 360 361// 2-byte utf-8: 110xxxxx 10xxxxxx 362HWTEST(Utf, CompareMUtf8ToMUtf8_2, testing::ext::TestSize.Level0) 363{ 364 { 365 const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00}; 366 const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00}; 367 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0); 368 } 369 370 { 371 const std::vector<uint8_t> v1 {0xdf, 0xb1, 0x03, 0x00}; 372 const std::vector<uint8_t> v2 {0xd1, 0xb2, 0x03, 0x00}; 373 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 374 } 375 376 { 377 const std::vector<uint8_t> v1 {0xd1, 0xbf, 0x03, 0x00}; 378 const std::vector<uint8_t> v2 {0xdf, 0xb0, 0x03, 0x00}; 379 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 380 } 381} 382 383// 3-byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx 384HWTEST(Utf, CompareMUtf8ToMUtf8_3, testing::ext::TestSize.Level0) 385{ 386 { 387 const std::vector<uint8_t> v1 {0xef, 0xbf, 0x03, 0x04, 0x00}; 388 const std::vector<uint8_t> v2 {0xef, 0xbf, 0x03, 0x04, 0x00}; 389 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0); 390 } 391 392 { 393 const std::vector<uint8_t> v1 {0xef, 0xb2, 0x03, 0x04, 0x00}; 394 const std::vector<uint8_t> v2 {0xe0, 0xbf, 0x03, 0x04, 0x00}; 395 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 396 } 397 398 { 399 const std::vector<uint8_t> v1 {0xef, 0xb0, 0x03, 0x04, 0x00}; 400 const std::vector<uint8_t> v2 {0xef, 0xbf, 0x05, 0x04, 0x00}; 401 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 402 } 403} 404 405// 4-byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 406HWTEST(Utf, CompareMUtf8ToMUtf8_4, testing::ext::TestSize.Level0) 407{ 408 { 409 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 410 const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 411 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) == 0); 412 } 413 414 { 415 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x0a, 0x05, 0x00}; 416 const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 417 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) > 0); 418 } 419 420 { 421 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 422 const std::vector<uint8_t> v2 {0xf8, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 423 EXPECT_TRUE(CompareMUtf8ToMUtf8(v1.data(), v2.data()) < 0); 424 } 425} 426 427// 1-byte utf-8: 0xxxxxxx 428HWTEST(Utf, CompareUtf8ToUtf8_1, testing::ext::TestSize.Level0) 429{ 430 { 431 const std::vector<uint8_t> v1 {0x00}; 432 const std::vector<uint8_t> v2 {0x7f, 0x00}; 433 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 434 } 435 436 { 437 const std::vector<uint8_t> v1 {0x02, 0x00}; 438 const std::vector<uint8_t> v2 {0x00}; 439 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 440 } 441 442 { 443 const std::vector<uint8_t> v1 {0x7f, 0x00}; 444 const std::vector<uint8_t> v2 {0x7f, 0x00}; 445 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0); 446 } 447 448 { 449 const std::vector<uint8_t> v1 {0x01, 0x7f, 0x00}; 450 const std::vector<uint8_t> v2 {0x01, 0x70, 0x00}; 451 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 452 } 453 454 { 455 const std::vector<uint8_t> v1 {0x01, 0x71, 0x00}; 456 const std::vector<uint8_t> v2 {0x01, 0x73, 0x00}; 457 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 458 } 459} 460 461// 2-byte utf-8: 110xxxxx 10xxxxxx 462HWTEST(Utf, CompareUtf8ToUtf8_2, testing::ext::TestSize.Level0) 463{ 464 { 465 const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00}; 466 const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00}; 467 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0); 468 } 469 470 { 471 const std::vector<uint8_t> v1 {0xdf, 0xb1, 0x03, 0x00}; 472 const std::vector<uint8_t> v2 {0xd1, 0xb2, 0x03, 0x00}; 473 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 474 } 475 476 { 477 const std::vector<uint8_t> v1 {0xd1, 0xbf, 0x03, 0x00}; 478 const std::vector<uint8_t> v2 {0xdf, 0xb0, 0x03, 0x00}; 479 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 480 } 481} 482 483// 3-byte utf-8: 1110xxxx 10xxxxxx 10xxxxxx 484HWTEST(Utf, CompareUtf8ToUtf8_3, testing::ext::TestSize.Level0) 485{ 486 { 487 const std::vector<uint8_t> v1 {0xef, 0xbf, 0x03, 0x04, 0x00}; 488 const std::vector<uint8_t> v2 {0xef, 0xbf, 0x03, 0x04, 0x00}; 489 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0); 490 } 491 492 { 493 const std::vector<uint8_t> v1 {0xef, 0xb2, 0x03, 0x04, 0x00}; 494 const std::vector<uint8_t> v2 {0xe0, 0xbf, 0x03, 0x04, 0x00}; 495 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 496 } 497 498 { 499 const std::vector<uint8_t> v1 {0xef, 0xb0, 0x03, 0x04, 0x00}; 500 const std::vector<uint8_t> v2 {0xef, 0xbf, 0x05, 0x04, 0x00}; 501 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 502 } 503} 504 505// 4-byte utf-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 506HWTEST(Utf, CompareUtf8ToUtf8_4, testing::ext::TestSize.Level0) 507{ 508 { 509 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 510 const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 511 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) == 0); 512 } 513 514 { 515 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x0a, 0x05, 0x00}; 516 const std::vector<uint8_t> v2 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 517 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) > 0); 518 } 519 520 { 521 const std::vector<uint8_t> v1 {0xf7, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 522 const std::vector<uint8_t> v2 {0xf8, 0xbf, 0xbf, 0x04, 0x05, 0x00}; 523 EXPECT_TRUE(CompareUtf8ToUtf8(v1.data(), v1.size(), v2.data(), v2.size()) < 0); 524 } 525} 526 527HWTEST(Utf, IsMUtf8OnlySingleBytes, testing::ext::TestSize.Level0) 528{ 529 const std::vector<uint8_t> v1 {0x02, 0x00}; 530 EXPECT_TRUE(IsMUtf8OnlySingleBytes(v1.data())); 531 532 const std::vector<uint8_t> v2 {0x90, 0x00}; 533 EXPECT_FALSE(IsMUtf8OnlySingleBytes(v2.data())); 534} 535 536HWTEST(Utf, IsValidModifiedUTF8, testing::ext::TestSize.Level0) 537{ 538 const std::vector<uint8_t> v1 {0x31, 0x00}; 539 EXPECT_TRUE(IsValidModifiedUTF8(v1.data())); 540 541 const std::vector<uint8_t> v2 {0x9f, 0x00}; 542 EXPECT_FALSE(IsValidModifiedUTF8(v2.data())); 543 544 const std::vector<uint8_t> v3 {0xf7, 0x00}; 545 EXPECT_FALSE(IsValidModifiedUTF8(v3.data())); 546 547 const std::vector<uint8_t> v4 {0xe0, 0x00}; 548 EXPECT_FALSE(IsValidModifiedUTF8(v4.data())); 549 550 const std::vector<uint8_t> v5 {0xd4, 0x00}; 551 EXPECT_FALSE(IsValidModifiedUTF8(v5.data())); 552 553 const std::vector<uint8_t> v6 {0x11, 0x31, 0x00}; 554 EXPECT_TRUE(IsValidModifiedUTF8(v6.data())); 555 556 const std::vector<uint8_t> v7 {0xf8, 0x00}; 557 EXPECT_FALSE(IsValidModifiedUTF8(v7.data())); 558} 559 560HWTEST(Utf, ConvertMUtf8ToUtf16Pair, testing::ext::TestSize.Level0) 561{ 562 const uint8_t data = 0x11; 563 std::pair<uint32_t, size_t> p1 = ConvertMUtf8ToUtf16Pair(&data, 2U); 564 ASSERT_EQ(17U, p1.first); 565 ASSERT_EQ(1U, p1.second); 566 567 std::pair<uint32_t, size_t> p2 = ConvertMUtf8ToUtf16Pair(&data, 3U); 568 ASSERT_EQ(17U, p2.first); 569 ASSERT_EQ(1U, p2.second); 570} 571 572HWTEST(Utf, IsEqualTest, testing::ext::TestSize.Level0) 573{ 574 { 575 const std::vector<uint8_t> v1 {0x7f, 0x00}; 576 const std::vector<uint8_t> v2 {0x7f, 0x00}; 577 Span<const uint8_t> utf8_1(v1.data(), v1.size()); 578 Span<const uint8_t> utf8_2(v2.data(), v2.size()); 579 ASSERT_TRUE(IsEqual(utf8_1, utf8_2)); 580 } 581 582 { 583 const std::vector<uint8_t> v1 {0x7f, 0x7f, 0x00}; 584 const std::vector<uint8_t> v2 {0x7f, 0x00}; 585 Span<const uint8_t> utf8_1(v1.data(), v1.size()); 586 Span<const uint8_t> utf8_2(v2.data(), v2.size()); 587 ASSERT_FALSE(IsEqual(utf8_1, utf8_2)); 588 } 589 590 { 591 const std::vector<uint8_t> v1 {0xdf, 0xbf, 0x03, 0x00}; 592 const std::vector<uint8_t> v2 {0xdf, 0xbf, 0x03, 0x00}; 593 EXPECT_TRUE(IsEqual(v1.data(), v2.data())); 594 } 595} 596 597} // namespace panda::utf::test 598