14514f5e3Sopenharmony_ci/* 24514f5e3Sopenharmony_ci * Copyright (c) 2024 Huawei Device Co., Ltd. 34514f5e3Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 44514f5e3Sopenharmony_ci * you may not use this file except in compliance with the License. 54514f5e3Sopenharmony_ci * You may obtain a copy of the License at 64514f5e3Sopenharmony_ci * 74514f5e3Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 84514f5e3Sopenharmony_ci * 94514f5e3Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 104514f5e3Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 114514f5e3Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 124514f5e3Sopenharmony_ci * See the License for the specific language governing permissions and 134514f5e3Sopenharmony_ci * limitations under the License. 144514f5e3Sopenharmony_ci */ 154514f5e3Sopenharmony_ci 164514f5e3Sopenharmony_ci#include "ecmascript/ecma_string-inl.h" 174514f5e3Sopenharmony_ci#include "ecmascript/object_factory.h" 184514f5e3Sopenharmony_ci#include "ecmascript/tests/ecma_test_common.h" 194514f5e3Sopenharmony_ci 204514f5e3Sopenharmony_ciusing namespace panda::ecmascript; 214514f5e3Sopenharmony_ci 224514f5e3Sopenharmony_cinamespace panda::test { 234514f5e3Sopenharmony_ciclass EcmaStringEqualsTest : public BaseTestWithScope<false> { 244514f5e3Sopenharmony_ci public: 254514f5e3Sopenharmony_ci static bool IsUtf8EqualsUtf16UT(const uint8_t *utf8Data, size_t utf8Len, 264514f5e3Sopenharmony_ci const uint16_t *utf16Data, uint32_t utf16Len) 274514f5e3Sopenharmony_ci { 284514f5e3Sopenharmony_ci return EcmaString::IsUtf8EqualsUtf16(utf8Data, utf8Len, utf16Data, utf16Len); 294514f5e3Sopenharmony_ci } 304514f5e3Sopenharmony_ci}; 314514f5e3Sopenharmony_ci 324514f5e3Sopenharmony_ci/* 334514f5e3Sopenharmony_ci* @tc.name: IsUtf8EqualsUtf16 344514f5e3Sopenharmony_ci* @tc.desc: Test a function that compares whether an array of UTF8 characters 354514f5e3Sopenharmony_ci* is equal to an array of UTF16 characters 364514f5e3Sopenharmony_ci* @tc.type: FUNC 374514f5e3Sopenharmony_ci*/ 384514f5e3Sopenharmony_ciHWTEST_F_L0(EcmaStringEqualsTest, IsUtf8EqualsUtf16) 394514f5e3Sopenharmony_ci{ 404514f5e3Sopenharmony_ci // Test case 1: ASCII characters 414514f5e3Sopenharmony_ci const uint8_t utf8_01[] = "hello"; // "hello" in ASCII is valid UTF-8 424514f5e3Sopenharmony_ci const uint16_t utf16_01[] = {'h', 'e', 'l', 'l', 'o'}; 434514f5e3Sopenharmony_ci EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_01, 5, utf16_01, 5)); 444514f5e3Sopenharmony_ci 454514f5e3Sopenharmony_ci // Test case 2: 2-byte UTF-8 sequences 464514f5e3Sopenharmony_ci const uint8_t utf8_02[] = {0xC3, 0xA9, 0xC3, 0xA8}; // "éè" in UTF-8 474514f5e3Sopenharmony_ci const uint16_t utf16_02[] = {0x00E9, 0x00E8}; // "éè" in UTF-16 484514f5e3Sopenharmony_ci EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_02, 4, utf16_02, 2)); 494514f5e3Sopenharmony_ci 504514f5e3Sopenharmony_ci // Test case 3: 3-byte UTF-8 sequences 514514f5e3Sopenharmony_ci const uint8_t utf8_03[] = {0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87}; // "中文" in UTF-8 524514f5e3Sopenharmony_ci const uint16_t utf16_03[] = {0x4E2D, 0x6587}; // "中文" in UTF-16 534514f5e3Sopenharmony_ci EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_03, 6, utf16_03, 2)); 544514f5e3Sopenharmony_ci 554514f5e3Sopenharmony_ci // Test case 4: 4-byte UTF-8 sequences 564514f5e3Sopenharmony_ci const uint8_t utf8_04[] = {0xF0, 0x9F, 0x98, 0x81}; // in UTF-8 574514f5e3Sopenharmony_ci const uint16_t utf16_04[] = {0xD83D, 0xDE01}; // in UTF-16 (surrogate pair) 584514f5e3Sopenharmony_ci EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_04, 4, utf16_04, 2)); 594514f5e3Sopenharmony_ci 604514f5e3Sopenharmony_ci // Test case 5: UTF-16 edge cases (empty strings) 614514f5e3Sopenharmony_ci const uint8_t *utf8_05 = nullptr; 624514f5e3Sopenharmony_ci const uint16_t *utf16_05 = nullptr; 634514f5e3Sopenharmony_ci EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_05, 0, utf16_05, 0)); 644514f5e3Sopenharmony_ci 654514f5e3Sopenharmony_ci // Test case 6: UTF-8 shorter than UTF-16 664514f5e3Sopenharmony_ci const uint8_t utf8_06[] = "test"; // "test" in ASCII 674514f5e3Sopenharmony_ci const uint16_t utf16_06[] = {'t', 'e', 's', 't', '!', '!'}; 684514f5e3Sopenharmony_ci EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_06, 4, utf16_06, 6)); 694514f5e3Sopenharmony_ci 704514f5e3Sopenharmony_ci // Test case 7: UTF-8 longer than UTF-16 714514f5e3Sopenharmony_ci const uint8_t utf8_07[] = {0xF0, 0x9F, 0x98, 0x81, 0xF0, 0x9F, 0x98, 0x81}; // in UTF-8 724514f5e3Sopenharmony_ci const uint16_t utf16_07[] = {0xD83D, 0xDE01}; // in UTF-16 (surrogate pair) 734514f5e3Sopenharmony_ci EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_07, 8, utf16_07, 2)); 744514f5e3Sopenharmony_ci 754514f5e3Sopenharmony_ci // Test case 8: Incomplete surrogate pair in UTF-16 764514f5e3Sopenharmony_ci const uint8_t utf8_08[] = {0xF0, 0x9F, 0x92, 0xA9}; // in UTF-8 774514f5e3Sopenharmony_ci const uint16_t utf16_08[] = {0xD83D}; // Missing low surrogate 784514f5e3Sopenharmony_ci EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_08, 4, utf16_08, 1)); 794514f5e3Sopenharmony_ci 804514f5e3Sopenharmony_ci // Test case 9: Truncated UTF-8 multi-byte character 814514f5e3Sopenharmony_ci const uint8_t utf8_09[] = {0xE3, 0x81}; // Truncated "あ" (Japanese 'a') 824514f5e3Sopenharmony_ci const uint16_t utf16_09[] = {0x3042}; // Full "あ" 834514f5e3Sopenharmony_ci EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_09, 2, utf16_09, 1)); 844514f5e3Sopenharmony_ci 854514f5e3Sopenharmony_ci // Test case 10: Longer UTF-8 sequence matching shorter UTF-16 864514f5e3Sopenharmony_ci const uint8_t utf8_10[] = {0xC2, 0xA3, 0xC2, 0xA3}; // "££" in UTF-8 874514f5e3Sopenharmony_ci const uint16_t utf16_10[] = {0x00A3}; // Single "£" 884514f5e3Sopenharmony_ci EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_10, 4, utf16_10, 1)); 894514f5e3Sopenharmony_ci 904514f5e3Sopenharmony_ci // Test case 11: Handling noncharacters in both UTF-8 and UTF-16 914514f5e3Sopenharmony_ci const uint8_t utf8_11[] = {0xEF, 0xBF, 0xBE}; // UTF-8 noncharacter U+FFFE 924514f5e3Sopenharmony_ci const uint16_t utf16_11[] = {0xFFFE}; // UTF-16 noncharacter 934514f5e3Sopenharmony_ci EXPECT_TRUE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_11, 3, utf16_11, 1)); 944514f5e3Sopenharmony_ci 954514f5e3Sopenharmony_ci // Test case 12: Empty UTF-8 and non-empty UTF-16 964514f5e3Sopenharmony_ci const uint8_t *utf8_12 = nullptr; // Empty UTF-8 974514f5e3Sopenharmony_ci const uint16_t utf16_12[] = {0x0061}; // "a" 984514f5e3Sopenharmony_ci EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_12, 0, utf16_12, 1)); 994514f5e3Sopenharmony_ci 1004514f5e3Sopenharmony_ci // Test case 13: Non-empty UTF-8 and empty UTF-16 1014514f5e3Sopenharmony_ci const uint8_t utf8_13[] = {0x61}; // "a" 1024514f5e3Sopenharmony_ci const uint16_t *utf16_13 = nullptr; // Empty UTF-16 1034514f5e3Sopenharmony_ci EXPECT_FALSE(EcmaStringEqualsTest::IsUtf8EqualsUtf16UT(utf8_13, 1, utf16_13, 0)); 1044514f5e3Sopenharmony_ci} 1054514f5e3Sopenharmony_ci}