14514f5e3Sopenharmony_ci/* 24514f5e3Sopenharmony_ci * Copyright (c) 2021 Huawei Device Co., Ltd. 34514f5e3Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 44514f5e3Sopenharmony_ci * you may not use this file except in compliance with the License. 54514f5e3Sopenharmony_ci * You may obtain a copy of the License at 64514f5e3Sopenharmony_ci * 74514f5e3Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 84514f5e3Sopenharmony_ci * 94514f5e3Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 104514f5e3Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 114514f5e3Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 124514f5e3Sopenharmony_ci * See the License for the specific language governing permissions and 134514f5e3Sopenharmony_ci * limitations under the License. 144514f5e3Sopenharmony_ci */ 154514f5e3Sopenharmony_ci 164514f5e3Sopenharmony_ci#include "ecmascript/ecma_string-inl.h" 174514f5e3Sopenharmony_ci 184514f5e3Sopenharmony_ci#include "ecmascript/ecma_string_table.h" 194514f5e3Sopenharmony_ci#include "ecmascript/platform/ecma_string_hash.h" 204514f5e3Sopenharmony_ci 214514f5e3Sopenharmony_cinamespace panda::ecmascript { 224514f5e3Sopenharmony_ci 234514f5e3Sopenharmony_ciconstexpr size_t LOW_3BITS = 0x7; 244514f5e3Sopenharmony_ciconstexpr size_t LOW_4BITS = 0xF; 254514f5e3Sopenharmony_ciconstexpr size_t LOW_5BITS = 0x1F; 264514f5e3Sopenharmony_ciconstexpr size_t LOW_6BITS = 0x3F; 274514f5e3Sopenharmony_ciconstexpr size_t L_SURROGATE_START = 0xDC00; 284514f5e3Sopenharmony_ciconstexpr size_t H_SURROGATE_START = 0xD800; 294514f5e3Sopenharmony_ciconstexpr size_t SURROGATE_RAIR_START = 0x10000; 304514f5e3Sopenharmony_ciconstexpr size_t OFFSET_18POS = 18; 314514f5e3Sopenharmony_ciconstexpr size_t OFFSET_12POS = 12; 324514f5e3Sopenharmony_ciconstexpr size_t OFFSET_10POS = 10; 334514f5e3Sopenharmony_ciconstexpr size_t OFFSET_6POS = 6; 344514f5e3Sopenharmony_ci 354514f5e3Sopenharmony_ciEcmaString *EcmaString::Concat(const EcmaVM *vm, 364514f5e3Sopenharmony_ci const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type) 374514f5e3Sopenharmony_ci{ 384514f5e3Sopenharmony_ci ASSERT(IsSMemSpace(type)); 394514f5e3Sopenharmony_ci // allocator may trig gc and move src, need to hold it 404514f5e3Sopenharmony_ci EcmaString *strLeft = *left; 414514f5e3Sopenharmony_ci EcmaString *strRight = *right; 424514f5e3Sopenharmony_ci uint32_t leftLength = strLeft->GetLength(); 434514f5e3Sopenharmony_ci uint32_t rightLength = strRight->GetLength(); 444514f5e3Sopenharmony_ci uint32_t newLength = leftLength + rightLength; 454514f5e3Sopenharmony_ci if (newLength == 0) { 464514f5e3Sopenharmony_ci return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>(); 474514f5e3Sopenharmony_ci } 484514f5e3Sopenharmony_ci 494514f5e3Sopenharmony_ci if (leftLength == 0) { 504514f5e3Sopenharmony_ci return strRight; 514514f5e3Sopenharmony_ci } 524514f5e3Sopenharmony_ci if (rightLength == 0) { 534514f5e3Sopenharmony_ci return strLeft; 544514f5e3Sopenharmony_ci } 554514f5e3Sopenharmony_ci // if the result string is small, make a LineString 564514f5e3Sopenharmony_ci bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8()); 574514f5e3Sopenharmony_ci if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) { 584514f5e3Sopenharmony_ci ASSERT(strLeft->IsLineOrConstantString()); 594514f5e3Sopenharmony_ci ASSERT(strRight->IsLineOrConstantString()); 604514f5e3Sopenharmony_ci auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type); 614514f5e3Sopenharmony_ci // retrieve strings after gc 624514f5e3Sopenharmony_ci strLeft = *left; 634514f5e3Sopenharmony_ci strRight = *right; 644514f5e3Sopenharmony_ci if (compressed) { 654514f5e3Sopenharmony_ci // copy left part 664514f5e3Sopenharmony_ci Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength); 674514f5e3Sopenharmony_ci Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength); 684514f5e3Sopenharmony_ci EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength); 694514f5e3Sopenharmony_ci // copy right part 704514f5e3Sopenharmony_ci sp = sp.SubSpan(leftLength); 714514f5e3Sopenharmony_ci Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength); 724514f5e3Sopenharmony_ci EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength); 734514f5e3Sopenharmony_ci } else { 744514f5e3Sopenharmony_ci // copy left part 754514f5e3Sopenharmony_ci Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength); 764514f5e3Sopenharmony_ci if (strLeft->IsUtf8()) { 774514f5e3Sopenharmony_ci EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength); 784514f5e3Sopenharmony_ci } else { 794514f5e3Sopenharmony_ci Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength); 804514f5e3Sopenharmony_ci EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U); 814514f5e3Sopenharmony_ci } 824514f5e3Sopenharmony_ci // copy right part 834514f5e3Sopenharmony_ci sp = sp.SubSpan(leftLength); 844514f5e3Sopenharmony_ci if (strRight->IsUtf8()) { 854514f5e3Sopenharmony_ci EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength); 864514f5e3Sopenharmony_ci } else { 874514f5e3Sopenharmony_ci Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength); 884514f5e3Sopenharmony_ci EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U); 894514f5e3Sopenharmony_ci } 904514f5e3Sopenharmony_ci } 914514f5e3Sopenharmony_ci ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!"); 924514f5e3Sopenharmony_ci return newString; 934514f5e3Sopenharmony_ci } 944514f5e3Sopenharmony_ci return CreateTreeString(vm, left, right, newLength, compressed); 954514f5e3Sopenharmony_ci} 964514f5e3Sopenharmony_ci 974514f5e3Sopenharmony_ci/* static */ 984514f5e3Sopenharmony_ciEcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 994514f5e3Sopenharmony_ci uint32_t length, bool compressed) 1004514f5e3Sopenharmony_ci{ 1014514f5e3Sopenharmony_ci if (original->IsConstantString()) { 1024514f5e3Sopenharmony_ci return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE); 1034514f5e3Sopenharmony_ci } 1044514f5e3Sopenharmony_ci JSHandle<EcmaString> newString(vm->GetJSThread(), 1054514f5e3Sopenharmony_ci CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE)); 1064514f5e3Sopenharmony_ci auto strOrigin = FlattenAllString(vm, original); 1074514f5e3Sopenharmony_ci if (compressed) { 1084514f5e3Sopenharmony_ci // copy 1094514f5e3Sopenharmony_ci Span<uint8_t> sp(newString->GetDataUtf8Writable(), length); 1104514f5e3Sopenharmony_ci Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length); 1114514f5e3Sopenharmony_ci EcmaString::MemCopyChars(sp, length, srcSp, length); 1124514f5e3Sopenharmony_ci } else { 1134514f5e3Sopenharmony_ci // copy left part 1144514f5e3Sopenharmony_ci Span<uint16_t> sp(newString->GetDataUtf16Writable(), length); 1154514f5e3Sopenharmony_ci if (strOrigin.IsUtf8()) { 1164514f5e3Sopenharmony_ci EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length); 1174514f5e3Sopenharmony_ci } else { 1184514f5e3Sopenharmony_ci Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length); 1194514f5e3Sopenharmony_ci EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U); 1204514f5e3Sopenharmony_ci } 1214514f5e3Sopenharmony_ci } 1224514f5e3Sopenharmony_ci ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!"); 1234514f5e3Sopenharmony_ci return *newString; 1244514f5e3Sopenharmony_ci} 1254514f5e3Sopenharmony_ci 1264514f5e3Sopenharmony_ci/* static */ 1274514f5e3Sopenharmony_ciEcmaString *EcmaString::FastSubString(const EcmaVM *vm, 1284514f5e3Sopenharmony_ci const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1294514f5e3Sopenharmony_ci{ 1304514f5e3Sopenharmony_ci ASSERT((start + length) <= src->GetLength()); 1314514f5e3Sopenharmony_ci if (length == 0) { 1324514f5e3Sopenharmony_ci return *vm->GetFactory()->GetEmptyString(); 1334514f5e3Sopenharmony_ci } 1344514f5e3Sopenharmony_ci if (start == 0 && length == src->GetLength()) { 1354514f5e3Sopenharmony_ci return *src; 1364514f5e3Sopenharmony_ci } 1374514f5e3Sopenharmony_ci if (src->IsUtf8()) { 1384514f5e3Sopenharmony_ci return FastSubUtf8String(vm, src, start, length); 1394514f5e3Sopenharmony_ci } 1404514f5e3Sopenharmony_ci return FastSubUtf16String(vm, src, start, length); 1414514f5e3Sopenharmony_ci} 1424514f5e3Sopenharmony_ci 1434514f5e3Sopenharmony_ci/* static */ 1444514f5e3Sopenharmony_ciEcmaString *EcmaString::GetSlicedString(const EcmaVM *vm, 1454514f5e3Sopenharmony_ci const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1464514f5e3Sopenharmony_ci{ 1474514f5e3Sopenharmony_ci ASSERT((start + length) <= src->GetLength()); 1484514f5e3Sopenharmony_ci JSHandle<SlicedString> slicedString(vm->GetJSThread(), CreateSlicedString(vm)); 1494514f5e3Sopenharmony_ci FlatStringInfo srcFlat = FlattenAllString(vm, src); 1504514f5e3Sopenharmony_ci slicedString->SetLength(length, srcFlat.GetString()->IsUtf8()); 1514514f5e3Sopenharmony_ci slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString())); 1524514f5e3Sopenharmony_ci slicedString->SetStartIndex(start + srcFlat.GetStartIndex()); 1534514f5e3Sopenharmony_ci return *slicedString; 1544514f5e3Sopenharmony_ci} 1554514f5e3Sopenharmony_ci 1564514f5e3Sopenharmony_ci/* static */ 1574514f5e3Sopenharmony_ciEcmaString *EcmaString::GetSubString(const EcmaVM *vm, 1584514f5e3Sopenharmony_ci const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1594514f5e3Sopenharmony_ci{ 1604514f5e3Sopenharmony_ci ASSERT((start + length) <= src->GetLength()); 1614514f5e3Sopenharmony_ci if (length == 1) { 1624514f5e3Sopenharmony_ci JSThread *thread = vm->GetJSThread(); 1634514f5e3Sopenharmony_ci uint16_t res = EcmaStringAccessor(src).Get<false>(start); 1644514f5e3Sopenharmony_ci if (EcmaStringAccessor::CanBeCompressed(&res, 1)) { 1654514f5e3Sopenharmony_ci JSHandle<SingleCharTable> singleCharTable(thread, thread->GetSingleCharTable()); 1664514f5e3Sopenharmony_ci return EcmaString::Cast(singleCharTable->GetStringFromSingleCharTable(res).GetTaggedObject()); 1674514f5e3Sopenharmony_ci } 1684514f5e3Sopenharmony_ci } 1694514f5e3Sopenharmony_ci if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) { 1704514f5e3Sopenharmony_ci if (start == 0 && length == src->GetLength()) { 1714514f5e3Sopenharmony_ci return *src; 1724514f5e3Sopenharmony_ci } 1734514f5e3Sopenharmony_ci if (src->IsUtf16()) { 1744514f5e3Sopenharmony_ci FlatStringInfo srcFlat = FlattenAllString(vm, src); 1754514f5e3Sopenharmony_ci bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length); 1764514f5e3Sopenharmony_ci if (canBeCompressed) { 1774514f5e3Sopenharmony_ci JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed)); 1784514f5e3Sopenharmony_ci srcFlat = FlattenAllString(vm, src); 1794514f5e3Sopenharmony_ci CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length); 1804514f5e3Sopenharmony_ci return *string; 1814514f5e3Sopenharmony_ci } 1824514f5e3Sopenharmony_ci } 1834514f5e3Sopenharmony_ci return GetSlicedString(vm, src, start, length); 1844514f5e3Sopenharmony_ci } 1854514f5e3Sopenharmony_ci return FastSubString(vm, src, start, length); 1864514f5e3Sopenharmony_ci} 1874514f5e3Sopenharmony_ci 1884514f5e3Sopenharmony_civoid EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length) 1894514f5e3Sopenharmony_ci{ 1904514f5e3Sopenharmony_ci ASSERT(IsLineString() && !IsConstantString()); 1914514f5e3Sopenharmony_ci if (IsUtf8()) { 1924514f5e3Sopenharmony_ci ASSERT(src->IsUtf8()); 1934514f5e3Sopenharmony_ci CVector<uint8_t> buf; 1944514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf); 1954514f5e3Sopenharmony_ci // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 1964514f5e3Sopenharmony_ci if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) { 1974514f5e3Sopenharmony_ci LOG_FULL(FATAL) << "memcpy_s failed"; 1984514f5e3Sopenharmony_ci UNREACHABLE(); 1994514f5e3Sopenharmony_ci } 2004514f5e3Sopenharmony_ci } else if (src->IsUtf8()) { 2014514f5e3Sopenharmony_ci // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 2024514f5e3Sopenharmony_ci CVector<uint8_t> buf; 2034514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf); 2044514f5e3Sopenharmony_ci Span<uint16_t> to(GetDataUtf16Writable() + start, length); 2054514f5e3Sopenharmony_ci Span<const uint8_t> from(data, length); 2064514f5e3Sopenharmony_ci for (uint32_t i = 0; i < length; i++) { 2074514f5e3Sopenharmony_ci to[i] = from[i]; 2084514f5e3Sopenharmony_ci } 2094514f5e3Sopenharmony_ci } else { 2104514f5e3Sopenharmony_ci CVector<uint16_t> buf; 2114514f5e3Sopenharmony_ci const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf); 2124514f5e3Sopenharmony_ci // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 2134514f5e3Sopenharmony_ci if (length != 0 && memcpy_s(GetDataUtf16Writable() + start, 2144514f5e3Sopenharmony_ci destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 2154514f5e3Sopenharmony_ci LOG_FULL(FATAL) << "memcpy_s failed"; 2164514f5e3Sopenharmony_ci UNREACHABLE(); 2174514f5e3Sopenharmony_ci } 2184514f5e3Sopenharmony_ci } 2194514f5e3Sopenharmony_ci} 2204514f5e3Sopenharmony_ci 2214514f5e3Sopenharmony_citemplate<typename T1, typename T2> 2224514f5e3Sopenharmony_ciint32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count) 2234514f5e3Sopenharmony_ci{ 2244514f5e3Sopenharmony_ci for (int32_t i = 0; i < count; ++i) { 2254514f5e3Sopenharmony_ci auto left = static_cast<int32_t>(lhsSp[i]); 2264514f5e3Sopenharmony_ci auto right = static_cast<int32_t>(rhsSp[i]); 2274514f5e3Sopenharmony_ci if (left != right) { 2284514f5e3Sopenharmony_ci return left - right; 2294514f5e3Sopenharmony_ci } 2304514f5e3Sopenharmony_ci } 2314514f5e3Sopenharmony_ci return 0; 2324514f5e3Sopenharmony_ci} 2334514f5e3Sopenharmony_ci 2344514f5e3Sopenharmony_ciint32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right) 2354514f5e3Sopenharmony_ci{ 2364514f5e3Sopenharmony_ci if (*left == *right) { 2374514f5e3Sopenharmony_ci return 0; 2384514f5e3Sopenharmony_ci } 2394514f5e3Sopenharmony_ci FlatStringInfo lhs = FlattenAllString(vm, left); 2404514f5e3Sopenharmony_ci JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString()); 2414514f5e3Sopenharmony_ci FlatStringInfo rhs = FlattenAllString(vm, right); 2424514f5e3Sopenharmony_ci lhs.SetString(*string); 2434514f5e3Sopenharmony_ci int32_t lhsCount = static_cast<int32_t>(lhs.GetLength()); 2444514f5e3Sopenharmony_ci int32_t rhsCount = static_cast<int32_t>(rhs.GetLength()); 2454514f5e3Sopenharmony_ci int32_t countDiff = lhsCount - rhsCount; 2464514f5e3Sopenharmony_ci int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount; 2474514f5e3Sopenharmony_ci if (!lhs.IsUtf16() && !rhs.IsUtf16()) { 2484514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount); 2494514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount); 2504514f5e3Sopenharmony_ci int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount); 2514514f5e3Sopenharmony_ci if (charDiff != 0) { 2524514f5e3Sopenharmony_ci return charDiff; 2534514f5e3Sopenharmony_ci } 2544514f5e3Sopenharmony_ci } else if (!lhs.IsUtf16()) { 2554514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount); 2564514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount); 2574514f5e3Sopenharmony_ci int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount); 2584514f5e3Sopenharmony_ci if (charDiff != 0) { 2594514f5e3Sopenharmony_ci return charDiff; 2604514f5e3Sopenharmony_ci } 2614514f5e3Sopenharmony_ci } else if (!rhs.IsUtf16()) { 2624514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount); 2634514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount); 2644514f5e3Sopenharmony_ci int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount); 2654514f5e3Sopenharmony_ci if (charDiff != 0) { 2664514f5e3Sopenharmony_ci return charDiff; 2674514f5e3Sopenharmony_ci } 2684514f5e3Sopenharmony_ci } else { 2694514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount); 2704514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount); 2714514f5e3Sopenharmony_ci int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount); 2724514f5e3Sopenharmony_ci if (charDiff != 0) { 2734514f5e3Sopenharmony_ci return charDiff; 2744514f5e3Sopenharmony_ci } 2754514f5e3Sopenharmony_ci } 2764514f5e3Sopenharmony_ci return countDiff; 2774514f5e3Sopenharmony_ci} 2784514f5e3Sopenharmony_ci 2794514f5e3Sopenharmony_citemplate<typename T1, typename T2> 2804514f5e3Sopenharmony_cibool IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset) 2814514f5e3Sopenharmony_ci{ 2824514f5e3Sopenharmony_ci int rhsSize = static_cast<int>(rhsSp.size()); 2834514f5e3Sopenharmony_ci ASSERT(rhsSize + offset <= lhsSp.size()); 2844514f5e3Sopenharmony_ci for (int i = 0; i < rhsSize; ++i) { 2854514f5e3Sopenharmony_ci auto left = static_cast<int32_t>(lhsSp[offset + static_cast<uint32_t>(i)]); 2864514f5e3Sopenharmony_ci auto right = static_cast<int32_t>(rhsSp[i]); 2874514f5e3Sopenharmony_ci if (left != right) { 2884514f5e3Sopenharmony_ci return false; 2894514f5e3Sopenharmony_ci } 2904514f5e3Sopenharmony_ci } 2914514f5e3Sopenharmony_ci return true; 2924514f5e3Sopenharmony_ci} 2934514f5e3Sopenharmony_ci 2944514f5e3Sopenharmony_ci 2954514f5e3Sopenharmony_ci/** 2964514f5e3Sopenharmony_ci * left: text string 2974514f5e3Sopenharmony_ci * right: pattern string 2984514f5e3Sopenharmony_ci * example 1: IsSubStringAt("IsSubStringAt", "Is", 0) return true 2994514f5e3Sopenharmony_ci * example 2: IsSubStringAt("IsSubStringAt", "It", 0) return false 3004514f5e3Sopenharmony_ci*/ 3014514f5e3Sopenharmony_cibool EcmaString::IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, 3024514f5e3Sopenharmony_ci const JSHandle<EcmaString>& right, uint32_t offset) 3034514f5e3Sopenharmony_ci{ 3044514f5e3Sopenharmony_ci FlatStringInfo lhs = FlattenAllString(vm, left); 3054514f5e3Sopenharmony_ci JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString()); 3064514f5e3Sopenharmony_ci FlatStringInfo rhs = FlattenAllString(vm, right); 3074514f5e3Sopenharmony_ci lhs.SetString(*string); 3084514f5e3Sopenharmony_ci int32_t lhsCount = static_cast<int32_t>(lhs.GetLength()); 3094514f5e3Sopenharmony_ci int32_t rhsCount = static_cast<int32_t>(rhs.GetLength()); 3104514f5e3Sopenharmony_ci if (!lhs.IsUtf16() && !rhs.IsUtf16()) { 3114514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount); 3124514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount); 3134514f5e3Sopenharmony_ci return IsSubStringAtSpan(lhsSp, rhsSp, offset); 3144514f5e3Sopenharmony_ci } else if (!lhs.IsUtf16()) { 3154514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount); 3164514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount); 3174514f5e3Sopenharmony_ci return IsSubStringAtSpan(lhsSp, rhsSp, offset); 3184514f5e3Sopenharmony_ci } else if (!rhs.IsUtf16()) { 3194514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount); 3204514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount); 3214514f5e3Sopenharmony_ci return IsSubStringAtSpan(lhsSp, rhsSp, offset); 3224514f5e3Sopenharmony_ci } else { 3234514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount); 3244514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount); 3254514f5e3Sopenharmony_ci return IsSubStringAtSpan(lhsSp, rhsSp, offset); 3264514f5e3Sopenharmony_ci } 3274514f5e3Sopenharmony_ci return false; 3284514f5e3Sopenharmony_ci} 3294514f5e3Sopenharmony_ci 3304514f5e3Sopenharmony_ci/* static */ 3314514f5e3Sopenharmony_citemplate<typename T1, typename T2> 3324514f5e3Sopenharmony_ciint32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max) 3334514f5e3Sopenharmony_ci{ 3344514f5e3Sopenharmony_ci ASSERT(rhsSp.size() > 0); 3354514f5e3Sopenharmony_ci auto first = static_cast<int32_t>(rhsSp[0]); 3364514f5e3Sopenharmony_ci for (int32_t i = pos; i <= max; i++) { 3374514f5e3Sopenharmony_ci if (static_cast<int32_t>(lhsSp[i]) != first) { 3384514f5e3Sopenharmony_ci i++; 3394514f5e3Sopenharmony_ci while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) { 3404514f5e3Sopenharmony_ci i++; 3414514f5e3Sopenharmony_ci } 3424514f5e3Sopenharmony_ci } 3434514f5e3Sopenharmony_ci /* Found first character, now look at the rest of rhsSp */ 3444514f5e3Sopenharmony_ci if (i <= max) { 3454514f5e3Sopenharmony_ci int j = i + 1; 3464514f5e3Sopenharmony_ci int end = j + static_cast<int>(rhsSp.size()) - 1; 3474514f5e3Sopenharmony_ci 3484514f5e3Sopenharmony_ci for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) { 3494514f5e3Sopenharmony_ci } 3504514f5e3Sopenharmony_ci if (j == end) { 3514514f5e3Sopenharmony_ci /* Found whole string. */ 3524514f5e3Sopenharmony_ci return i; 3534514f5e3Sopenharmony_ci } 3544514f5e3Sopenharmony_ci } 3554514f5e3Sopenharmony_ci } 3564514f5e3Sopenharmony_ci return -1; 3574514f5e3Sopenharmony_ci} 3584514f5e3Sopenharmony_ci 3594514f5e3Sopenharmony_citemplate<typename T1, typename T2> 3604514f5e3Sopenharmony_ciint32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos) 3614514f5e3Sopenharmony_ci{ 3624514f5e3Sopenharmony_ci int rhsSize = static_cast<int>(rhsSp.size()); 3634514f5e3Sopenharmony_ci ASSERT(rhsSize > 0); 3644514f5e3Sopenharmony_ci auto first = rhsSp[0]; 3654514f5e3Sopenharmony_ci for (int32_t i = pos; i >= 0; i--) { 3664514f5e3Sopenharmony_ci if (lhsSp[i] != first) { 3674514f5e3Sopenharmony_ci continue; 3684514f5e3Sopenharmony_ci } 3694514f5e3Sopenharmony_ci /* Found first character, now look at the rest of rhsSp */ 3704514f5e3Sopenharmony_ci int j = 1; 3714514f5e3Sopenharmony_ci while (j < rhsSize) { 3724514f5e3Sopenharmony_ci if (rhsSp[j] != lhsSp[i + j]) { 3734514f5e3Sopenharmony_ci break; 3744514f5e3Sopenharmony_ci } 3754514f5e3Sopenharmony_ci j++; 3764514f5e3Sopenharmony_ci } 3774514f5e3Sopenharmony_ci if (j == rhsSize) { 3784514f5e3Sopenharmony_ci return i; 3794514f5e3Sopenharmony_ci } 3804514f5e3Sopenharmony_ci } 3814514f5e3Sopenharmony_ci return -1; 3824514f5e3Sopenharmony_ci} 3834514f5e3Sopenharmony_ci 3844514f5e3Sopenharmony_ciint32_t EcmaString::IndexOf(const EcmaVM *vm, 3854514f5e3Sopenharmony_ci const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos) 3864514f5e3Sopenharmony_ci{ 3874514f5e3Sopenharmony_ci EcmaString *lhstring = *receiver; 3884514f5e3Sopenharmony_ci EcmaString *rhstring = *search; 3894514f5e3Sopenharmony_ci if (lhstring == nullptr || rhstring == nullptr) { 3904514f5e3Sopenharmony_ci return -1; 3914514f5e3Sopenharmony_ci } 3924514f5e3Sopenharmony_ci int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength()); 3934514f5e3Sopenharmony_ci int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength()); 3944514f5e3Sopenharmony_ci 3954514f5e3Sopenharmony_ci if (pos > lhsCount) { 3964514f5e3Sopenharmony_ci return -1; 3974514f5e3Sopenharmony_ci } 3984514f5e3Sopenharmony_ci 3994514f5e3Sopenharmony_ci if (rhsCount == 0) { 4004514f5e3Sopenharmony_ci return pos; 4014514f5e3Sopenharmony_ci } 4024514f5e3Sopenharmony_ci 4034514f5e3Sopenharmony_ci if (pos < 0) { 4044514f5e3Sopenharmony_ci pos = 0; 4054514f5e3Sopenharmony_ci } 4064514f5e3Sopenharmony_ci 4074514f5e3Sopenharmony_ci int32_t max = lhsCount - rhsCount; 4084514f5e3Sopenharmony_ci if (max < 0) { 4094514f5e3Sopenharmony_ci return -1; 4104514f5e3Sopenharmony_ci } 4114514f5e3Sopenharmony_ci 4124514f5e3Sopenharmony_ci if (pos + rhsCount > lhsCount) { 4134514f5e3Sopenharmony_ci return -1; 4144514f5e3Sopenharmony_ci } 4154514f5e3Sopenharmony_ci 4164514f5e3Sopenharmony_ci FlatStringInfo lhs = FlattenAllString(vm, receiver); 4174514f5e3Sopenharmony_ci JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString()); 4184514f5e3Sopenharmony_ci FlatStringInfo rhs = FlattenAllString(vm, search); 4194514f5e3Sopenharmony_ci lhs.SetString(*string); 4204514f5e3Sopenharmony_ci 4214514f5e3Sopenharmony_ci if (rhs.IsUtf8() && lhs.IsUtf8()) { 4224514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount); 4234514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount); 4244514f5e3Sopenharmony_ci return EcmaString::IndexOf(lhsSp, rhsSp, pos, max); 4254514f5e3Sopenharmony_ci } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return) 4264514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount); 4274514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount); 4284514f5e3Sopenharmony_ci return EcmaString::IndexOf(lhsSp, rhsSp, pos, max); 4294514f5e3Sopenharmony_ci } else if (rhs.IsUtf16()) { 4304514f5e3Sopenharmony_ci return -1; 4314514f5e3Sopenharmony_ci } else { // NOLINT(readability-else-after-return) 4324514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount); 4334514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount); 4344514f5e3Sopenharmony_ci return EcmaString::IndexOf(lhsSp, rhsSp, pos, max); 4354514f5e3Sopenharmony_ci } 4364514f5e3Sopenharmony_ci} 4374514f5e3Sopenharmony_ci 4384514f5e3Sopenharmony_ciint32_t EcmaString::LastIndexOf(const EcmaVM *vm, 4394514f5e3Sopenharmony_ci const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos) 4404514f5e3Sopenharmony_ci{ 4414514f5e3Sopenharmony_ci EcmaString *lhstring = *receiver; 4424514f5e3Sopenharmony_ci EcmaString *rhstring = *search; 4434514f5e3Sopenharmony_ci if (lhstring == nullptr || rhstring == nullptr) { 4444514f5e3Sopenharmony_ci return -1; 4454514f5e3Sopenharmony_ci } 4464514f5e3Sopenharmony_ci 4474514f5e3Sopenharmony_ci int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength()); 4484514f5e3Sopenharmony_ci int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength()); 4494514f5e3Sopenharmony_ci if (lhsCount < rhsCount) { 4504514f5e3Sopenharmony_ci return -1; 4514514f5e3Sopenharmony_ci } 4524514f5e3Sopenharmony_ci 4534514f5e3Sopenharmony_ci if (pos < 0) { 4544514f5e3Sopenharmony_ci pos = 0; 4554514f5e3Sopenharmony_ci } 4564514f5e3Sopenharmony_ci 4574514f5e3Sopenharmony_ci if (pos > lhsCount) { 4584514f5e3Sopenharmony_ci pos = lhsCount; 4594514f5e3Sopenharmony_ci } 4604514f5e3Sopenharmony_ci 4614514f5e3Sopenharmony_ci if (pos + rhsCount > lhsCount) { 4624514f5e3Sopenharmony_ci pos = lhsCount - rhsCount; 4634514f5e3Sopenharmony_ci } 4644514f5e3Sopenharmony_ci 4654514f5e3Sopenharmony_ci if (rhsCount == 0) { 4664514f5e3Sopenharmony_ci return pos; 4674514f5e3Sopenharmony_ci } 4684514f5e3Sopenharmony_ci 4694514f5e3Sopenharmony_ci FlatStringInfo lhs = FlattenAllString(vm, receiver); 4704514f5e3Sopenharmony_ci JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString()); 4714514f5e3Sopenharmony_ci FlatStringInfo rhs = FlattenAllString(vm, search); 4724514f5e3Sopenharmony_ci lhs.SetString(*string); 4734514f5e3Sopenharmony_ci if (rhs.IsUtf8() && lhs.IsUtf8()) { 4744514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount); 4754514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount); 4764514f5e3Sopenharmony_ci return EcmaString::LastIndexOf(lhsSp, rhsSp, pos); 4774514f5e3Sopenharmony_ci } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return) 4784514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount); 4794514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount); 4804514f5e3Sopenharmony_ci return EcmaString::LastIndexOf(lhsSp, rhsSp, pos); 4814514f5e3Sopenharmony_ci } else if (rhs.IsUtf16()) { 4824514f5e3Sopenharmony_ci return -1; 4834514f5e3Sopenharmony_ci } else { // NOLINT(readability-else-after-return) 4844514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount); 4854514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount); 4864514f5e3Sopenharmony_ci return EcmaString::LastIndexOf(lhsSp, rhsSp, pos); 4874514f5e3Sopenharmony_ci } 4884514f5e3Sopenharmony_ci} 4894514f5e3Sopenharmony_ci 4904514f5e3Sopenharmony_cistd::u16string EcmaString::ToU16String(uint32_t len) 4914514f5e3Sopenharmony_ci{ 4924514f5e3Sopenharmony_ci uint32_t length = len > 0 ? len : GetLength(); 4934514f5e3Sopenharmony_ci std::u16string result; 4944514f5e3Sopenharmony_ci if (IsUtf16()) { 4954514f5e3Sopenharmony_ci CVector<uint16_t> buf; 4964514f5e3Sopenharmony_ci const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf); 4974514f5e3Sopenharmony_ci result = base::StringHelper::Utf16ToU16String(data, length); 4984514f5e3Sopenharmony_ci } else { 4994514f5e3Sopenharmony_ci CVector<uint8_t> buf; 5004514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 5014514f5e3Sopenharmony_ci result = base::StringHelper::Utf8ToU16String(data, length); 5024514f5e3Sopenharmony_ci } 5034514f5e3Sopenharmony_ci return result; 5044514f5e3Sopenharmony_ci} 5054514f5e3Sopenharmony_ci 5064514f5e3Sopenharmony_ci//static 5074514f5e3Sopenharmony_ciuint32_t EcmaString::CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString, 5084514f5e3Sopenharmony_ci const JSHandle<EcmaString> &secondString) 5094514f5e3Sopenharmony_ci{ 5104514f5e3Sopenharmony_ci uint32_t hashCode; 5114514f5e3Sopenharmony_ci uint32_t firstLength = firstString->GetLength(); 5124514f5e3Sopenharmony_ci uint32_t secondLength = secondString->GetLength(); 5134514f5e3Sopenharmony_ci if ((firstLength + secondLength < MAX_ELEMENT_INDEX_LEN) && 5144514f5e3Sopenharmony_ci firstString->IsUtf8() && secondString->IsUtf8() && 5154514f5e3Sopenharmony_ci firstString->IsInteger() && secondString->IsInteger()) { 5164514f5e3Sopenharmony_ci firstString->HashIntegerString(firstLength, &hashCode, 0); 5174514f5e3Sopenharmony_ci secondString->HashIntegerString(secondLength, &hashCode, hashCode); 5184514f5e3Sopenharmony_ci return hashCode; 5194514f5e3Sopenharmony_ci } 5204514f5e3Sopenharmony_ci hashCode = EcmaString::CalculateConcatHashCode(firstString, secondString); 5214514f5e3Sopenharmony_ci hashCode = MixHashcode(hashCode, NOT_INTEGER); 5224514f5e3Sopenharmony_ci return hashCode; 5234514f5e3Sopenharmony_ci} 5244514f5e3Sopenharmony_ci 5254514f5e3Sopenharmony_ci// static 5264514f5e3Sopenharmony_citemplate<typename T1, typename T2> 5274514f5e3Sopenharmony_ciuint32_t EcmaString::CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst, 5284514f5e3Sopenharmony_ci const T2 *dataSecond, size_t sizeSecond) 5294514f5e3Sopenharmony_ci{ 5304514f5e3Sopenharmony_ci uint32_t totalHash = 0; 5314514f5e3Sopenharmony_ci constexpr uint32_t hashShift = static_cast<uint32_t>(EcmaStringHash::HASH_SHIFT); 5324514f5e3Sopenharmony_ci constexpr uint32_t blockSize = static_cast<size_t>(EcmaStringHash::BLOCK_SIZE); 5334514f5e3Sopenharmony_ci // The concatenated length of the two strings is less than MIN_SIZE_FOR_UNROLLING. 5344514f5e3Sopenharmony_ci if (sizeFirst + sizeSecond <= static_cast<size_t>(EcmaStringHash::MIN_SIZE_FOR_UNROLLING)) { 5354514f5e3Sopenharmony_ci for (uint32_t i = 0; i < sizeFirst; i++) { 5364514f5e3Sopenharmony_ci totalHash = (totalHash << hashShift) - totalHash + dataFirst[i]; 5374514f5e3Sopenharmony_ci } 5384514f5e3Sopenharmony_ci for (uint32_t i = 0; i < sizeSecond; i++) { 5394514f5e3Sopenharmony_ci totalHash = (totalHash << hashShift) - totalHash + dataSecond[i]; 5404514f5e3Sopenharmony_ci } 5414514f5e3Sopenharmony_ci return totalHash; 5424514f5e3Sopenharmony_ci } 5434514f5e3Sopenharmony_ci // Process the entire block of the first string. 5444514f5e3Sopenharmony_ci uint32_t hash[blockSize] = {0}; 5454514f5e3Sopenharmony_ci uint32_t index = 0; 5464514f5e3Sopenharmony_ci for (; index + blockSize <= sizeFirst; index += blockSize) { 5474514f5e3Sopenharmony_ci hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index]; 5484514f5e3Sopenharmony_ci hash[1] = (hash[1] << hashShift) - hash[1] + dataFirst[index + 1]; // 1: the second element 5494514f5e3Sopenharmony_ci hash[2] = (hash[2] << hashShift) - hash[2] + dataFirst[index + 2]; // 2: the third element 5504514f5e3Sopenharmony_ci hash[3] = (hash[3] << hashShift) - hash[3] + dataFirst[index + 3]; // 3: the fourth element 5514514f5e3Sopenharmony_ci } 5524514f5e3Sopenharmony_ci // The remaining total string length is less than a whole block. 5534514f5e3Sopenharmony_ci if ((sizeFirst % blockSize) + sizeSecond < blockSize) { 5544514f5e3Sopenharmony_ci for (; index < sizeFirst; ++index) { 5554514f5e3Sopenharmony_ci hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index]; 5564514f5e3Sopenharmony_ci } 5574514f5e3Sopenharmony_ci index = 0; 5584514f5e3Sopenharmony_ci } else { 5594514f5e3Sopenharmony_ci //Calculate the non-integral block portion at the end of the first string. 5604514f5e3Sopenharmony_ci for (; index < sizeFirst; ++index) { 5614514f5e3Sopenharmony_ci hash[index % blockSize] = (hash[index % blockSize] << hashShift) - 5624514f5e3Sopenharmony_ci hash[index % blockSize] + dataFirst[index]; 5634514f5e3Sopenharmony_ci } 5644514f5e3Sopenharmony_ci //Calculate the portion of the second string 5654514f5e3Sopenharmony_ci //that starts and aligns with an integral block at the end of the first string. 5664514f5e3Sopenharmony_ci uint32_t wholeBlockRemain = (blockSize - sizeFirst % blockSize) % blockSize; 5674514f5e3Sopenharmony_ci index = 0; 5684514f5e3Sopenharmony_ci for (; index < wholeBlockRemain && index < sizeSecond; ++index) { 5694514f5e3Sopenharmony_ci uint32_t nowHashIndex = sizeFirst % blockSize + index; 5704514f5e3Sopenharmony_ci hash[nowHashIndex] = (hash[nowHashIndex] << hashShift) - hash[nowHashIndex] + dataSecond[index]; 5714514f5e3Sopenharmony_ci } 5724514f5e3Sopenharmony_ci // Process the entire block of the Second string. 5734514f5e3Sopenharmony_ci for (; index + blockSize <= sizeSecond; index += blockSize) { 5744514f5e3Sopenharmony_ci hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index]; 5754514f5e3Sopenharmony_ci hash[1] = (hash[1] << hashShift) - hash[1] + dataSecond[index + 1]; // 1: the second element 5764514f5e3Sopenharmony_ci hash[2] = (hash[2] << hashShift) - hash[2] + dataSecond[index + 2]; // 2: the third element 5774514f5e3Sopenharmony_ci hash[3] = (hash[3] << hashShift) - hash[3] + dataSecond[index + 3]; // 3: the fourth element 5784514f5e3Sopenharmony_ci } 5794514f5e3Sopenharmony_ci } 5804514f5e3Sopenharmony_ci for (; index < sizeSecond; ++index) { 5814514f5e3Sopenharmony_ci hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index]; 5824514f5e3Sopenharmony_ci } 5834514f5e3Sopenharmony_ci for (uint32_t i = 0; i < blockSize; ++i) { 5844514f5e3Sopenharmony_ci totalHash = (totalHash << hashShift) - totalHash + hash[i]; 5854514f5e3Sopenharmony_ci } 5864514f5e3Sopenharmony_ci return totalHash; 5874514f5e3Sopenharmony_ci} 5884514f5e3Sopenharmony_ci 5894514f5e3Sopenharmony_ci// static 5904514f5e3Sopenharmony_ciuint32_t EcmaString::CalculateConcatHashCode(const JSHandle<EcmaString> &firstString, 5914514f5e3Sopenharmony_ci const JSHandle<EcmaString> &secondString) 5924514f5e3Sopenharmony_ci{ 5934514f5e3Sopenharmony_ci bool isFirstStringUtf8 = EcmaStringAccessor(firstString).IsUtf8(); 5944514f5e3Sopenharmony_ci bool isSecondStringUtf8 = EcmaStringAccessor(secondString).IsUtf8(); 5954514f5e3Sopenharmony_ci EcmaString *firstStr = *firstString; 5964514f5e3Sopenharmony_ci EcmaString *secondStr = *secondString; 5974514f5e3Sopenharmony_ci CVector<uint8_t> bufFirstUint8; 5984514f5e3Sopenharmony_ci CVector<uint8_t> bufSecondUint8; 5994514f5e3Sopenharmony_ci CVector<uint16_t> bufFirstUint16; 6004514f5e3Sopenharmony_ci CVector<uint16_t> bufSecondUint16; 6014514f5e3Sopenharmony_ci if (isFirstStringUtf8 && isSecondStringUtf8) { 6024514f5e3Sopenharmony_ci const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8); 6034514f5e3Sopenharmony_ci const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8); 6044514f5e3Sopenharmony_ci return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(), 6054514f5e3Sopenharmony_ci dataSecond, secondStr->GetLength()); 6064514f5e3Sopenharmony_ci } 6074514f5e3Sopenharmony_ci if (!isFirstStringUtf8 && isSecondStringUtf8) { 6084514f5e3Sopenharmony_ci const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16); 6094514f5e3Sopenharmony_ci const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8); 6104514f5e3Sopenharmony_ci return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(), 6114514f5e3Sopenharmony_ci dataSecond, secondStr->GetLength()); 6124514f5e3Sopenharmony_ci } 6134514f5e3Sopenharmony_ci if (isFirstStringUtf8 && !isSecondStringUtf8) { 6144514f5e3Sopenharmony_ci const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8); 6154514f5e3Sopenharmony_ci const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16); 6164514f5e3Sopenharmony_ci return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(), 6174514f5e3Sopenharmony_ci dataSecond, secondStr->GetLength()); 6184514f5e3Sopenharmony_ci } 6194514f5e3Sopenharmony_ci { 6204514f5e3Sopenharmony_ci const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16); 6214514f5e3Sopenharmony_ci const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16); 6224514f5e3Sopenharmony_ci return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(), 6234514f5e3Sopenharmony_ci dataSecond, secondStr->GetLength()); 6244514f5e3Sopenharmony_ci } 6254514f5e3Sopenharmony_ci} 6264514f5e3Sopenharmony_ci 6274514f5e3Sopenharmony_ci// static 6284514f5e3Sopenharmony_cibool EcmaString::CanBeCompressed(const EcmaString *string) 6294514f5e3Sopenharmony_ci{ 6304514f5e3Sopenharmony_ci ASSERT(string->IsLineOrConstantString()); 6314514f5e3Sopenharmony_ci if (string->IsUtf8()) { 6324514f5e3Sopenharmony_ci return CanBeCompressed(string->GetDataUtf8(), string->GetLength()); 6334514f5e3Sopenharmony_ci } 6344514f5e3Sopenharmony_ci return CanBeCompressed(string->GetDataUtf16(), string->GetLength()); 6354514f5e3Sopenharmony_ci} 6364514f5e3Sopenharmony_ci 6374514f5e3Sopenharmony_ci// static 6384514f5e3Sopenharmony_cibool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) 6394514f5e3Sopenharmony_ci{ 6404514f5e3Sopenharmony_ci uint32_t index = 0; 6414514f5e3Sopenharmony_ci for (; index + 4 <= utf8Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed 6424514f5e3Sopenharmony_ci // Check if all four characters in the current block are ASCII characters 6434514f5e3Sopenharmony_ci if (!IsASCIICharacter(utf8Data[index]) || 6444514f5e3Sopenharmony_ci !IsASCIICharacter(utf8Data[index + 1]) || // 1: the second element of the block 6454514f5e3Sopenharmony_ci !IsASCIICharacter(utf8Data[index + 2]) || // 2: the third element of the block 6464514f5e3Sopenharmony_ci !IsASCIICharacter(utf8Data[index + 3])) { // 3: the fourth element of the block 6474514f5e3Sopenharmony_ci return false; 6484514f5e3Sopenharmony_ci } 6494514f5e3Sopenharmony_ci } 6504514f5e3Sopenharmony_ci // Check remaining characters if they are ASCII 6514514f5e3Sopenharmony_ci for (; index < utf8Len; ++index) { 6524514f5e3Sopenharmony_ci if (!IsASCIICharacter(utf8Data[index])) { 6534514f5e3Sopenharmony_ci return false; 6544514f5e3Sopenharmony_ci } 6554514f5e3Sopenharmony_ci } 6564514f5e3Sopenharmony_ci return true; 6574514f5e3Sopenharmony_ci} 6584514f5e3Sopenharmony_ci 6594514f5e3Sopenharmony_ci/* static */ 6604514f5e3Sopenharmony_cibool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len) 6614514f5e3Sopenharmony_ci{ 6624514f5e3Sopenharmony_ci uint32_t index = 0; 6634514f5e3Sopenharmony_ci for (; index + 4 <= utf16Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed 6644514f5e3Sopenharmony_ci // Check if all four characters in the current block are ASCII characters 6654514f5e3Sopenharmony_ci if (!IsASCIICharacter(utf16Data[index]) || 6664514f5e3Sopenharmony_ci !IsASCIICharacter(utf16Data[index + 1]) || // 1: the second element of the block 6674514f5e3Sopenharmony_ci !IsASCIICharacter(utf16Data[index + 2]) || // 2: the third element of the block 6684514f5e3Sopenharmony_ci !IsASCIICharacter(utf16Data[index + 3])) { // 3: the fourth element of the block 6694514f5e3Sopenharmony_ci return false; 6704514f5e3Sopenharmony_ci } 6714514f5e3Sopenharmony_ci } 6724514f5e3Sopenharmony_ci // Check remaining characters if they are ASCII 6734514f5e3Sopenharmony_ci for (; index < utf16Len; ++index) { 6744514f5e3Sopenharmony_ci if (!IsASCIICharacter(utf16Data[index])) { 6754514f5e3Sopenharmony_ci return false; 6764514f5e3Sopenharmony_ci } 6774514f5e3Sopenharmony_ci } 6784514f5e3Sopenharmony_ci return true; 6794514f5e3Sopenharmony_ci} 6804514f5e3Sopenharmony_ci 6814514f5e3Sopenharmony_cibool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2) 6824514f5e3Sopenharmony_ci{ 6834514f5e3Sopenharmony_ci ASSERT(NotTreeString()); 6844514f5e3Sopenharmony_ci ASSERT(str1->NotTreeString() && str2->NotTreeString()); 6854514f5e3Sopenharmony_ci if (GetLength() != str1->GetLength() + str2->GetLength()) { 6864514f5e3Sopenharmony_ci return false; 6874514f5e3Sopenharmony_ci } 6884514f5e3Sopenharmony_ci if (IsUtf16()) { 6894514f5e3Sopenharmony_ci CVector<uint16_t> buf; 6904514f5e3Sopenharmony_ci const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf); 6914514f5e3Sopenharmony_ci if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) { 6924514f5e3Sopenharmony_ci return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength()); 6934514f5e3Sopenharmony_ci } 6944514f5e3Sopenharmony_ci } else { 6954514f5e3Sopenharmony_ci CVector<uint8_t> buf; 6964514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 6974514f5e3Sopenharmony_ci if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) { 6984514f5e3Sopenharmony_ci return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(), 6994514f5e3Sopenharmony_ci str2->GetLength(), this->IsUtf8()); 7004514f5e3Sopenharmony_ci } 7014514f5e3Sopenharmony_ci } 7024514f5e3Sopenharmony_ci return false; 7034514f5e3Sopenharmony_ci} 7044514f5e3Sopenharmony_ci 7054514f5e3Sopenharmony_ci/* static */ 7064514f5e3Sopenharmony_cibool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right) 7074514f5e3Sopenharmony_ci{ 7084514f5e3Sopenharmony_ci CVector<uint16_t> bufLeftUft16; 7094514f5e3Sopenharmony_ci CVector<uint16_t> bufRightUft16; 7104514f5e3Sopenharmony_ci CVector<uint8_t> bufLeftUft8; 7114514f5e3Sopenharmony_ci CVector<uint8_t> bufRightUft8; 7124514f5e3Sopenharmony_ci int32_t lhsCount = static_cast<int32_t>(left->GetLength()); 7134514f5e3Sopenharmony_ci int32_t rhsCount = static_cast<int32_t>(right->GetLength()); 7144514f5e3Sopenharmony_ci if (!left->IsUtf16() && !right->IsUtf16()) { 7154514f5e3Sopenharmony_ci const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8); 7164514f5e3Sopenharmony_ci const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8); 7174514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(data1, lhsCount); 7184514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(data2, rhsCount); 7194514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(lhsSp, rhsSp); 7204514f5e3Sopenharmony_ci } else if (!left->IsUtf16()) { 7214514f5e3Sopenharmony_ci const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8); 7224514f5e3Sopenharmony_ci const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16); 7234514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(data1, lhsCount); 7244514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(data2, rhsCount); 7254514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(lhsSp, rhsSp); 7264514f5e3Sopenharmony_ci } else if (!right->IsUtf16()) { 7274514f5e3Sopenharmony_ci const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16); 7284514f5e3Sopenharmony_ci const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8); 7294514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(data1, lhsCount); 7304514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(data2, rhsCount); 7314514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(lhsSp, rhsSp); 7324514f5e3Sopenharmony_ci } else { 7334514f5e3Sopenharmony_ci const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16); 7344514f5e3Sopenharmony_ci const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16); 7354514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(data1, lhsCount); 7364514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(data2, rhsCount); 7374514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(lhsSp, rhsSp); 7384514f5e3Sopenharmony_ci } 7394514f5e3Sopenharmony_ci} 7404514f5e3Sopenharmony_ci 7414514f5e3Sopenharmony_ci/* static */ 7424514f5e3Sopenharmony_cibool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right) 7434514f5e3Sopenharmony_ci{ 7444514f5e3Sopenharmony_ci int32_t lhsCount = static_cast<int32_t>(left.GetLength()); 7454514f5e3Sopenharmony_ci int32_t rhsCount = static_cast<int32_t>(right.GetLength()); 7464514f5e3Sopenharmony_ci if (!left.IsUtf16() && !right.IsUtf16()) { 7474514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount); 7484514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount); 7494514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(lhsSp, rhsSp); 7504514f5e3Sopenharmony_ci } else if (!left.IsUtf16()) { 7514514f5e3Sopenharmony_ci Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount); 7524514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount); 7534514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(lhsSp, rhsSp); 7544514f5e3Sopenharmony_ci } else if (!right.IsUtf16()) { 7554514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount); 7564514f5e3Sopenharmony_ci Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount); 7574514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(lhsSp, rhsSp); 7584514f5e3Sopenharmony_ci } else { 7594514f5e3Sopenharmony_ci Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount); 7604514f5e3Sopenharmony_ci Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount); 7614514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(lhsSp, rhsSp); 7624514f5e3Sopenharmony_ci } 7634514f5e3Sopenharmony_ci} 7644514f5e3Sopenharmony_ci 7654514f5e3Sopenharmony_cibool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2) 7664514f5e3Sopenharmony_ci{ 7674514f5e3Sopenharmony_ci if (str1 == str2) { 7684514f5e3Sopenharmony_ci return true; 7694514f5e3Sopenharmony_ci } 7704514f5e3Sopenharmony_ci if (str1->IsInternString() && str2->IsInternString()) { 7714514f5e3Sopenharmony_ci return false; 7724514f5e3Sopenharmony_ci } 7734514f5e3Sopenharmony_ci uint32_t str1Len = str1->GetLength(); 7744514f5e3Sopenharmony_ci if (str1Len != str2->GetLength()) { 7754514f5e3Sopenharmony_ci return false; 7764514f5e3Sopenharmony_ci } 7774514f5e3Sopenharmony_ci if (str1Len == 0) { 7784514f5e3Sopenharmony_ci return true; 7794514f5e3Sopenharmony_ci } 7804514f5e3Sopenharmony_ci 7814514f5e3Sopenharmony_ci uint32_t str1Hash; 7824514f5e3Sopenharmony_ci uint32_t str2Hash; 7834514f5e3Sopenharmony_ci if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) { 7844514f5e3Sopenharmony_ci if (str1Hash != str2Hash) { 7854514f5e3Sopenharmony_ci return false; 7864514f5e3Sopenharmony_ci } 7874514f5e3Sopenharmony_ci } 7884514f5e3Sopenharmony_ci FlatStringInfo str1Flat = FlattenAllString(vm, str1); 7894514f5e3Sopenharmony_ci JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString()); 7904514f5e3Sopenharmony_ci FlatStringInfo str2Flat = FlattenAllString(vm, str2); 7914514f5e3Sopenharmony_ci str1Flat.SetString(*string); 7924514f5e3Sopenharmony_ci return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat); 7934514f5e3Sopenharmony_ci} 7944514f5e3Sopenharmony_ci 7954514f5e3Sopenharmony_ci/* static */ 7964514f5e3Sopenharmony_cibool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2) 7974514f5e3Sopenharmony_ci{ 7984514f5e3Sopenharmony_ci if (str1 == str2) { 7994514f5e3Sopenharmony_ci return true; 8004514f5e3Sopenharmony_ci } 8014514f5e3Sopenharmony_ci uint32_t str1Len = str1->GetLength(); 8024514f5e3Sopenharmony_ci if (str1Len != str2->GetLength()) { 8034514f5e3Sopenharmony_ci return false; 8044514f5e3Sopenharmony_ci } 8054514f5e3Sopenharmony_ci if (str1Len == 0) { 8064514f5e3Sopenharmony_ci return true; 8074514f5e3Sopenharmony_ci } 8084514f5e3Sopenharmony_ci 8094514f5e3Sopenharmony_ci uint32_t str1Hash; 8104514f5e3Sopenharmony_ci uint32_t str2Hash; 8114514f5e3Sopenharmony_ci if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) { 8124514f5e3Sopenharmony_ci if (str1Hash != str2Hash) { 8134514f5e3Sopenharmony_ci return false; 8144514f5e3Sopenharmony_ci } 8154514f5e3Sopenharmony_ci } 8164514f5e3Sopenharmony_ci return StringsAreEqualDiffUtfEncoding(str1, str2); 8174514f5e3Sopenharmony_ci} 8184514f5e3Sopenharmony_ci 8194514f5e3Sopenharmony_ci/* static */ 8204514f5e3Sopenharmony_cibool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 8214514f5e3Sopenharmony_ci bool canBeCompressToUtf8) 8224514f5e3Sopenharmony_ci{ 8234514f5e3Sopenharmony_ci if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) { 8244514f5e3Sopenharmony_ci return false; 8254514f5e3Sopenharmony_ci } 8264514f5e3Sopenharmony_ci if (canBeCompressToUtf8 && str1->GetLength() != dataLen) { 8274514f5e3Sopenharmony_ci return false; 8284514f5e3Sopenharmony_ci } 8294514f5e3Sopenharmony_ci if (str1->IsUtf8()) { 8304514f5e3Sopenharmony_ci CVector<uint8_t> buf; 8314514f5e3Sopenharmony_ci Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen); 8324514f5e3Sopenharmony_ci Span<const uint8_t> data2(dataAddr, dataLen); 8334514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(data1, data2); 8344514f5e3Sopenharmony_ci } 8354514f5e3Sopenharmony_ci CVector<uint16_t> buf; 8364514f5e3Sopenharmony_ci uint32_t length = str1->GetLength(); 8374514f5e3Sopenharmony_ci const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf); 8384514f5e3Sopenharmony_ci return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length); 8394514f5e3Sopenharmony_ci} 8404514f5e3Sopenharmony_ci 8414514f5e3Sopenharmony_ci/* static */ 8424514f5e3Sopenharmony_cibool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len) 8434514f5e3Sopenharmony_ci{ 8444514f5e3Sopenharmony_ci uint32_t length = str1->GetLength(); 8454514f5e3Sopenharmony_ci if (length != utf16Len) { 8464514f5e3Sopenharmony_ci return false; 8474514f5e3Sopenharmony_ci } 8484514f5e3Sopenharmony_ci if (str1->IsUtf8()) { 8494514f5e3Sopenharmony_ci CVector<uint8_t> buf; 8504514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf); 8514514f5e3Sopenharmony_ci return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len); 8524514f5e3Sopenharmony_ci } else { 8534514f5e3Sopenharmony_ci CVector<uint16_t> buf; 8544514f5e3Sopenharmony_ci Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length); 8554514f5e3Sopenharmony_ci Span<const uint16_t> data2(utf16Data, utf16Len); 8564514f5e3Sopenharmony_ci return EcmaString::StringsAreEquals(data1, data2); 8574514f5e3Sopenharmony_ci } 8584514f5e3Sopenharmony_ci} 8594514f5e3Sopenharmony_ci 8604514f5e3Sopenharmony_citemplate<typename T> 8614514f5e3Sopenharmony_cibool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count) 8624514f5e3Sopenharmony_ci{ 8634514f5e3Sopenharmony_ci ASSERT(dstMax >= count); 8644514f5e3Sopenharmony_ci ASSERT(dst.Size() >= src.Size()); 8654514f5e3Sopenharmony_ci if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) { 8664514f5e3Sopenharmony_ci LOG_FULL(FATAL) << "memcpy_s failed"; 8674514f5e3Sopenharmony_ci UNREACHABLE(); 8684514f5e3Sopenharmony_ci } 8694514f5e3Sopenharmony_ci return true; 8704514f5e3Sopenharmony_ci} 8714514f5e3Sopenharmony_ci 8724514f5e3Sopenharmony_cibool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const 8734514f5e3Sopenharmony_ci{ 8744514f5e3Sopenharmony_ci ASSERT(length >= 0); 8754514f5e3Sopenharmony_ci Span<const uint8_t> str = FastToUtf8Span(); 8764514f5e3Sopenharmony_ci return HashIntegerString(str.data(), length, hash, hashSeed); 8774514f5e3Sopenharmony_ci} 8784514f5e3Sopenharmony_ci 8794514f5e3Sopenharmony_ciuint32_t EcmaString::ComputeHashcode() const 8804514f5e3Sopenharmony_ci{ 8814514f5e3Sopenharmony_ci auto [hash, isInteger] = ComputeRawHashcode(); 8824514f5e3Sopenharmony_ci return MixHashcode(hash, isInteger); 8834514f5e3Sopenharmony_ci} 8844514f5e3Sopenharmony_ci 8854514f5e3Sopenharmony_ci// hashSeed only be used when computing two separate strings merged hashcode. 8864514f5e3Sopenharmony_cistd::pair<uint32_t, bool> EcmaString::ComputeRawHashcode() const 8874514f5e3Sopenharmony_ci{ 8884514f5e3Sopenharmony_ci uint32_t hash = 0; 8894514f5e3Sopenharmony_ci uint32_t length = GetLength(); 8904514f5e3Sopenharmony_ci if (length == 0) { 8914514f5e3Sopenharmony_ci return {hash, false}; 8924514f5e3Sopenharmony_ci } 8934514f5e3Sopenharmony_ci 8944514f5e3Sopenharmony_ci if (IsUtf8()) { 8954514f5e3Sopenharmony_ci // String using UTF8 encoding, and length smaller than 10, try to compute integer hash. 8964514f5e3Sopenharmony_ci if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) { 8974514f5e3Sopenharmony_ci return {hash, true}; 8984514f5e3Sopenharmony_ci } 8994514f5e3Sopenharmony_ci CVector<uint8_t> buf; 9004514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 9014514f5e3Sopenharmony_ci // String can not convert to integer number, using normal hashcode computing algorithm. 9024514f5e3Sopenharmony_ci hash = this->ComputeHashForData(data, length, 0); 9034514f5e3Sopenharmony_ci return {hash, false}; 9044514f5e3Sopenharmony_ci } else { 9054514f5e3Sopenharmony_ci CVector<uint16_t> buf; 9064514f5e3Sopenharmony_ci const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf); 9074514f5e3Sopenharmony_ci // If rawSeed has certain value, and second string uses UTF16 encoding, 9084514f5e3Sopenharmony_ci // then merged string can not be small integer number. 9094514f5e3Sopenharmony_ci hash = this->ComputeHashForData(data, length, 0); 9104514f5e3Sopenharmony_ci return {hash, false}; 9114514f5e3Sopenharmony_ci } 9124514f5e3Sopenharmony_ci} 9134514f5e3Sopenharmony_ci 9144514f5e3Sopenharmony_ci// hashSeed only be used when computing two separate strings merged hashcode. 9154514f5e3Sopenharmony_ciuint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const 9164514f5e3Sopenharmony_ci{ 9174514f5e3Sopenharmony_ci uint32_t hash; 9184514f5e3Sopenharmony_ci uint32_t length = GetLength(); 9194514f5e3Sopenharmony_ci if (length == 0) { 9204514f5e3Sopenharmony_ci return MixHashcode(rawHashSeed, isInteger); 9214514f5e3Sopenharmony_ci } 9224514f5e3Sopenharmony_ci 9234514f5e3Sopenharmony_ci if (IsUtf8()) { 9244514f5e3Sopenharmony_ci // String using UTF8 encoding, and length smaller than 10, try to compute integer hash. 9254514f5e3Sopenharmony_ci if ((rawHashSeed == 0 || isInteger) && 9264514f5e3Sopenharmony_ci length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) { 9274514f5e3Sopenharmony_ci return hash; 9284514f5e3Sopenharmony_ci } 9294514f5e3Sopenharmony_ci CVector<uint8_t> buf; 9304514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 9314514f5e3Sopenharmony_ci // String can not convert to integer number, using normal hashcode computing algorithm. 9324514f5e3Sopenharmony_ci hash = this->ComputeHashForData(data, length, rawHashSeed); 9334514f5e3Sopenharmony_ci return MixHashcode(hash, NOT_INTEGER); 9344514f5e3Sopenharmony_ci } else { 9354514f5e3Sopenharmony_ci CVector<uint16_t> buf; 9364514f5e3Sopenharmony_ci const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf); 9374514f5e3Sopenharmony_ci // If rawSeed has certain value, and second string uses UTF16 encoding, 9384514f5e3Sopenharmony_ci // then merged string can not be small integer number. 9394514f5e3Sopenharmony_ci hash = this->ComputeHashForData(data, length, rawHashSeed); 9404514f5e3Sopenharmony_ci return MixHashcode(hash, NOT_INTEGER); 9414514f5e3Sopenharmony_ci } 9424514f5e3Sopenharmony_ci} 9434514f5e3Sopenharmony_ci 9444514f5e3Sopenharmony_ci/* static */ 9454514f5e3Sopenharmony_ciuint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress) 9464514f5e3Sopenharmony_ci{ 9474514f5e3Sopenharmony_ci uint32_t mixHash = 0; 9484514f5e3Sopenharmony_ci if (canBeCompress) { 9494514f5e3Sopenharmony_ci // String using UTF8 encoding, and length smaller than 10, try to compute integer hash. 9504514f5e3Sopenharmony_ci if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) { 9514514f5e3Sopenharmony_ci return mixHash; 9524514f5e3Sopenharmony_ci } 9534514f5e3Sopenharmony_ci uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0); 9544514f5e3Sopenharmony_ci return MixHashcode(hash, NOT_INTEGER); 9554514f5e3Sopenharmony_ci } else { 9564514f5e3Sopenharmony_ci auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len); 9574514f5e3Sopenharmony_ci CVector<uint16_t> tmpBuffer(utf16Len); 9584514f5e3Sopenharmony_ci [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, 9594514f5e3Sopenharmony_ci utf16Len); 9604514f5e3Sopenharmony_ci ASSERT(len == utf16Len); 9614514f5e3Sopenharmony_ci uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0); 9624514f5e3Sopenharmony_ci return MixHashcode(hash, NOT_INTEGER); 9634514f5e3Sopenharmony_ci } 9644514f5e3Sopenharmony_ci LOG_ECMA(FATAL) << "this branch is unreachable"; 9654514f5e3Sopenharmony_ci UNREACHABLE(); 9664514f5e3Sopenharmony_ci} 9674514f5e3Sopenharmony_ci 9684514f5e3Sopenharmony_ci/* static */ 9694514f5e3Sopenharmony_ciuint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length) 9704514f5e3Sopenharmony_ci{ 9714514f5e3Sopenharmony_ci uint32_t mixHash = 0; 9724514f5e3Sopenharmony_ci // String length smaller than 10, try to compute integer hash. 9734514f5e3Sopenharmony_ci if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) { 9744514f5e3Sopenharmony_ci return mixHash; 9754514f5e3Sopenharmony_ci } 9764514f5e3Sopenharmony_ci uint32_t hash = ComputeHashForData(utf16Data, length, 0); 9774514f5e3Sopenharmony_ci return MixHashcode(hash, NOT_INTEGER); 9784514f5e3Sopenharmony_ci} 9794514f5e3Sopenharmony_ci 9804514f5e3Sopenharmony_ci// drop the tail bytes if the remain length can't fill the length it represents. 9814514f5e3Sopenharmony_cistatic size_t FixUtf8Len(const uint8_t* utf8, size_t utf8Len) 9824514f5e3Sopenharmony_ci{ 9834514f5e3Sopenharmony_ci constexpr size_t TWO_BYTES_LENGTH = 2; 9844514f5e3Sopenharmony_ci constexpr size_t THREE_BYTES_LENGTH = 3; 9854514f5e3Sopenharmony_ci size_t trimSize = 0; 9864514f5e3Sopenharmony_ci if (utf8Len >= 1 && utf8[utf8Len - 1] >= 0xC0) { 9874514f5e3Sopenharmony_ci // The last one char claim there are more than 1 byte next to it, it's invalid, so drop the last one. 9884514f5e3Sopenharmony_ci trimSize = 1; 9894514f5e3Sopenharmony_ci } 9904514f5e3Sopenharmony_ci if (utf8Len >= TWO_BYTES_LENGTH && utf8[utf8Len - TWO_BYTES_LENGTH] >= 0xE0) { 9914514f5e3Sopenharmony_ci // The second to last char claim there are more than 2 bytes next to it, it's invalid, so drop the last two. 9924514f5e3Sopenharmony_ci trimSize = TWO_BYTES_LENGTH; 9934514f5e3Sopenharmony_ci } 9944514f5e3Sopenharmony_ci if (utf8Len >= THREE_BYTES_LENGTH && utf8[utf8Len - THREE_BYTES_LENGTH] >= 0xF0) { 9954514f5e3Sopenharmony_ci // The third to last char claim there are more than 3 bytes next to it, it's invalid, so drop the last three. 9964514f5e3Sopenharmony_ci trimSize = THREE_BYTES_LENGTH; 9974514f5e3Sopenharmony_ci } 9984514f5e3Sopenharmony_ci return utf8Len - trimSize; 9994514f5e3Sopenharmony_ci} 10004514f5e3Sopenharmony_ci 10014514f5e3Sopenharmony_ci 10024514f5e3Sopenharmony_ci/* static */ 10034514f5e3Sopenharmony_cibool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, 10044514f5e3Sopenharmony_ci const uint16_t *utf16Data, uint32_t utf16Len) 10054514f5e3Sopenharmony_ci{ 10064514f5e3Sopenharmony_ci size_t safeUtf8Len = FixUtf8Len(utf8Data, utf8Len); 10074514f5e3Sopenharmony_ci const uint8_t *utf8End = utf8Data + utf8Len; 10084514f5e3Sopenharmony_ci const uint8_t *utf8SafeEnd = utf8Data + safeUtf8Len; 10094514f5e3Sopenharmony_ci const uint16_t *utf16End = utf16Data + utf16Len; 10104514f5e3Sopenharmony_ci while (utf8Data < utf8SafeEnd && utf16Data < utf16End) { 10114514f5e3Sopenharmony_ci uint8_t src = *utf8Data; 10124514f5e3Sopenharmony_ci switch (src & 0xF0) { 10134514f5e3Sopenharmony_ci case 0xF0: { 10144514f5e3Sopenharmony_ci const uint8_t c2 = *(++utf8Data); 10154514f5e3Sopenharmony_ci const uint8_t c3 = *(++utf8Data); 10164514f5e3Sopenharmony_ci const uint8_t c4 = *(++utf8Data); 10174514f5e3Sopenharmony_ci uint32_t codePoint = ((src & LOW_3BITS) << OFFSET_18POS) | ((c2 & LOW_6BITS) << OFFSET_12POS) | 10184514f5e3Sopenharmony_ci ((c3 & LOW_6BITS) << OFFSET_6POS) | (c4 & LOW_6BITS); 10194514f5e3Sopenharmony_ci if (codePoint >= SURROGATE_RAIR_START) { 10204514f5e3Sopenharmony_ci if (utf16Data >= utf16End - 1) { 10214514f5e3Sopenharmony_ci return false; 10224514f5e3Sopenharmony_ci } 10234514f5e3Sopenharmony_ci codePoint -= SURROGATE_RAIR_START; 10244514f5e3Sopenharmony_ci if (*utf16Data++ != static_cast<uint16_t>((codePoint >> OFFSET_10POS) | H_SURROGATE_START)) { 10254514f5e3Sopenharmony_ci return false; 10264514f5e3Sopenharmony_ci } else if (*utf16Data++ != static_cast<uint16_t>((codePoint & 0x3FF) | L_SURROGATE_START)) { 10274514f5e3Sopenharmony_ci return false; 10284514f5e3Sopenharmony_ci } 10294514f5e3Sopenharmony_ci } else { 10304514f5e3Sopenharmony_ci if (*utf16Data++ != static_cast<uint16_t>(codePoint)) { 10314514f5e3Sopenharmony_ci return false; 10324514f5e3Sopenharmony_ci } 10334514f5e3Sopenharmony_ci } 10344514f5e3Sopenharmony_ci utf8Data++; 10354514f5e3Sopenharmony_ci break; 10364514f5e3Sopenharmony_ci } 10374514f5e3Sopenharmony_ci case 0xE0: { 10384514f5e3Sopenharmony_ci const uint8_t c2 = *(++utf8Data); 10394514f5e3Sopenharmony_ci const uint8_t c3 = *(++utf8Data); 10404514f5e3Sopenharmony_ci if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_4BITS) << OFFSET_12POS) | 10414514f5e3Sopenharmony_ci ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS))) { 10424514f5e3Sopenharmony_ci return false; 10434514f5e3Sopenharmony_ci } 10444514f5e3Sopenharmony_ci utf8Data++; 10454514f5e3Sopenharmony_ci break; 10464514f5e3Sopenharmony_ci } 10474514f5e3Sopenharmony_ci case 0xD0: 10484514f5e3Sopenharmony_ci case 0xC0: { 10494514f5e3Sopenharmony_ci const uint8_t c2 = *(++utf8Data); 10504514f5e3Sopenharmony_ci if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_5BITS) << OFFSET_6POS) | (c2 & LOW_6BITS))) { 10514514f5e3Sopenharmony_ci return false; 10524514f5e3Sopenharmony_ci } 10534514f5e3Sopenharmony_ci utf8Data++; 10544514f5e3Sopenharmony_ci break; 10554514f5e3Sopenharmony_ci } 10564514f5e3Sopenharmony_ci default: 10574514f5e3Sopenharmony_ci do { 10584514f5e3Sopenharmony_ci if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) { 10594514f5e3Sopenharmony_ci return false; 10604514f5e3Sopenharmony_ci } 10614514f5e3Sopenharmony_ci } while (utf8Data < utf8SafeEnd && utf16Data < utf16End && *utf8Data < 0x80); 10624514f5e3Sopenharmony_ci break; 10634514f5e3Sopenharmony_ci } 10644514f5e3Sopenharmony_ci } 10654514f5e3Sopenharmony_ci // The remain chars should be treated as single byte char. 10664514f5e3Sopenharmony_ci while (utf8Data < utf8End && utf16Data < utf16End) { 10674514f5e3Sopenharmony_ci if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) { 10684514f5e3Sopenharmony_ci return false; 10694514f5e3Sopenharmony_ci } 10704514f5e3Sopenharmony_ci } 10714514f5e3Sopenharmony_ci return utf8Data == utf8End && utf16Data == utf16End; 10724514f5e3Sopenharmony_ci} 10734514f5e3Sopenharmony_ci 10744514f5e3Sopenharmony_cibool EcmaString::ToElementIndex(uint32_t *index) 10754514f5e3Sopenharmony_ci{ 10764514f5e3Sopenharmony_ci uint32_t len = GetLength(); 10774514f5e3Sopenharmony_ci if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers) 10784514f5e3Sopenharmony_ci return false; 10794514f5e3Sopenharmony_ci } 10804514f5e3Sopenharmony_ci if (UNLIKELY(IsUtf16())) { 10814514f5e3Sopenharmony_ci return false; 10824514f5e3Sopenharmony_ci } 10834514f5e3Sopenharmony_ci 10844514f5e3Sopenharmony_ci // fast path: get integer from string's hash value 10854514f5e3Sopenharmony_ci if (TryToGetInteger(index)) { 10864514f5e3Sopenharmony_ci return true; 10874514f5e3Sopenharmony_ci } 10884514f5e3Sopenharmony_ci 10894514f5e3Sopenharmony_ci CVector<uint8_t> buf; 10904514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 10914514f5e3Sopenharmony_ci uint32_t c = data[0]; 10924514f5e3Sopenharmony_ci uint64_t n = 0; 10934514f5e3Sopenharmony_ci if (c == '0') { 10944514f5e3Sopenharmony_ci *index = 0; 10954514f5e3Sopenharmony_ci return len == 1; 10964514f5e3Sopenharmony_ci } 10974514f5e3Sopenharmony_ci uint32_t loopStart = 0; 10984514f5e3Sopenharmony_ci if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) { 10994514f5e3Sopenharmony_ci *index = n; 11004514f5e3Sopenharmony_ci return true; 11014514f5e3Sopenharmony_ci } 11024514f5e3Sopenharmony_ci return false; 11034514f5e3Sopenharmony_ci} 11044514f5e3Sopenharmony_ci 11054514f5e3Sopenharmony_cibool EcmaString::ToInt(int32_t *index, bool *negative) 11064514f5e3Sopenharmony_ci{ 11074514f5e3Sopenharmony_ci uint32_t len = GetLength(); 11084514f5e3Sopenharmony_ci if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers) 11094514f5e3Sopenharmony_ci return false; 11104514f5e3Sopenharmony_ci } 11114514f5e3Sopenharmony_ci if (UNLIKELY(IsUtf16())) { 11124514f5e3Sopenharmony_ci return false; 11134514f5e3Sopenharmony_ci } 11144514f5e3Sopenharmony_ci CVector<uint8_t> buf; 11154514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 11164514f5e3Sopenharmony_ci uint32_t c = data[0]; 11174514f5e3Sopenharmony_ci uint32_t loopStart = 0; 11184514f5e3Sopenharmony_ci uint64_t n = 0; 11194514f5e3Sopenharmony_ci if (c == '0') { 11204514f5e3Sopenharmony_ci *index = 0; 11214514f5e3Sopenharmony_ci return len == 1; 11224514f5e3Sopenharmony_ci } 11234514f5e3Sopenharmony_ci if (c == '-' && len > 1) { 11244514f5e3Sopenharmony_ci *negative = true; 11254514f5e3Sopenharmony_ci loopStart = 1; 11264514f5e3Sopenharmony_ci } 11274514f5e3Sopenharmony_ci 11284514f5e3Sopenharmony_ci if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) { 11294514f5e3Sopenharmony_ci *index = *negative ? -n : n; 11304514f5e3Sopenharmony_ci return true; 11314514f5e3Sopenharmony_ci } 11324514f5e3Sopenharmony_ci return false; 11334514f5e3Sopenharmony_ci} 11344514f5e3Sopenharmony_ci 11354514f5e3Sopenharmony_cibool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data) 11364514f5e3Sopenharmony_ci{ 11374514f5e3Sopenharmony_ci uint64_t n = 0; 11384514f5e3Sopenharmony_ci uint32_t len = GetLength(); 11394514f5e3Sopenharmony_ci if (UNLIKELY(loopStart >= len)) { 11404514f5e3Sopenharmony_ci return false; 11414514f5e3Sopenharmony_ci } 11424514f5e3Sopenharmony_ci for (uint32_t i = loopStart; i < len; i++) { 11434514f5e3Sopenharmony_ci uint32_t c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 11444514f5e3Sopenharmony_ci if (c < '0' || c > '9') { 11454514f5e3Sopenharmony_ci return false; 11464514f5e3Sopenharmony_ci } 11474514f5e3Sopenharmony_ci // NOLINTNEXTLINE(readability-magic-numbers) 11484514f5e3Sopenharmony_ci n = n * 10 + (c - '0'); // 10: decimal factor 11494514f5e3Sopenharmony_ci } 11504514f5e3Sopenharmony_ci *index = n; 11514514f5e3Sopenharmony_ci return true; 11524514f5e3Sopenharmony_ci} 11534514f5e3Sopenharmony_ci 11544514f5e3Sopenharmony_cibool EcmaString::ToTypedArrayIndex(uint32_t *index) 11554514f5e3Sopenharmony_ci{ 11564514f5e3Sopenharmony_ci uint32_t len = GetLength(); 11574514f5e3Sopenharmony_ci if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { 11584514f5e3Sopenharmony_ci return false; 11594514f5e3Sopenharmony_ci } 11604514f5e3Sopenharmony_ci if (UNLIKELY(IsUtf16())) { 11614514f5e3Sopenharmony_ci return false; 11624514f5e3Sopenharmony_ci } 11634514f5e3Sopenharmony_ci 11644514f5e3Sopenharmony_ci CVector<uint8_t> buf; 11654514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 11664514f5e3Sopenharmony_ci uint32_t c = data[0]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 11674514f5e3Sopenharmony_ci uint64_t n = 0; 11684514f5e3Sopenharmony_ci if (c == '0') { 11694514f5e3Sopenharmony_ci *index = 0; 11704514f5e3Sopenharmony_ci return len == 1; 11714514f5e3Sopenharmony_ci } 11724514f5e3Sopenharmony_ci if (c > '0' && c <= '9') { 11734514f5e3Sopenharmony_ci n = c - '0'; 11744514f5e3Sopenharmony_ci for (uint32_t i = 1; i < len; i++) { 11754514f5e3Sopenharmony_ci c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 11764514f5e3Sopenharmony_ci if (c >= '0' && c <= '9') { 11774514f5e3Sopenharmony_ci // NOLINTNEXTLINE(readability-magic-numbers) 11784514f5e3Sopenharmony_ci n = n * 10 + (c - '0'); // 10: decimal factor 11794514f5e3Sopenharmony_ci } else if (c == '.') { 11804514f5e3Sopenharmony_ci n = JSObject::MAX_ELEMENT_INDEX; 11814514f5e3Sopenharmony_ci break; 11824514f5e3Sopenharmony_ci } else { 11834514f5e3Sopenharmony_ci return false; 11844514f5e3Sopenharmony_ci } 11854514f5e3Sopenharmony_ci } 11864514f5e3Sopenharmony_ci if (n < JSObject::MAX_ELEMENT_INDEX) { 11874514f5e3Sopenharmony_ci *index = n; 11884514f5e3Sopenharmony_ci return true; 11894514f5e3Sopenharmony_ci } else { 11904514f5e3Sopenharmony_ci *index = JSObject::MAX_ELEMENT_INDEX; 11914514f5e3Sopenharmony_ci return true; 11924514f5e3Sopenharmony_ci } 11934514f5e3Sopenharmony_ci } else if (c == '-') { 11944514f5e3Sopenharmony_ci *index = JSObject::MAX_ELEMENT_INDEX; 11954514f5e3Sopenharmony_ci return true; 11964514f5e3Sopenharmony_ci } 11974514f5e3Sopenharmony_ci return false; 11984514f5e3Sopenharmony_ci} 11994514f5e3Sopenharmony_ci 12004514f5e3Sopenharmony_citemplate<typename T> 12014514f5e3Sopenharmony_ciEcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode) 12024514f5e3Sopenharmony_ci{ 12034514f5e3Sopenharmony_ci uint32_t srcLen = src->GetLength(); 12044514f5e3Sopenharmony_ci int32_t start = 0; 12054514f5e3Sopenharmony_ci int32_t end = static_cast<int32_t>(srcLen) - 1; 12064514f5e3Sopenharmony_ci 12074514f5e3Sopenharmony_ci if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) { 12084514f5e3Sopenharmony_ci start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen)); 12094514f5e3Sopenharmony_ci } 12104514f5e3Sopenharmony_ci if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) { 12114514f5e3Sopenharmony_ci end = base::StringHelper::GetEnd(data, start, srcLen); 12124514f5e3Sopenharmony_ci } 12134514f5e3Sopenharmony_ci EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1)); 12144514f5e3Sopenharmony_ci return res; 12154514f5e3Sopenharmony_ci} 12164514f5e3Sopenharmony_ci 12174514f5e3Sopenharmony_ci/* static */ 12184514f5e3Sopenharmony_ciEcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 12194514f5e3Sopenharmony_ci{ 12204514f5e3Sopenharmony_ci auto srcFlat = FlattenAllString(vm, src); 12214514f5e3Sopenharmony_ci uint32_t srcLength = srcFlat.GetLength(); 12224514f5e3Sopenharmony_ci auto factory = vm->GetFactory(); 12234514f5e3Sopenharmony_ci if (srcFlat.IsUtf16()) { 12244514f5e3Sopenharmony_ci std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength); 12254514f5e3Sopenharmony_ci std::string res = base::StringHelper::ToLower(u16str); 12264514f5e3Sopenharmony_ci return *(factory->NewFromStdString(res)); 12274514f5e3Sopenharmony_ci } else { 12284514f5e3Sopenharmony_ci return ConvertUtf8ToLowerOrUpper(vm, src, true); 12294514f5e3Sopenharmony_ci } 12304514f5e3Sopenharmony_ci} 12314514f5e3Sopenharmony_ci 12324514f5e3Sopenharmony_ci/* static */ 12334514f5e3Sopenharmony_ciEcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 12344514f5e3Sopenharmony_ci{ 12354514f5e3Sopenharmony_ci auto srcFlat = FlattenAllString(vm, src); 12364514f5e3Sopenharmony_ci uint32_t srcLength = srcFlat.GetLength(); 12374514f5e3Sopenharmony_ci const char start = 'A'; 12384514f5e3Sopenharmony_ci const char end = 'Z'; 12394514f5e3Sopenharmony_ci uint32_t upperIndex = srcLength; 12404514f5e3Sopenharmony_ci Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength); 12414514f5e3Sopenharmony_ci for (uint32_t index = 0; index < srcLength; ++index) { 12424514f5e3Sopenharmony_ci if (base::StringHelper::Utf8CharInRange(data[index], start, end)) { 12434514f5e3Sopenharmony_ci upperIndex = index; 12444514f5e3Sopenharmony_ci break; 12454514f5e3Sopenharmony_ci } 12464514f5e3Sopenharmony_ci } 12474514f5e3Sopenharmony_ci if (upperIndex == srcLength) { 12484514f5e3Sopenharmony_ci return *src; 12494514f5e3Sopenharmony_ci } 12504514f5e3Sopenharmony_ci return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex); 12514514f5e3Sopenharmony_ci} 12524514f5e3Sopenharmony_ci 12534514f5e3Sopenharmony_ci/* static */ 12544514f5e3Sopenharmony_ciEcmaString *EcmaString::TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 12554514f5e3Sopenharmony_ci{ 12564514f5e3Sopenharmony_ci auto srcFlat = FlattenAllString(vm, src); 12574514f5e3Sopenharmony_ci uint32_t srcLength = srcFlat.GetLength(); 12584514f5e3Sopenharmony_ci const char start = 'a'; 12594514f5e3Sopenharmony_ci const char end = 'z'; 12604514f5e3Sopenharmony_ci uint32_t lowerIndex = srcLength; 12614514f5e3Sopenharmony_ci Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength); 12624514f5e3Sopenharmony_ci for (uint32_t index = 0; index < srcLength; ++index) { 12634514f5e3Sopenharmony_ci if (base::StringHelper::Utf8CharInRange(data[index], start, end)) { 12644514f5e3Sopenharmony_ci lowerIndex = index; 12654514f5e3Sopenharmony_ci break; 12664514f5e3Sopenharmony_ci } 12674514f5e3Sopenharmony_ci } 12684514f5e3Sopenharmony_ci if (lowerIndex == srcLength) { 12694514f5e3Sopenharmony_ci return *src; 12704514f5e3Sopenharmony_ci } 12714514f5e3Sopenharmony_ci return ConvertUtf8ToLowerOrUpper(vm, src, false, lowerIndex); 12724514f5e3Sopenharmony_ci} 12734514f5e3Sopenharmony_ci 12744514f5e3Sopenharmony_ci/* static */ 12754514f5e3Sopenharmony_ciEcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, 12764514f5e3Sopenharmony_ci bool toLower, uint32_t startIndex) 12774514f5e3Sopenharmony_ci{ 12784514f5e3Sopenharmony_ci const char start = toLower ? 'A' : 'a'; 12794514f5e3Sopenharmony_ci const char end = toLower ? 'Z' : 'z'; 12804514f5e3Sopenharmony_ci uint32_t srcLength = src->GetLength(); 12814514f5e3Sopenharmony_ci JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true)); 12824514f5e3Sopenharmony_ci auto srcFlat = FlattenAllString(vm, src); 12834514f5e3Sopenharmony_ci Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength); 12844514f5e3Sopenharmony_ci auto newStringPtr = newString->GetDataUtf8Writable(); 12854514f5e3Sopenharmony_ci if (startIndex > 0) { 12864514f5e3Sopenharmony_ci if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) { 12874514f5e3Sopenharmony_ci LOG_FULL(FATAL) << "memcpy_s failed"; 12884514f5e3Sopenharmony_ci UNREACHABLE(); 12894514f5e3Sopenharmony_ci } 12904514f5e3Sopenharmony_ci } 12914514f5e3Sopenharmony_ci for (uint32_t index = startIndex; index < srcLength; ++index) { 12924514f5e3Sopenharmony_ci if (base::StringHelper::Utf8CharInRange(data[index], start, end)) { 12934514f5e3Sopenharmony_ci *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower 12944514f5e3Sopenharmony_ci } else { 12954514f5e3Sopenharmony_ci *(newStringPtr + index) = data[index]; 12964514f5e3Sopenharmony_ci } 12974514f5e3Sopenharmony_ci } 12984514f5e3Sopenharmony_ci return *newString; 12994514f5e3Sopenharmony_ci} 13004514f5e3Sopenharmony_ci 13014514f5e3Sopenharmony_ci/* static */ 13024514f5e3Sopenharmony_ciEcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 13034514f5e3Sopenharmony_ci{ 13044514f5e3Sopenharmony_ci FlatStringInfo srcFlat = FlattenAllString(vm, src); 13054514f5e3Sopenharmony_ci uint32_t srcLength = srcFlat.GetLength(); 13064514f5e3Sopenharmony_ci auto factory = vm->GetFactory(); 13074514f5e3Sopenharmony_ci if (srcFlat.IsUtf16()) { 13084514f5e3Sopenharmony_ci std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength); 13094514f5e3Sopenharmony_ci std::string res = base::StringHelper::ToUpper(u16str); 13104514f5e3Sopenharmony_ci return *(factory->NewFromStdString(res)); 13114514f5e3Sopenharmony_ci } else { 13124514f5e3Sopenharmony_ci return ConvertUtf8ToLowerOrUpper(vm, src, false); 13134514f5e3Sopenharmony_ci } 13144514f5e3Sopenharmony_ci} 13154514f5e3Sopenharmony_ci 13164514f5e3Sopenharmony_ci/* static */ 13174514f5e3Sopenharmony_ciEcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 13184514f5e3Sopenharmony_ci{ 13194514f5e3Sopenharmony_ci auto factory = vm->GetFactory(); 13204514f5e3Sopenharmony_ci FlatStringInfo srcFlat = FlattenAllString(vm, src); 13214514f5e3Sopenharmony_ci std::u16string utf16 = srcFlat.ToU16String(); 13224514f5e3Sopenharmony_ci std::string res = base::StringHelper::ToLocaleLower(utf16, locale); 13234514f5e3Sopenharmony_ci return *(factory->NewFromStdString(res)); 13244514f5e3Sopenharmony_ci} 13254514f5e3Sopenharmony_ci 13264514f5e3Sopenharmony_ci/* static */ 13274514f5e3Sopenharmony_ciEcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 13284514f5e3Sopenharmony_ci{ 13294514f5e3Sopenharmony_ci auto factory = vm->GetFactory(); 13304514f5e3Sopenharmony_ci FlatStringInfo srcFlat = FlattenAllString(vm, src); 13314514f5e3Sopenharmony_ci std::u16string utf16 = srcFlat.ToU16String(); 13324514f5e3Sopenharmony_ci std::string res = base::StringHelper::ToLocaleUpper(utf16, locale); 13334514f5e3Sopenharmony_ci return *(factory->NewFromStdString(res)); 13344514f5e3Sopenharmony_ci} 13354514f5e3Sopenharmony_ci 13364514f5e3Sopenharmony_ciEcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode) 13374514f5e3Sopenharmony_ci{ 13384514f5e3Sopenharmony_ci FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src); 13394514f5e3Sopenharmony_ci uint32_t srcLen = srcFlat.GetLength(); 13404514f5e3Sopenharmony_ci if (UNLIKELY(srcLen == 0)) { 13414514f5e3Sopenharmony_ci return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject()); 13424514f5e3Sopenharmony_ci } 13434514f5e3Sopenharmony_ci if (srcFlat.IsUtf8()) { 13444514f5e3Sopenharmony_ci Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen); 13454514f5e3Sopenharmony_ci return TrimBody(thread, src, data, mode); 13464514f5e3Sopenharmony_ci } else { 13474514f5e3Sopenharmony_ci Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen); 13484514f5e3Sopenharmony_ci return TrimBody(thread, src, data, mode); 13494514f5e3Sopenharmony_ci } 13504514f5e3Sopenharmony_ci} 13514514f5e3Sopenharmony_ci 13524514f5e3Sopenharmony_ciEcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type) 13534514f5e3Sopenharmony_ci{ 13544514f5e3Sopenharmony_ci ASSERT(string->IsTreeString() || string->IsSlicedString()); 13554514f5e3Sopenharmony_ci ASSERT(IsSMemSpace(type)); 13564514f5e3Sopenharmony_ci auto thread = vm->GetJSThread(); 13574514f5e3Sopenharmony_ci uint32_t length = string->GetLength(); 13584514f5e3Sopenharmony_ci EcmaString *result = nullptr; 13594514f5e3Sopenharmony_ci if (string->IsUtf8()) { 13604514f5e3Sopenharmony_ci result = CreateLineStringWithSpaceType(vm, length, true, type); 13614514f5e3Sopenharmony_ci WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length); 13624514f5e3Sopenharmony_ci } else { 13634514f5e3Sopenharmony_ci result = CreateLineStringWithSpaceType(vm, length, false, type); 13644514f5e3Sopenharmony_ci WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length); 13654514f5e3Sopenharmony_ci } 13664514f5e3Sopenharmony_ci if (string->IsTreeString()) { 13674514f5e3Sopenharmony_ci JSHandle<TreeEcmaString> tree(string); 13684514f5e3Sopenharmony_ci ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0); 13694514f5e3Sopenharmony_ci tree->SetFirst(thread, JSTaggedValue(result)); 13704514f5e3Sopenharmony_ci tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString())); 13714514f5e3Sopenharmony_ci } 13724514f5e3Sopenharmony_ci return result; 13734514f5e3Sopenharmony_ci} 13744514f5e3Sopenharmony_ci 13754514f5e3Sopenharmony_ciEcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type) 13764514f5e3Sopenharmony_ci{ 13774514f5e3Sopenharmony_ci EcmaString *s = *string; 13784514f5e3Sopenharmony_ci if (!s->IsTreeString()) { 13794514f5e3Sopenharmony_ci return s; 13804514f5e3Sopenharmony_ci } 13814514f5e3Sopenharmony_ci JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string); 13824514f5e3Sopenharmony_ci if (!tree->IsFlat()) { 13834514f5e3Sopenharmony_ci return SlowFlatten(vm, string, type); 13844514f5e3Sopenharmony_ci } 13854514f5e3Sopenharmony_ci return EcmaString::Cast(tree->GetFirst()); 13864514f5e3Sopenharmony_ci} 13874514f5e3Sopenharmony_ci 13884514f5e3Sopenharmony_ciFlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type) 13894514f5e3Sopenharmony_ci{ 13904514f5e3Sopenharmony_ci ASSERT(IsSMemSpace(type)); 13914514f5e3Sopenharmony_ci EcmaString *s = *string; 13924514f5e3Sopenharmony_ci uint32_t startIndex = 0; 13934514f5e3Sopenharmony_ci if (s->IsLineOrConstantString()) { 13944514f5e3Sopenharmony_ci return FlatStringInfo(s, startIndex, s->GetLength()); 13954514f5e3Sopenharmony_ci } 13964514f5e3Sopenharmony_ci if (string->IsTreeString()) { 13974514f5e3Sopenharmony_ci JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string); 13984514f5e3Sopenharmony_ci if (!tree->IsFlat()) { 13994514f5e3Sopenharmony_ci s = SlowFlatten(vm, string, type); 14004514f5e3Sopenharmony_ci } else { 14014514f5e3Sopenharmony_ci s = EcmaString::Cast(tree->GetFirst()); 14024514f5e3Sopenharmony_ci } 14034514f5e3Sopenharmony_ci } else if (string->IsSlicedString()) { 14044514f5e3Sopenharmony_ci s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent()); 14054514f5e3Sopenharmony_ci startIndex = SlicedString::Cast(*string)->GetStartIndex(); 14064514f5e3Sopenharmony_ci } 14074514f5e3Sopenharmony_ci return FlatStringInfo(s, startIndex, string->GetLength()); 14084514f5e3Sopenharmony_ci} 14094514f5e3Sopenharmony_ci 14104514f5e3Sopenharmony_ciEcmaString *EcmaString::FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string) 14114514f5e3Sopenharmony_ci{ 14124514f5e3Sopenharmony_ci DISALLOW_GARBAGE_COLLECTION; 14134514f5e3Sopenharmony_ci if (string->IsLineOrConstantString()) { 14144514f5e3Sopenharmony_ci return string; 14154514f5e3Sopenharmony_ci } 14164514f5e3Sopenharmony_ci if (string->IsTreeString()) { 14174514f5e3Sopenharmony_ci TreeEcmaString *tree = TreeEcmaString::Cast(string); 14184514f5e3Sopenharmony_ci if (tree->IsFlat()) { 14194514f5e3Sopenharmony_ci string = EcmaString::Cast(tree->GetFirst()); 14204514f5e3Sopenharmony_ci } else { 14214514f5e3Sopenharmony_ci uint32_t length = tree->GetLength(); 14224514f5e3Sopenharmony_ci EcmaString *result = nullptr; 14234514f5e3Sopenharmony_ci if (tree->IsUtf8()) { 14244514f5e3Sopenharmony_ci result = CreateLineStringNoGC(vm, length, true); 14254514f5e3Sopenharmony_ci WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length); 14264514f5e3Sopenharmony_ci } else { 14274514f5e3Sopenharmony_ci result = CreateLineStringNoGC(vm, length, false); 14284514f5e3Sopenharmony_ci WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length); 14294514f5e3Sopenharmony_ci } 14304514f5e3Sopenharmony_ci tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result)); 14314514f5e3Sopenharmony_ci tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString())); 14324514f5e3Sopenharmony_ci return result; 14334514f5e3Sopenharmony_ci } 14344514f5e3Sopenharmony_ci } else if (string->IsSlicedString()) { 14354514f5e3Sopenharmony_ci SlicedString *str = SlicedString::Cast(string); 14364514f5e3Sopenharmony_ci uint32_t length = str->GetLength(); 14374514f5e3Sopenharmony_ci EcmaString *result = nullptr; 14384514f5e3Sopenharmony_ci if (str->IsUtf8()) { 14394514f5e3Sopenharmony_ci result = CreateLineStringNoGC(vm, length, true); 14404514f5e3Sopenharmony_ci WriteToFlat<uint8_t>(str, result->GetDataUtf8Writable(), length); 14414514f5e3Sopenharmony_ci } else { 14424514f5e3Sopenharmony_ci result = CreateLineStringNoGC(vm, length, false); 14434514f5e3Sopenharmony_ci WriteToFlat<uint16_t>(str, result->GetDataUtf16Writable(), length); 14444514f5e3Sopenharmony_ci } 14454514f5e3Sopenharmony_ci return result; 14464514f5e3Sopenharmony_ci } 14474514f5e3Sopenharmony_ci return string; 14484514f5e3Sopenharmony_ci} 14494514f5e3Sopenharmony_ci 14504514f5e3Sopenharmony_ciconst uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf) 14514514f5e3Sopenharmony_ci{ 14524514f5e3Sopenharmony_ci ASSERT(src->IsUtf8()); 14534514f5e3Sopenharmony_ci uint32_t length = src->GetLength(); 14544514f5e3Sopenharmony_ci EcmaString *string = const_cast<EcmaString *>(src); 14554514f5e3Sopenharmony_ci if (string->IsTreeString()) { 14564514f5e3Sopenharmony_ci if (string->IsFlat()) { 14574514f5e3Sopenharmony_ci string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst()); 14584514f5e3Sopenharmony_ci } else { 14594514f5e3Sopenharmony_ci buf.reserve(length); 14604514f5e3Sopenharmony_ci WriteToFlat(string, buf.data(), length); 14614514f5e3Sopenharmony_ci return buf.data(); 14624514f5e3Sopenharmony_ci } 14634514f5e3Sopenharmony_ci } else if (string->IsSlicedString()) { 14644514f5e3Sopenharmony_ci SlicedString *str = SlicedString::Cast(string); 14654514f5e3Sopenharmony_ci return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex(); 14664514f5e3Sopenharmony_ci } 14674514f5e3Sopenharmony_ci return string->GetDataUtf8(); 14684514f5e3Sopenharmony_ci} 14694514f5e3Sopenharmony_ci 14704514f5e3Sopenharmony_ciconst uint8_t *EcmaString::GetNonTreeUtf8Data(const EcmaString *src) 14714514f5e3Sopenharmony_ci{ 14724514f5e3Sopenharmony_ci ASSERT(src->IsUtf8()); 14734514f5e3Sopenharmony_ci ASSERT(!src->IsTreeString()); 14744514f5e3Sopenharmony_ci EcmaString *string = const_cast<EcmaString *>(src); 14754514f5e3Sopenharmony_ci if (string->IsSlicedString()) { 14764514f5e3Sopenharmony_ci SlicedString *str = SlicedString::Cast(string); 14774514f5e3Sopenharmony_ci return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex(); 14784514f5e3Sopenharmony_ci } 14794514f5e3Sopenharmony_ci ASSERT(src->IsLineOrConstantString()); 14804514f5e3Sopenharmony_ci return string->GetDataUtf8(); 14814514f5e3Sopenharmony_ci} 14824514f5e3Sopenharmony_ci 14834514f5e3Sopenharmony_ciconst uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf) 14844514f5e3Sopenharmony_ci{ 14854514f5e3Sopenharmony_ci ASSERT(src->IsUtf16()); 14864514f5e3Sopenharmony_ci uint32_t length = src->GetLength(); 14874514f5e3Sopenharmony_ci EcmaString *string = const_cast<EcmaString *>(src); 14884514f5e3Sopenharmony_ci if (string->IsTreeString()) { 14894514f5e3Sopenharmony_ci if (string->IsFlat()) { 14904514f5e3Sopenharmony_ci string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst()); 14914514f5e3Sopenharmony_ci } else { 14924514f5e3Sopenharmony_ci buf.reserve(length); 14934514f5e3Sopenharmony_ci WriteToFlat(string, buf.data(), length); 14944514f5e3Sopenharmony_ci return buf.data(); 14954514f5e3Sopenharmony_ci } 14964514f5e3Sopenharmony_ci } else if (string->IsSlicedString()) { 14974514f5e3Sopenharmony_ci SlicedString *str = SlicedString::Cast(string); 14984514f5e3Sopenharmony_ci return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex(); 14994514f5e3Sopenharmony_ci } 15004514f5e3Sopenharmony_ci return string->GetDataUtf16(); 15014514f5e3Sopenharmony_ci} 15024514f5e3Sopenharmony_ci 15034514f5e3Sopenharmony_ciconst uint16_t *EcmaString::GetNonTreeUtf16Data(const EcmaString *src) 15044514f5e3Sopenharmony_ci{ 15054514f5e3Sopenharmony_ci ASSERT(src->IsUtf16()); 15064514f5e3Sopenharmony_ci ASSERT(!src->IsTreeString()); 15074514f5e3Sopenharmony_ci EcmaString *string = const_cast<EcmaString *>(src); 15084514f5e3Sopenharmony_ci if (string->IsSlicedString()) { 15094514f5e3Sopenharmony_ci SlicedString *str = SlicedString::Cast(string); 15104514f5e3Sopenharmony_ci return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex(); 15114514f5e3Sopenharmony_ci } 15124514f5e3Sopenharmony_ci ASSERT(src->IsLineOrConstantString()); 15134514f5e3Sopenharmony_ci return string->GetDataUtf16(); 15144514f5e3Sopenharmony_ci} 15154514f5e3Sopenharmony_ci 15164514f5e3Sopenharmony_cistd::u16string FlatStringInfo::ToU16String(uint32_t len) 15174514f5e3Sopenharmony_ci{ 15184514f5e3Sopenharmony_ci uint32_t length = len > 0 ? len : GetLength(); 15194514f5e3Sopenharmony_ci std::u16string result; 15204514f5e3Sopenharmony_ci if (IsUtf16()) { 15214514f5e3Sopenharmony_ci const uint16_t *data = this->GetDataUtf16(); 15224514f5e3Sopenharmony_ci result = base::StringHelper::Utf16ToU16String(data, length); 15234514f5e3Sopenharmony_ci } else { 15244514f5e3Sopenharmony_ci const uint8_t *data = this->GetDataUtf8(); 15254514f5e3Sopenharmony_ci result = base::StringHelper::Utf8ToU16String(data, length); 15264514f5e3Sopenharmony_ci } 15274514f5e3Sopenharmony_ci return result; 15284514f5e3Sopenharmony_ci} 15294514f5e3Sopenharmony_ci 15304514f5e3Sopenharmony_ciEcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj) 15314514f5e3Sopenharmony_ci{ 15324514f5e3Sopenharmony_ci ASSERT(obj != nullptr); 15334514f5e3Sopenharmony_ci string_ = EcmaString::Cast(obj); 15344514f5e3Sopenharmony_ci} 15354514f5e3Sopenharmony_ci 15364514f5e3Sopenharmony_ciEcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value) 15374514f5e3Sopenharmony_ci{ 15384514f5e3Sopenharmony_ci ASSERT(value.IsString()); 15394514f5e3Sopenharmony_ci string_ = EcmaString::Cast(value.GetTaggedObject()); 15404514f5e3Sopenharmony_ci} 15414514f5e3Sopenharmony_ci 15424514f5e3Sopenharmony_ciEcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle) 15434514f5e3Sopenharmony_ci : string_(*strHandle) 15444514f5e3Sopenharmony_ci{ 15454514f5e3Sopenharmony_ci} 15464514f5e3Sopenharmony_ci 15474514f5e3Sopenharmony_cistd::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage) 15484514f5e3Sopenharmony_ci{ 15494514f5e3Sopenharmony_ci if (string_ == nullptr) { 15504514f5e3Sopenharmony_ci return ""; 15514514f5e3Sopenharmony_ci } 15524514f5e3Sopenharmony_ci bool modify = (usage != StringConvertedUsage::PRINT); 15534514f5e3Sopenharmony_ci CVector<uint8_t> buf; 15544514f5e3Sopenharmony_ci Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify); 15554514f5e3Sopenharmony_ci std::string res; 15564514f5e3Sopenharmony_ci res.reserve(sp.size()); 15574514f5e3Sopenharmony_ci for (const auto &c : sp) { 15584514f5e3Sopenharmony_ci res.push_back(c); 15594514f5e3Sopenharmony_ci } 15604514f5e3Sopenharmony_ci return res; 15614514f5e3Sopenharmony_ci} 15624514f5e3Sopenharmony_ci 15634514f5e3Sopenharmony_ciCString EcmaStringAccessor::Utf8ConvertToString() 15644514f5e3Sopenharmony_ci{ 15654514f5e3Sopenharmony_ci if (string_ == nullptr) { 15664514f5e3Sopenharmony_ci return CString(""); 15674514f5e3Sopenharmony_ci } 15684514f5e3Sopenharmony_ci if (IsUtf8()) { 15694514f5e3Sopenharmony_ci std::string stdStr; 15704514f5e3Sopenharmony_ci if (IsLineString()) { 15714514f5e3Sopenharmony_ci return base::StringHelper::Utf8ToString(GetDataUtf8(), GetLength()).c_str(); 15724514f5e3Sopenharmony_ci } 15734514f5e3Sopenharmony_ci CVector<uint8_t> buf; 15744514f5e3Sopenharmony_ci const uint8_t *data = EcmaString::GetUtf8DataFlat(string_, buf); 15754514f5e3Sopenharmony_ci return base::StringHelper::Utf8ToString(data, GetLength()).c_str(); 15764514f5e3Sopenharmony_ci } else { 15774514f5e3Sopenharmony_ci return ToCString(); 15784514f5e3Sopenharmony_ci } 15794514f5e3Sopenharmony_ci} 15804514f5e3Sopenharmony_ci 15814514f5e3Sopenharmony_cistd::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage) 15824514f5e3Sopenharmony_ci{ 15834514f5e3Sopenharmony_ci if (string_ == nullptr) { 15844514f5e3Sopenharmony_ci return ""; 15854514f5e3Sopenharmony_ci } 15864514f5e3Sopenharmony_ci 15874514f5e3Sopenharmony_ci bool modify = (usage != StringConvertedUsage::PRINT); 15884514f5e3Sopenharmony_ci CVector<uint8_t> buf; 15894514f5e3Sopenharmony_ci Span<const uint8_t> sp = string_->DebuggerToUtf8Span(buf, modify); 15904514f5e3Sopenharmony_ci std::string res; 15914514f5e3Sopenharmony_ci res.reserve(sp.size()); 15924514f5e3Sopenharmony_ci for (const auto &c : sp) { 15934514f5e3Sopenharmony_ci res.push_back(c); 15944514f5e3Sopenharmony_ci } 15954514f5e3Sopenharmony_ci return res; 15964514f5e3Sopenharmony_ci} 15974514f5e3Sopenharmony_ci 15984514f5e3Sopenharmony_ciCString EcmaStringAccessor::ToCString(StringConvertedUsage usage, bool cesu8) 15994514f5e3Sopenharmony_ci{ 16004514f5e3Sopenharmony_ci if (string_ == nullptr) { 16014514f5e3Sopenharmony_ci return ""; 16024514f5e3Sopenharmony_ci } 16034514f5e3Sopenharmony_ci bool modify = (usage != StringConvertedUsage::PRINT); 16044514f5e3Sopenharmony_ci CVector<uint8_t> buf; 16054514f5e3Sopenharmony_ci Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8); 16064514f5e3Sopenharmony_ci CString res; 16074514f5e3Sopenharmony_ci res.reserve(sp.size()); 16084514f5e3Sopenharmony_ci for (const auto &c : sp) { 16094514f5e3Sopenharmony_ci res.push_back(c); 16104514f5e3Sopenharmony_ci } 16114514f5e3Sopenharmony_ci return res; 16124514f5e3Sopenharmony_ci} 16134514f5e3Sopenharmony_ci 16144514f5e3Sopenharmony_ci// static 16154514f5e3Sopenharmony_ciEcmaString *EcmaStringAccessor::CreateLineString(const EcmaVM *vm, size_t length, bool compressed) 16164514f5e3Sopenharmony_ci{ 16174514f5e3Sopenharmony_ci return EcmaString::CreateLineString(vm, length, compressed); 16184514f5e3Sopenharmony_ci} 16194514f5e3Sopenharmony_ci} // namespace panda::ecmascript 1620