1 /* 2 * Copyright (c) 2021 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ECMASCRIPT_STRING_H 17 #define ECMASCRIPT_STRING_H 18 19 #include <cstddef> 20 #include <cstdint> 21 #include <cstring> 22 23 #include "ecmascript/base/utf_helper.h" 24 #include "ecmascript/common.h" 25 #include "ecmascript/ecma_macros.h" 26 #include "ecmascript/js_hclass.h" 27 #include "ecmascript/js_tagged_value.h" 28 #include "ecmascript/mem/barriers.h" 29 #include "ecmascript/mem/space.h" 30 #include "ecmascript/mem/tagged_object.h" 31 #include "ecmascript/platform/ecma_string_hash_helper.h" 32 33 #include "libpandabase/macros.h" 34 #include "securec.h" 35 #include "unicode/locid.h" 36 37 namespace panda { 38 namespace test { 39 class EcmaStringEqualsTest; 40 } 41 namespace ecmascript { 42 template<typename T> 43 class JSHandle; 44 class JSPandaFile; 45 class EcmaVM; 46 class LineEcmaString; 47 class ConstantString; 48 class TreeEcmaString; 49 class SlicedString; 50 class FlatStringInfo; 51 52 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 53 #define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length) \ 54 if ((length) >= MAX_STRING_LENGTH) { \ 55 THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr); \ 56 } 57 58 class EcmaString : public TaggedObject { 59 /* Mix Hash Code: -- { 0 | [31 bits raw hash code] } computed through string 60 \ { 1 | [31 bits integer numbers] } fastpath for string to number 61 */ 62 public: 63 CAST_CHECK(EcmaString, IsString); 64 65 static constexpr uint32_t IS_INTEGER_MASK = 1U << 31; 66 static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1; 67 static constexpr uint32_t STRING_INTERN_BIT = 0x2; 68 static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning 69 static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U; 70 static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF; 71 static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9; 72 73 static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize(); 74 // In last bit of mix_length we store if this string is compressed or not. 75 ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET) 76 // In last bit of mix_hash we store if this string is small-integer number or not. 77 ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE) 78 79 enum CompressedStatus { 80 STRING_COMPRESSED, 81 STRING_UNCOMPRESSED, 82 }; 83 84 enum IsIntegerStatus { 85 NOT_INTEGER = 0, 86 IS_INTEGER, 87 }; 88 89 enum TrimMode : uint8_t { 90 TRIM, 91 TRIM_START, 92 TRIM_END, 93 }; 94 95 enum ConcatOptStatus { 96 BEGIN_STRING_ADD = 1, 97 IN_STRING_ADD, 98 CONFIRMED_IN_STRING_ADD, 99 END_STRING_ADD, 100 INVALID_STRING_ADD, 101 HAS_BACKING_STORE, 102 }; 103 104 private: 105 friend class EcmaStringAccessor; 106 friend class LineEcmaString; 107 friend class ConstantString; 108 friend class TreeEcmaString; 109 friend class SlicedString; 110 friend class FlatStringInfo; 111 friend class NameDictionary; 112 friend class panda::test::EcmaStringEqualsTest; 113 114 static EcmaString *CreateEmptyString(const EcmaVM *vm); 115 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 116 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false, 117 uint32_t idOffset = 0); 118 static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 119 uint32_t offset, uint32_t utf8Len, MemSpaceType type = MemSpaceType::SEMI_SPACE); 120 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 121 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 122 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 123 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 124 static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 125 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed); 126 static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed); 127 static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm, 128 size_t length, bool compressed, MemSpaceType type); 129 static EcmaString *CreateTreeString(const EcmaVM *vm, 130 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed); 131 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, 132 size_t length, bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0); 133 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left, 134 const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 135 template<typename T1, typename T2> 136 static uint32_t CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst, 137 const T2 *dataSecond, size_t sizeSecond); 138 static uint32_t CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString, 139 const JSHandle<EcmaString> &secondString); 140 static uint32_t CalculateConcatHashCode(const JSHandle<EcmaString> &firstString, 141 const JSHandle<EcmaString> &secondString); 142 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 143 uint32_t length, bool compressed); 144 static EcmaString *FastSubString(const EcmaVM *vm, 145 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 146 static EcmaString *GetSlicedString(const EcmaVM *vm, 147 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 148 static EcmaString *GetSubString(const EcmaVM *vm, 149 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 150 // require src is LineString 151 // not change src data structure 152 static inline EcmaString *FastSubUtf8String(const EcmaVM *vm, 153 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 154 // require src is LineString 155 // not change src data structure 156 static inline EcmaString *FastSubUtf16String(const EcmaVM *vm, 157 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length); 158 inline void TrimLineString(const JSThread *thread, uint32_t newLength); IsUtf8() const159 inline bool IsUtf8() const 160 { 161 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED; 162 } 163 IsUtf16() const164 inline bool IsUtf16() const 165 { 166 return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED; 167 } 168 IsInteger()169 inline bool IsInteger() 170 { 171 return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK; 172 } 173 174 // require is LineString 175 inline uint16_t *GetData() const; 176 inline const uint8_t *GetDataUtf8() const; 177 inline const uint16_t *GetDataUtf16() const; 178 179 // require is LineString 180 inline uint8_t *GetDataUtf8Writable(); 181 inline uint16_t *GetDataUtf16Writable(); 182 GetLength() const183 inline uint32_t GetLength() const 184 { 185 return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT; 186 } 187 SetLength(uint32_t length, bool compressed = false)188 inline void SetLength(uint32_t length, bool compressed = false) 189 { 190 ASSERT(length < MAX_STRING_LENGTH); 191 // Use 0u for compressed/utf8 expression 192 SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED)); 193 } 194 195 inline uint32_t GetRawHashcode() const 196 { 197 return GetMixHashcode() & (~IS_INTEGER_MASK); 198 } 199 200 static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger) 201 { 202 return isInteger ? (hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK)); 203 } 204 205 inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false) 206 { 207 // Use 0u for not integer string's expression 208 SetMixHashcode(MixHashcode(hashcode, isInteger)); 209 } 210 211 inline size_t GetUtf8Length(bool modify = true, bool isGetBufferSize = false) const; 212 213 inline void SetIsInternString() 214 { 215 SetMixLength(GetMixLength() | STRING_INTERN_BIT); 216 } 217 218 inline bool IsInternString() const 219 { 220 return (GetMixLength() & STRING_INTERN_BIT) != 0; 221 } 222 223 inline void ClearInternStringFlag() 224 { 225 SetMixLength(GetMixLength() & ~STRING_INTERN_BIT); 226 } 227 228 inline bool TryGetHashCode(uint32_t *hash) 229 { 230 uint32_t hashcode = GetMixHashcode(); 231 if (hashcode == 0 && GetLength() != 0) { 232 return false; 233 } 234 *hash = hashcode; 235 return true; 236 } 237 238 inline uint32_t GetIntegerCode() 239 { 240 ASSERT(GetMixHashcode() & IS_INTEGER_MASK); 241 return GetRawHashcode(); 242 } 243 244 // not change this data structure. 245 // if string is not flat, this func has low efficiency. 246 uint32_t PUBLIC_API GetHashcode() 247 { 248 uint32_t hashcode = GetMixHashcode(); 249 // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0. 250 if (hashcode == 0 && GetLength() != 0) { 251 hashcode = ComputeHashcode(); 252 SetMixHashcode(hashcode); 253 } 254 return hashcode; 255 } 256 257 template<typename T> 258 inline static bool IsDecimalDigitChar(const T c) 259 { 260 return (c >= '0' && c <= '9'); 261 } 262 263 static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c) 264 { 265 if (!IsDecimalDigitChar(c)) { 266 return false; 267 } 268 int charDate = c - '0'; 269 *num = (*num) * 10 + charDate; // 10: decimal factor 270 return true; 271 } 272 273 bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const; 274 275 template<typename T> 276 static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed) 277 { 278 ASSERT(size >= 0); 279 if (hashSeed == 0) { 280 if (IsDecimalDigitChar(data[0]) && data[0] != '0') { 281 uint32_t num = data[0] - '0'; 282 uint32_t i = 1; 283 do { 284 if (i == size) { 285 // compute mix hash 286 if (num <= MAX_INTEGER_HASH_NUMBER) { 287 *hash = MixHashcode(num, IS_INTEGER); 288 return true; 289 } 290 return false; 291 } 292 } while (ComputeIntegerHash(&num, data[i++])); 293 } 294 if (size == 1 && (data[0] == '0')) { 295 *hash = MixHashcode(0, IS_INTEGER); 296 return true; 297 } 298 } else { 299 if (IsDecimalDigitChar(data[0])) { 300 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor 301 uint32_t i = 1; 302 do { 303 if (i == size) { 304 // compute mix hash 305 if (num <= MAX_INTEGER_HASH_NUMBER) { 306 *hash = MixHashcode(num, IS_INTEGER); 307 return true; 308 } 309 return false; 310 } 311 } while (ComputeIntegerHash(&num, data[i++])); 312 } 313 } 314 return false; 315 } 316 317 // not change this data structure. 318 // if string is not flat, this func has low efficiency. 319 uint32_t PUBLIC_API ComputeHashcode() const; 320 std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const; 321 uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const; 322 323 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress); 324 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length); 325 326 template<bool verify = true> 327 uint16_t At(int32_t index) const; 328 329 // require is LineString 330 void WriteData(uint32_t index, uint16_t src); 331 332 // can change left and right data structure 333 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right); 334 335 static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, 336 const JSHandle<EcmaString>& right, uint32_t offset); 337 338 // Check that two spans are equal. Should have the same length. 339 /* static */ 340 template<typename T, typename T1> 341 static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2) 342 { 343 ASSERT(str1.Size() <= str2.Size()); 344 size_t size = str1.Size(); 345 if (!std::is_same_v<T, T1>) { 346 for (size_t i = 0; i < size; i++) { 347 auto left = static_cast<uint16_t>(str1[i]); 348 auto right = static_cast<uint16_t>(str2[i]); 349 if (left != right) { 350 return false; 351 } 352 } 353 return true; 354 } 355 356 return !memcmp(str1.data(), str2.data(), size * sizeof(T)); 357 } 358 359 // Converts utf8Data to utf16 and compare it with given utf16_data. 360 static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data, 361 uint32_t utf16Len); 362 // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence. 363 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2); 364 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 365 static PUBLIC_API bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, 366 const JSHandle<EcmaString> &str2); 367 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 368 static PUBLIC_API bool StringsAreEqual(EcmaString *str1, EcmaString *str2); 369 // Two strings have the same type of utf encoding format. 370 static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2); 371 static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2); 372 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 373 // not change str1 data structure. 374 // if str1 is not flat, this func has low efficiency. 375 static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 376 bool canBeCompress); 377 // Compares strings by bytes, It doesn't check canonical unicode equivalence. 378 // not change str1 data structure. 379 // if str1 is not flat, this func has low efficiency. 380 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len); 381 382 // can change receiver and search data structure 383 static int32_t IndexOf(const EcmaVM *vm, 384 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 385 386 // can change receiver and search data structure 387 static int32_t LastIndexOf(const EcmaVM *vm, 388 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0); 389 390 inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const 391 { 392 if (maxLength == 0) { 393 return 1; // maxLength was -1 at napi 394 } 395 size_t length = GetLength(); 396 if (length > maxLength) { 397 return 0; 398 } 399 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 400 buf[maxLength - 1] = '\0'; 401 // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8 402 return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1; // add place for zero in the end 403 } 404 405 // It allows user to copy into buffer even if maxLength < length 406 inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const 407 { 408 if (maxLength == 0) { 409 return 1; // maxLength was -1 at napi 410 } 411 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 412 buf[maxLength - 1] = '\0'; 413 return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1; 414 } 415 416 size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const 417 { 418 if (IsUtf16()) { 419 CVector<uint16_t> tmpBuf; 420 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 421 if (length > bufLength) { 422 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) { 423 LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength"; 424 UNREACHABLE(); 425 } 426 return bufLength; 427 } 428 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 429 LOG_FULL(FATAL) << "memcpy_s failed"; 430 UNREACHABLE(); 431 } 432 return length; 433 } 434 CVector<uint8_t> tmpBuf; 435 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf); 436 if (length > bufLength) { 437 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength); 438 } 439 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength); 440 } 441 442 // It allows user to copy into buffer even if maxLength < length 443 inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const 444 { 445 if (bufLength == 0) { 446 return 0; 447 } 448 // Returns a number representing a valid backrest length. 449 return CopyDataToUtf16(buf, targetLength, bufLength); 450 } 451 452 size_t WriteOneByte(uint8_t *buf, size_t maxLength) const 453 { 454 if (maxLength == 0) { 455 return 0; 456 } 457 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 458 buf[maxLength - 1] = '\0'; 459 uint32_t length = GetLength(); 460 if (!IsUtf16()) { 461 CVector<uint8_t> tmpBuf; 462 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 463 if (length > maxLength) { 464 length = maxLength; 465 } 466 if (memcpy_s(buf, maxLength, data, length) != EOK) { 467 LOG_FULL(FATAL) << "memcpy_s failed when write one byte"; 468 UNREACHABLE(); 469 } 470 return length; 471 } 472 473 CVector<uint16_t> tmpBuf; 474 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 475 if (length > maxLength) { 476 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength); 477 } 478 return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength); 479 } 480 481 size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength, 482 bool modify = true, bool isWriteBuffer = false) const 483 { 484 uint32_t len = GetLength(); 485 if (start + length > len) { 486 return 0; 487 } 488 if (!IsUtf16()) { 489 if (length > std::numeric_limits<size_t>::max() / 2 - 1) { // 2: half 490 LOG_FULL(FATAL) << " length is higher than half of size_t::max"; 491 UNREACHABLE(); 492 } 493 CVector<uint8_t> tmpBuf; 494 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf) + start; 495 // Only copy maxLength number of chars into buffer if length > maxLength 496 auto dataLen = std::min(length, maxLength); 497 std::copy(data, data + dataLen, buf); 498 return dataLen; 499 } 500 CVector<uint16_t> tmpBuf; 501 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 502 if (length > maxLength) { 503 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start, 504 modify, isWriteBuffer); 505 } 506 return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start, 507 modify, isWriteBuffer); 508 } 509 510 inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const 511 { 512 uint32_t length = GetLength(); 513 if (length > maxLength) { 514 return 0; 515 } 516 if (IsUtf16()) { 517 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 518 CVector<uint16_t> tmpBuf; 519 const uint16_t *data = GetUtf16DataFlat(this, tmpBuf); 520 if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) { 521 LOG_FULL(FATAL) << "memcpy_s failed"; 522 UNREACHABLE(); 523 } 524 return length; 525 } 526 CVector<uint8_t> tmpBuf; 527 const uint8_t *data = GetUtf8DataFlat(this, tmpBuf); 528 return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, maxLength); 529 } 530 531 std::u16string ToU16String(uint32_t len = 0); 532 533 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 534 { 535 uint8_t *buf = nullptr; 536 auto length = GetLength(); 537 if (IsUtf16()) { 538 auto size = length * sizeof(uint16_t); 539 buf = new uint8_t[size](); 540 CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length); 541 } else { 542 buf = new uint8_t[length + 1](); 543 CopyDataUtf8(buf, length + 1); 544 } 545 return std::unique_ptr<uint8_t[]>(buf); 546 } 547 548 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false) 549 { 550 Span<const uint8_t> str; 551 uint32_t strLen = GetLength(); 552 if (UNLIKELY(IsUtf16())) { 553 CVector<uint16_t> tmpBuf; 554 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 555 ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0); 556 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1; 557 buf.reserve(len); 558 len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8); 559 str = Span<const uint8_t>(buf.data(), len); 560 } else { 561 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 562 str = Span<const uint8_t>(data, strLen); 563 } 564 return str; 565 } 566 567 Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true) 568 { 569 Span<const uint8_t> str; 570 uint32_t strLen = GetLength(); 571 if (UNLIKELY(IsUtf16())) { 572 CVector<uint16_t> tmpBuf; 573 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf); 574 size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1; 575 buf.reserve(len); 576 len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify); 577 str = Span<const uint8_t>(buf.data(), len); 578 } else { 579 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf); 580 str = Span<const uint8_t>(data, strLen); 581 } 582 return str; 583 } 584 585 inline Span<const uint8_t> FastToUtf8Span() const; 586 587 bool TryToGetInteger(uint32_t *result) 588 { 589 if (!IsInteger()) { 590 return false; 591 } 592 ASSERT(GetLength() <= MAX_CACHED_INTEGER_SIZE); 593 *result = GetIntegerCode(); 594 return true; 595 } 596 597 // using integer number set into hash 598 inline bool TryToSetIntegerHash(int32_t num) 599 { 600 uint32_t hashcode = GetMixHashcode(); 601 if (hashcode == 0 && GetLength() != 0) { 602 SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER); 603 return true; 604 } 605 return false; 606 } 607 608 void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 609 610 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len); 611 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len); 612 static bool CanBeCompressed(const EcmaString *string); 613 614 bool PUBLIC_API ToElementIndex(uint32_t *index); 615 616 bool ToInt(int32_t *index, bool *negative); 617 618 bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data); 619 620 bool PUBLIC_API ToTypedArrayIndex(uint32_t *index); 621 622 template<bool isLower> 623 static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src); 624 625 template<bool isLower> 626 static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 627 628 template<typename T> 629 static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode); 630 631 static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM); 632 633 // single char copy for loop 634 template<typename DstType, typename SrcType> 635 static void CopyChars(DstType *dst, SrcType *src, uint32_t count) 636 { 637 Span<SrcType> srcSp(src, count); 638 Span<DstType> dstSp(dst, count); 639 for (uint32_t i = 0; i < count; i++) { 640 dstSp[i] = srcSp[i]; 641 } 642 } 643 644 // memory block copy 645 template<typename T> 646 static bool MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count); 647 648 // To change the hash algorithm of EcmaString, please modify EcmaString::CalculateConcatHashCode 649 // and EcmaStringHashHelper::ComputeHashForDataPlatform simultaneously!! 650 template <typename T> 651 static uint32_t ComputeHashForData(const T *data, size_t size, 652 uint32_t hashSeed) 653 { 654 if (size <= static_cast<size_t>(EcmaStringHash::MIN_SIZE_FOR_UNROLLING)) { 655 uint32_t hash = hashSeed; 656 for (uint32_t i = 0; i < size ; i++) { 657 hash = (hash << static_cast<uint32_t>(EcmaStringHash::HASH_SHIFT)) - hash + data[i]; 658 } 659 return hash; 660 } 661 return EcmaStringHashHelper::ComputeHashForDataPlatform(data, size, hashSeed); 662 } 663 664 static bool IsASCIICharacter(uint16_t data) 665 { 666 if (data == 0) { 667 return false; 668 } 669 // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000'] 670 return data <= base::utf_helper::UTF8_1B_MAX; 671 } 672 673 template<typename T1, typename T2> 674 static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max); 675 676 template<typename T1, typename T2> 677 static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos); 678 679 bool IsFlat() const; 680 681 bool IsLineString() const 682 { 683 return GetClass()->IsLineString(); 684 } 685 bool IsConstantString() const 686 { 687 return GetClass()->IsConstantString(); 688 } 689 bool IsSlicedString() const 690 { 691 return GetClass()->IsSlicedString(); 692 } 693 bool IsTreeString() const 694 { 695 return GetClass()->IsTreeString(); 696 } 697 bool NotTreeString() const 698 { 699 return !IsTreeString(); 700 } 701 bool IsLineOrConstantString() const 702 { 703 auto hclass = GetClass(); 704 return hclass->IsLineString() || hclass->IsConstantString(); 705 } 706 707 JSType GetStringType() const 708 { 709 JSType type = GetClass()->GetObjectType(); 710 ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST); 711 return type; 712 } 713 714 template <typename Char> 715 static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength); 716 717 template <typename Char> 718 static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos); 719 720 static const uint8_t *PUBLIC_API GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf); 721 722 static const uint8_t *PUBLIC_API GetNonTreeUtf8Data(const EcmaString *src); 723 724 static const uint16_t *PUBLIC_API GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf); 725 726 static const uint16_t *PUBLIC_API GetNonTreeUtf16Data(const EcmaString *src); 727 728 // string must be not flat 729 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type); 730 731 PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 732 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 733 734 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 735 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE); 736 737 static EcmaString *FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string); 738 739 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 740 741 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src); 742 743 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 744 745 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale); 746 747 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src); 748 749 static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src); 750 751 static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, 752 bool toLower, uint32_t startIndex = 0); 753 }; 754 755 // The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data 756 class LineEcmaString : public EcmaString { 757 public: 758 static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16; 759 static constexpr uint32_t INIT_LENGTH_TIMES = 4; 760 // DATA_OFFSET: the string data stored after the string header. 761 // Data can be stored in utf8 or utf16 form according to compressed bit. 762 static constexpr size_t DATA_OFFSET = EcmaString::SIZE; // DATA_OFFSET equal to Empty String size 763 764 CAST_CHECK(LineEcmaString, IsLineString); 765 766 DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength()); 767 768 static LineEcmaString *Cast(EcmaString *str) 769 { 770 return static_cast<LineEcmaString *>(str); 771 } 772 773 static LineEcmaString *Cast(const EcmaString *str) 774 { 775 return LineEcmaString::Cast(const_cast<EcmaString *>(str)); 776 } 777 778 static size_t ComputeSizeUtf8(uint32_t utf8Len) 779 { 780 return DATA_OFFSET + utf8Len; 781 } 782 783 static size_t ComputeSizeUtf16(uint32_t utf16Len) 784 { 785 return DATA_OFFSET + utf16Len * sizeof(uint16_t); 786 } 787 788 static size_t ObjectSize(EcmaString *str) 789 { 790 uint32_t length = str->GetLength(); 791 return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length); 792 } 793 794 static size_t DataSize(EcmaString *str) 795 { 796 uint32_t length = str->GetLength(); 797 return str->IsUtf16() ? length * sizeof(uint16_t) : length; 798 } 799 800 size_t GetPointerLength() 801 { 802 size_t byteSize = DataSize(this); 803 return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType); 804 } 805 806 uint16_t *GetData() const 807 { 808 return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET); 809 } 810 811 template<bool verify = true> 812 uint16_t Get(int32_t index) const 813 { 814 int32_t length = static_cast<int32_t>(GetLength()); 815 if (verify) { 816 if ((index < 0) || (index >= length)) { 817 return 0; 818 } 819 } 820 if (!IsUtf16()) { 821 Span<const uint8_t> sp(GetDataUtf8(), length); 822 return sp[index]; 823 } 824 Span<const uint16_t> sp(GetDataUtf16(), length); 825 return sp[index]; 826 } 827 828 void Set(uint32_t index, uint16_t src) 829 { 830 ASSERT(index < GetLength()); 831 if (IsUtf8()) { 832 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 833 *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src); 834 } else { 835 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 836 *(GetData() + index) = src; 837 } 838 } 839 }; 840 static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0); 841 842 class ConstantString : public EcmaString { 843 public: 844 static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE; 845 // ConstantData is the pointer of const string in the pandafile. 846 // String in pandafile is encoded by the utf8 format. 847 // EntityId is normally the uint32_t index in the pandafile. 848 // When the pandafile is to be removed, EntityId will become -1. 849 // The real string data will be reloacted into bytearray and stored in RelocatedData. 850 // ConstantData will also point at data of bytearray data. 851 ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 852 ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET); 853 ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET); 854 DEFINE_ALIGN_SIZE(LAST_OFFSET); 855 856 CAST_CHECK(ConstantString, IsConstantString); 857 DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET); 858 859 static ConstantString *Cast(EcmaString *str) 860 { 861 return static_cast<ConstantString *>(str); 862 } 863 864 static ConstantString *Cast(const EcmaString *str) 865 { 866 return ConstantString::Cast(const_cast<EcmaString *>(str)); 867 } 868 869 static size_t ObjectSize() 870 { 871 return ConstantString::SIZE; 872 } 873 874 uint32_t GetEntityIdU32() const 875 { 876 ASSERT(GetEntityId() >= 0); 877 return static_cast<uint32_t>(GetEntityId()); 878 } 879 880 template<bool verify = true> 881 uint16_t Get(int32_t index) const 882 { 883 int32_t length = static_cast<int32_t>(GetLength()); 884 if (verify) { 885 if ((index < 0) || (index >= length)) { 886 return 0; 887 } 888 } 889 ASSERT(IsUtf8()); 890 Span<const uint8_t> sp(GetConstantData(), length); 891 return sp[index]; 892 } 893 }; 894 895 // The substrings of another string use SlicedString to describe. 896 class SlicedString : public EcmaString { 897 public: 898 static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13; 899 static constexpr size_t PARENT_OFFSET = EcmaString::SIZE; 900 ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET); 901 ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG); 902 ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE); 903 904 DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET); 905 906 CAST_CHECK(SlicedString, IsSlicedString); 907 private: 908 friend class EcmaString; 909 static SlicedString *Cast(EcmaString *str) 910 { 911 return static_cast<SlicedString *>(str); 912 } 913 914 static SlicedString *Cast(const EcmaString *str) 915 { 916 return SlicedString::Cast(const_cast<EcmaString *>(str)); 917 } 918 919 static size_t ObjectSize() 920 { 921 return SlicedString::SIZE; 922 } 923 924 // Minimum length for a sliced string 925 template<bool verify = true> 926 uint16_t Get(int32_t index) const 927 { 928 int32_t length = static_cast<int32_t>(GetLength()); 929 if (verify) { 930 if ((index < 0) || (index >= length)) { 931 return 0; 932 } 933 } 934 EcmaString *parent = EcmaString::Cast(GetParent()); 935 if (parent->IsLineString()) { 936 if (parent->IsUtf8()) { 937 Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length); 938 return sp[index]; 939 } 940 Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length); 941 return sp[index]; 942 } 943 Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length); 944 return sp[index]; 945 } 946 }; 947 948 class TreeEcmaString : public EcmaString { 949 public: 950 // Minimum length for a tree string 951 static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13; 952 953 static constexpr size_t FIRST_OFFSET = EcmaString::SIZE; 954 ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET); 955 ACCESSORS(Second, SECOND_OFFSET, SIZE); 956 957 DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE); 958 959 CAST_CHECK(TreeEcmaString, IsTreeString); 960 Cast(EcmaString *str)961 static TreeEcmaString *Cast(EcmaString *str) 962 { 963 return static_cast<TreeEcmaString *>(str); 964 } 965 Cast(const EcmaString *str)966 static TreeEcmaString *Cast(const EcmaString *str) 967 { 968 return TreeEcmaString::Cast(const_cast<EcmaString *>(str)); 969 } 970 IsFlat() const971 bool IsFlat() const 972 { 973 auto strSecond = EcmaString::Cast(GetSecond()); 974 return strSecond->GetLength() == 0; 975 } 976 977 template<bool verify = true> Get(int32_t index) const978 uint16_t Get(int32_t index) const 979 { 980 int32_t length = static_cast<int32_t>(GetLength()); 981 if (verify) { 982 if ((index < 0) || (index >= length)) { 983 return 0; 984 } 985 } 986 987 if (IsFlat()) { 988 EcmaString *first = EcmaString::Cast(GetFirst()); 989 return first->At<verify>(index); 990 } 991 EcmaString *string = const_cast<TreeEcmaString *>(this); 992 while (true) { 993 if (string->IsTreeString()) { 994 EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst()); 995 if (static_cast<int32_t>(first->GetLength()) > index) { 996 string = first; 997 } else { 998 index -= static_cast<int32_t>(first->GetLength()); 999 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond()); 1000 } 1001 } else { 1002 return string->At<verify>(index); 1003 } 1004 } 1005 UNREACHABLE(); 1006 } 1007 }; 1008 1009 class FlatStringInfo { 1010 public: FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length)1011 FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string), 1012 startIndex_(startIndex), 1013 length_(length) {} IsUtf8() const1014 bool IsUtf8() const 1015 { 1016 return string_->IsUtf8(); 1017 } 1018 IsUtf16() const1019 bool IsUtf16() const 1020 { 1021 return string_->IsUtf16(); 1022 } 1023 GetString() const1024 EcmaString *GetString() const 1025 { 1026 return string_; 1027 } 1028 SetString(EcmaString *string)1029 void SetString(EcmaString *string) 1030 { 1031 string_ = string; 1032 } 1033 GetStartIndex() const1034 uint32_t GetStartIndex() const 1035 { 1036 return startIndex_; 1037 } 1038 SetStartIndex(uint32_t index)1039 void SetStartIndex(uint32_t index) 1040 { 1041 startIndex_ = index; 1042 } 1043 GetLength() const1044 uint32_t GetLength() const 1045 { 1046 return length_; 1047 } 1048 1049 const uint8_t *GetDataUtf8() const; 1050 const uint16_t *GetDataUtf16() const; 1051 uint8_t *GetDataUtf8Writable() const; 1052 uint16_t *GetDataUtf16Writable() const; 1053 std::u16string ToU16String(uint32_t len = 0); 1054 private: 1055 EcmaString *string_ {nullptr}; 1056 uint32_t startIndex_ {0}; 1057 uint32_t length_ {0}; 1058 }; 1059 1060 // if you want to use functions of EcmaString, please not use directly, 1061 // and use functions of EcmaStringAccessor alternatively. 1062 // eg: EcmaString *str = ***; str->GetLength() -----> EcmaStringAccessor(str).GetLength() 1063 class PUBLIC_API EcmaStringAccessor { 1064 public: EcmaStringAccessor(EcmaString *string)1065 explicit inline EcmaStringAccessor(EcmaString *string) 1066 { 1067 ASSERT(string != nullptr); 1068 string_ = string; 1069 } 1070 1071 explicit EcmaStringAccessor(TaggedObject *obj); 1072 1073 explicit EcmaStringAccessor(JSTaggedValue value); 1074 1075 explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle); 1076 CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString, const JSHandle<EcmaString> &secondString)1077 static uint32_t CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString, 1078 const JSHandle<EcmaString> &secondString) 1079 { 1080 return EcmaString::CalculateAllConcatHashCode(firstString, secondString); 1081 } 1082 1083 static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed); 1084 CreateEmptyString(const EcmaVM *vm)1085 static EcmaString *CreateEmptyString(const EcmaVM *vm) 1086 { 1087 return EcmaString::CreateEmptyString(vm); 1088 } 1089 CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false, uint32_t idOffset = 0)1090 static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress, 1091 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false, 1092 uint32_t idOffset = 0) 1093 { 1094 return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset); 1095 } 1096 CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string, uint32_t offset, uint32_t utf8Len, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)1097 static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1098 uint32_t offset, uint32_t utf8Len, 1099 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1100 { 1101 return EcmaString::CreateFromUtf8CompressedSubString(vm, string, offset, utf8Len, type); 1102 } 1103 CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length, bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0)1104 static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length, 1105 bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0) 1106 { 1107 return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset); 1108 } 1109 CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)1110 static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, 1111 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1112 { 1113 return EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type); 1114 } 1115 CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)1116 static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len, 1117 bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1118 { 1119 return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type); 1120 } 1121 Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle, const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)1122 static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle, 1123 const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1124 { 1125 return EcmaString::Concat(vm, str1Handle, str2Handle, type); 1126 } 1127 CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, uint32_t length, bool compressed)1128 static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, 1129 uint32_t length, bool compressed) 1130 { 1131 return EcmaString::CopyStringToOldSpace(vm, original, length, compressed); 1132 } 1133 1134 // can change src data structure FastSubString(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)1135 static EcmaString *FastSubString(const EcmaVM *vm, 1136 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1137 { 1138 return EcmaString::FastSubString(vm, src, start, length); 1139 } 1140 1141 // get GetSubString(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)1142 static EcmaString *GetSubString(const EcmaVM *vm, 1143 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length) 1144 { 1145 return EcmaString::GetSubString(vm, src, start, length); 1146 } 1147 IsUtf8() const1148 bool IsUtf8() const 1149 { 1150 return string_->IsUtf8(); 1151 } 1152 IsUtf16() const1153 bool IsUtf16() const 1154 { 1155 return string_->IsUtf16(); 1156 } 1157 GetLength() const1158 uint32_t GetLength() const 1159 { 1160 return string_->GetLength(); 1161 } 1162 1163 // require is LineString 1164 inline size_t GetUtf8Length(bool isGetBufferSize = false) const; 1165 ObjectSize() const1166 size_t ObjectSize() const 1167 { 1168 if (string_->IsLineString()) { 1169 return LineEcmaString::ObjectSize(string_); 1170 } if (string_->IsConstantString()) { 1171 return ConstantString::ObjectSize(); 1172 } else { 1173 return TreeEcmaString::SIZE; 1174 } 1175 } 1176 1177 // For TreeString, the calculation result is size of LineString correspondingly. GetFlatStringSize() const1178 size_t GetFlatStringSize() const 1179 { 1180 if (string_->IsConstantString()) { 1181 return ConstantString::ObjectSize(); 1182 } 1183 return LineEcmaString::ObjectSize(string_); 1184 } 1185 IsInternString() const1186 bool IsInternString() const 1187 { 1188 return string_->IsInternString(); 1189 } 1190 SetInternString()1191 void SetInternString() 1192 { 1193 string_->SetIsInternString(); 1194 } 1195 ClearInternString()1196 void ClearInternString() 1197 { 1198 string_->ClearInternStringFlag(); 1199 } 1200 1201 // require is LineString 1202 // It's Utf8 format, but without 0 in the end. 1203 inline const uint8_t *GetDataUtf8(); 1204 1205 // require is LineString 1206 inline const uint16_t *GetDataUtf16(); 1207 1208 // not change string data structure. 1209 // if string is not flat, this func has low efficiency. ToU16String(uint32_t len = 0)1210 std::u16string ToU16String(uint32_t len = 0) 1211 { 1212 return string_->ToU16String(len); 1213 } 1214 1215 // not change string data structure. 1216 // if string is not flat, this func has low efficiency. ToOneByteDataForced()1217 std::unique_ptr<uint8_t[]> ToOneByteDataForced() 1218 { 1219 return string_->ToOneByteDataForced(); 1220 } 1221 1222 // not change string data structure. 1223 // if string is not flat, this func has low efficiency. ToUtf8Span(CVector<uint8_t> &buf)1224 Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf) 1225 { 1226 return string_->ToUtf8Span(buf); 1227 } 1228 1229 // only for string is flat and using UTF8 encoding 1230 inline Span<const uint8_t> FastToUtf8Span(); 1231 1232 // Using string's hash to figure out whether the string can be converted to integer TryToGetInteger(uint32_t *result)1233 inline bool TryToGetInteger(uint32_t *result) 1234 { 1235 return string_->TryToGetInteger(result); 1236 } 1237 TryToSetIntegerHash(int32_t num)1238 inline bool TryToSetIntegerHash(int32_t num) 1239 { 1240 return string_->TryToSetIntegerHash(num); 1241 } 1242 1243 // not change string data structure. 1244 // if string is not flat, this func has low efficiency. 1245 std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1246 1247 // this function convert for Utf8 1248 CString Utf8ConvertToString(); 1249 1250 std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT); 1251 // not change string data structure. 1252 // if string is not flat, this func has low efficiency. 1253 CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false); 1254 1255 // not change string data structure. 1256 // if string is not flat, this func has low efficiency. WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false)1257 uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false) 1258 { 1259 return string_->WriteUtf8(buf, maxLength, isWriteBuffer); 1260 } 1261 WriteToUtf16(uint16_t *buf, uint32_t bufLength)1262 uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength) 1263 { 1264 return string_->WriteUtf16(buf, GetLength(), bufLength); 1265 } 1266 WriteToOneByte(uint8_t *buf, uint32_t maxLength)1267 uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength) 1268 { 1269 return string_->WriteOneByte(buf, maxLength); 1270 } 1271 1272 // not change string data structure. 1273 // if string is not flat, this func has low efficiency. WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const1274 uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const 1275 { 1276 return string_->CopyDataUtf16(buf, maxLength); 1277 } 1278 1279 template <typename Char> WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos)1280 static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos) 1281 { 1282 src->WriteToFlatWithPos(src, buf, length, pos); 1283 } 1284 1285 template <typename Char> WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)1286 static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength) 1287 { 1288 src->WriteToFlat(src, buf, maxLength); 1289 } 1290 1291 // require dst is LineString 1292 // not change src data structure. 1293 // if src is not flat, this func has low efficiency. 1294 inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length); 1295 1296 // not change src data structure. 1297 // if src is not flat, this func has low efficiency. 1298 template<bool verify = true> Get(uint32_t index) const1299 uint16_t Get(uint32_t index) const 1300 { 1301 return string_->At<verify>(index); 1302 } 1303 1304 // require string is LineString. Set(uint32_t index, uint16_t src)1305 void Set(uint32_t index, uint16_t src) 1306 { 1307 return string_->WriteData(index, src); 1308 } 1309 1310 // not change src data structure. 1311 // if src is not flat, this func has low efficiency. GetHashcode()1312 uint32_t GetHashcode() 1313 { 1314 return string_->GetHashcode(); 1315 } 1316 GetRawHashcode()1317 uint32_t GetRawHashcode() 1318 { 1319 return string_->GetRawHashcode(); 1320 } 1321 1322 // not change src data structure. 1323 // if src is not flat, this func has low efficiency. ComputeRawHashcode()1324 std::pair<uint32_t, bool> ComputeRawHashcode() 1325 { 1326 return string_->ComputeRawHashcode(); 1327 } 1328 ComputeHashcode()1329 uint32_t ComputeHashcode() 1330 { 1331 return string_->ComputeHashcode(); 1332 } 1333 ComputeHashcode(uint32_t rawHashSeed, bool isInteger)1334 uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger) 1335 { 1336 return string_->ComputeHashcode(rawHashSeed, isInteger); 1337 } 1338 ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)1339 static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress) 1340 { 1341 return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress); 1342 } 1343 ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)1344 static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length) 1345 { 1346 return EcmaString::ComputeHashcodeUtf16(utf16Data, length); 1347 } 1348 1349 // can change receiver and search data structure IndexOf(const EcmaVM *vm, const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)1350 static int32_t IndexOf(const EcmaVM *vm, 1351 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1352 { 1353 return EcmaString::IndexOf(vm, receiver, search, pos); 1354 } 1355 1356 // can change receiver and search data structure LastIndexOf(const EcmaVM *vm, const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)1357 static int32_t LastIndexOf(const EcmaVM *vm, 1358 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0) 1359 { 1360 return EcmaString::LastIndexOf(vm, receiver, search, pos); 1361 } 1362 1363 // can change receiver and search data structure Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right)1364 static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right) 1365 { 1366 return EcmaString::Compare(vm, left, right); 1367 } 1368 1369 1370 // can change receiver and search data structure IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right, uint32_t offset = 0)1371 static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, 1372 const JSHandle<EcmaString>& right, uint32_t offset = 0) 1373 { 1374 return EcmaString::IsSubStringAt(vm, left, right, offset); 1375 } 1376 1377 // can change str1 and str2 data structure StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)1378 static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2) 1379 { 1380 return EcmaString::StringsAreEqual(vm, str1, str2); 1381 } 1382 1383 // not change str1 and str2 data structure. 1384 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqual(EcmaString *str1, EcmaString *str2)1385 static bool StringsAreEqual(EcmaString *str1, EcmaString *str2) 1386 { 1387 return EcmaString::StringsAreEqual(str1, str2); 1388 } 1389 1390 // not change str1 and str2 data structure. 1391 // if str1 or str2 is not flat, this func has low efficiency. StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2)1392 static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2) 1393 { 1394 return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2); 1395 } 1396 1397 // not change str1 data structure. 1398 // if str1 is not flat, this func has low efficiency. StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, bool canBeCompress)1399 static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, 1400 bool canBeCompress) 1401 { 1402 return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress); 1403 } 1404 1405 // not change str1 data structure. 1406 // if str1 is not flat, this func has low efficiency. StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)1407 static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len) 1408 { 1409 return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len); 1410 } 1411 1412 // require str1 and str2 are LineString. 1413 // not change string data structure. 1414 // if string is not flat, this func has low efficiency. EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)1415 bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2) 1416 { 1417 return string_->EqualToSplicedString(str1, str2); 1418 } 1419 CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)1420 static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len) 1421 { 1422 return EcmaString::CanBeCompressed(utf8Data, utf8Len); 1423 } 1424 CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)1425 static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len) 1426 { 1427 return EcmaString::CanBeCompressed(utf16Data, utf16Len); 1428 } 1429 1430 // require string is LineString CanBeCompressed(const EcmaString *string)1431 static bool CanBeCompressed(const EcmaString *string) 1432 { 1433 return EcmaString::CanBeCompressed(string); 1434 } 1435 1436 // not change string data structure. 1437 // if string is not flat, this func has low efficiency. ToElementIndex(uint32_t *index)1438 bool ToElementIndex(uint32_t *index) 1439 { 1440 return string_->ToElementIndex(index); 1441 } 1442 1443 // not change string data structure. 1444 // if string is not flat, this func has low efficiency. ToInt(int32_t *index, bool *negative)1445 bool ToInt(int32_t *index, bool *negative) 1446 { 1447 return string_->ToInt(index, negative); 1448 } 1449 1450 // not change string data structure. 1451 // if string is not flat, this func has low efficiency. ToTypedArrayIndex(uint32_t *index)1452 bool PUBLIC_API ToTypedArrayIndex(uint32_t *index) 1453 { 1454 return string_->ToTypedArrayIndex(index); 1455 } 1456 ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)1457 static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1458 { 1459 return EcmaString::ToLower(vm, src); 1460 } 1461 TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)1462 static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1463 { 1464 return EcmaString::TryToLower(vm, src); 1465 } 1466 TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)1467 static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1468 { 1469 return EcmaString::TryToUpper(vm, src); 1470 } 1471 ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)1472 static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src) 1473 { 1474 return EcmaString::ToUpper(vm, src); 1475 } 1476 ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)1477 static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1478 { 1479 return EcmaString::ToLocaleLower(vm, src, locale); 1480 } 1481 ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)1482 static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale) 1483 { 1484 return EcmaString::ToLocaleUpper(vm, src, locale); 1485 } 1486 Trim(const JSThread *thread, const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM)1487 static EcmaString *Trim(const JSThread *thread, 1488 const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM) 1489 { 1490 return EcmaString::Trim(thread, src, mode); 1491 } 1492 IsASCIICharacter(uint16_t data)1493 static bool IsASCIICharacter(uint16_t data) 1494 { 1495 if (data == 0) { 1496 return false; 1497 } 1498 // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000'] 1499 return data <= base::utf_helper::UTF8_1B_MAX; 1500 } 1501 IsFlat() const1502 bool IsFlat() const 1503 { 1504 return string_->IsFlat(); 1505 } 1506 IsLineString() const1507 bool IsLineString() const 1508 { 1509 return string_->IsLineString(); 1510 } 1511 IsConstantString() const1512 bool IsConstantString() const 1513 { 1514 return string_->IsConstantString(); 1515 } 1516 IsSlicedString() const1517 bool IsSlicedString() const 1518 { 1519 return string_->IsSlicedString(); 1520 } 1521 IsLineOrConstantString() const1522 bool IsLineOrConstantString() const 1523 { 1524 return string_->IsLineOrConstantString(); 1525 } 1526 GetStringType() const1527 JSType GetStringType() const 1528 { 1529 return string_->GetStringType(); 1530 } 1531 IsTreeString() const1532 bool IsTreeString() const 1533 { 1534 return string_->IsTreeString(); 1535 } 1536 NotTreeString() const1537 bool NotTreeString() const 1538 { 1539 return string_->NotTreeString(); 1540 } 1541 1542 // the returned string may be a linestring, constantstring, or slicestring!! Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)1543 PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1544 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1545 { 1546 return EcmaString::Flatten(vm, string, type); 1547 } 1548 FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)1549 static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1550 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1551 { 1552 return EcmaString::FlattenAllString(vm, string, type); 1553 } 1554 SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)1555 static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, 1556 MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE) 1557 { 1558 return EcmaString::SlowFlatten(vm, string, type); 1559 } 1560 FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)1561 static EcmaString *FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string) 1562 { 1563 return EcmaString::FlattenNoGCForSnapshot(vm, string); 1564 } 1565 GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)1566 static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf) 1567 { 1568 return EcmaString::GetUtf8DataFlat(src, buf); 1569 } 1570 GetNonTreeUtf8Data(const EcmaString *src)1571 static const uint8_t *GetNonTreeUtf8Data(const EcmaString *src) 1572 { 1573 return EcmaString::GetNonTreeUtf8Data(src); 1574 } 1575 GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)1576 static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf) 1577 { 1578 return EcmaString::GetUtf16DataFlat(src, buf); 1579 } 1580 GetNonTreeUtf16Data(const EcmaString *src)1581 static const uint16_t *GetNonTreeUtf16Data(const EcmaString *src) 1582 { 1583 return EcmaString::GetNonTreeUtf16Data(src); 1584 } 1585 1586 static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str); 1587 1588 private: 1589 EcmaString *string_ {nullptr}; 1590 }; 1591 } // namespace ecmascript 1592 } // namespace panda 1593 #endif // ECMASCRIPT_STRING_H 1594