1/*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef ECMASCRIPT_STRING_H
17#define ECMASCRIPT_STRING_H
18
19#include <cstddef>
20#include <cstdint>
21#include <cstring>
22
23#include "ecmascript/base/utf_helper.h"
24#include "ecmascript/common.h"
25#include "ecmascript/ecma_macros.h"
26#include "ecmascript/js_hclass.h"
27#include "ecmascript/js_tagged_value.h"
28#include "ecmascript/mem/barriers.h"
29#include "ecmascript/mem/space.h"
30#include "ecmascript/mem/tagged_object.h"
31#include "ecmascript/platform/ecma_string_hash_helper.h"
32
33#include "libpandabase/macros.h"
34#include "securec.h"
35#include "unicode/locid.h"
36
37namespace panda {
38namespace test {
39    class EcmaStringEqualsTest;
40}
41namespace ecmascript {
42template<typename T>
43class JSHandle;
44class JSPandaFile;
45class EcmaVM;
46class LineEcmaString;
47class ConstantString;
48class TreeEcmaString;
49class SlicedString;
50class FlatStringInfo;
51
// Range check for a requested string length. Expands to a bare `if` statement: when `length` is greater than
// or equal to MAX_STRING_LENGTH it invokes THROW_RANGE_ERROR_AND_RETURN with the VM's JSThread, the message
// "Invalid string length" and nullptr as the return-value argument.
// MAX_STRING_LENGTH is referenced unqualified, so that name must be visible wherever the macro is expanded.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length)                                       \
    if ((length) >= MAX_STRING_LENGTH) {                                                     \
        THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr); \
    }
57
58class EcmaString : public TaggedObject {
59    /* Mix Hash Code: --   { 0 | [31 bits raw hash code] }     computed through string
60                      \    { 1 | [31 bits integer numbers] }   fastpath for string to number
61    */
62public:
63    CAST_CHECK(EcmaString, IsString);
64
65    static constexpr uint32_t IS_INTEGER_MASK = 1U << 31;
66    static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1;
67    static constexpr uint32_t STRING_INTERN_BIT = 0x2;
68    static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning
69    static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U;
70    static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF;
71    static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9;
72
73    static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize();
74    // In last bit of mix_length we store if this string is compressed or not.
75    ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET)
76    // In last bit of mix_hash we store if this string is small-integer number or not.
77    ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE)
78
// Value of the compression flag (STRING_COMPRESSED_BIT) inside the mix-length word.
// IsUtf8() tests for STRING_COMPRESSED and IsUtf16() for STRING_UNCOMPRESSED.
enum CompressedStatus {
    STRING_COMPRESSED = 0,
    STRING_UNCOMPRESSED = 1,
};
83
// Flag passed (as the isInteger argument) to MixHashcode()/SetRawHashcode() to say whether the stored
// 31-bit value is an integer number or a raw hash code.
enum IsIntegerStatus {
    NOT_INTEGER = 0,
    IS_INTEGER = 1,
};
88
// Selects the trimming variant requested from Trim()/TrimBody(); stored in a single byte.
enum TrimMode : uint8_t {
    TRIM = 0,
    TRIM_START = 1,
    TRIM_END = 2,
};
94
// Status codes for the string-add (concatenation) optimization. Numbering starts at 1.
// NOTE(review): the consumers of these values are not visible in this header section.
enum ConcatOptStatus {
    BEGIN_STRING_ADD = 1,
    IN_STRING_ADD = 2,
    CONFIRMED_IN_STRING_ADD = 3,
    END_STRING_ADD = 4,
    INVALID_STRING_ADD = 5,
    HAS_BACKING_STORE = 6,
};
103
104private:
105    friend class EcmaStringAccessor;
106    friend class LineEcmaString;
107    friend class ConstantString;
108    friend class TreeEcmaString;
109    friend class SlicedString;
110    friend class FlatStringInfo;
111    friend class NameDictionary;
112    friend class panda::test::EcmaStringEqualsTest;
113
114    static EcmaString *CreateEmptyString(const EcmaVM *vm);
115    static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
116        bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false,
117        uint32_t idOffset = 0);
118    static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
119        uint32_t offset, uint32_t utf8Len, MemSpaceType type = MemSpaceType::SEMI_SPACE);
120    static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
121        MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
122    static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
123        bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
124    static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
125    static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed);
126    static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed);
127    static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm,
128        size_t length, bool compressed, MemSpaceType type);
129    static EcmaString *CreateTreeString(const EcmaVM *vm,
130        const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed);
131    static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data,
132        size_t length, bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0);
133    static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left,
134        const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
135    template<typename T1, typename T2>
136    static uint32_t CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst,
137                                                const T2 *dataSecond, size_t sizeSecond);
138    static uint32_t CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString,
139                                               const JSHandle<EcmaString> &secondString);
140    static uint32_t CalculateConcatHashCode(const JSHandle<EcmaString> &firstString,
141                                            const JSHandle<EcmaString> &secondString);
142    static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
143        uint32_t length, bool compressed);
144    static EcmaString *FastSubString(const EcmaVM *vm,
145        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
146    static EcmaString *GetSlicedString(const EcmaVM *vm,
147        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
148    static EcmaString *GetSubString(const EcmaVM *vm,
149        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
150    // require src is LineString
151    // not change src data structure
152    static inline EcmaString *FastSubUtf8String(const EcmaVM *vm,
153        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
154    // require src is LineString
155    // not change src data structure
156    static inline EcmaString *FastSubUtf16String(const EcmaVM *vm,
157        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
158    inline void TrimLineString(const JSThread *thread, uint32_t newLength);
159    inline bool IsUtf8() const
160    {
161        return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED;
162    }
163
164    inline bool IsUtf16() const
165    {
166        return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED;
167    }
168
169    inline bool IsInteger()
170    {
171        return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK;
172    }
173
174    // require is LineString
175    inline uint16_t *GetData() const;
176    inline const uint8_t *GetDataUtf8() const;
177    inline const uint16_t *GetDataUtf16() const;
178
179    // require is LineString
180    inline uint8_t *GetDataUtf8Writable();
181    inline uint16_t *GetDataUtf16Writable();
182
183    inline uint32_t GetLength() const
184    {
185        return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT;
186    }
187
188    inline void SetLength(uint32_t length, bool compressed = false)
189    {
190        ASSERT(length < MAX_STRING_LENGTH);
191        // Use 0u for compressed/utf8 expression
192        SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED));
193    }
194
195    inline uint32_t GetRawHashcode() const
196    {
197        return GetMixHashcode() & (~IS_INTEGER_MASK);
198    }
199
200    static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger)
201    {
202        return isInteger ? (hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK));
203    }
204
205    inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false)
206    {
207        // Use 0u for not integer string's expression
208        SetMixHashcode(MixHashcode(hashcode, isInteger));
209    }
210
211    inline size_t GetUtf8Length(bool modify = true, bool isGetBufferSize = false) const;
212
213    inline void SetIsInternString()
214    {
215        SetMixLength(GetMixLength() | STRING_INTERN_BIT);
216    }
217
218    inline bool IsInternString() const
219    {
220        return (GetMixLength() & STRING_INTERN_BIT) != 0;
221    }
222
223    inline void ClearInternStringFlag()
224    {
225        SetMixLength(GetMixLength() & ~STRING_INTERN_BIT);
226    }
227
228    inline bool TryGetHashCode(uint32_t *hash)
229    {
230        uint32_t hashcode = GetMixHashcode();
231        if (hashcode == 0 && GetLength() != 0) {
232            return false;
233        }
234        *hash = hashcode;
235        return true;
236    }
237
238    inline uint32_t GetIntegerCode()
239    {
240        ASSERT(GetMixHashcode() & IS_INTEGER_MASK);
241        return GetRawHashcode();
242    }
243
244    // not change this data structure.
245    // if string is not flat, this func has low efficiency.
246    uint32_t PUBLIC_API GetHashcode()
247    {
248        uint32_t hashcode = GetMixHashcode();
249        // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0.
250        if (hashcode == 0 && GetLength() != 0) {
251            hashcode = ComputeHashcode();
252            SetMixHashcode(hashcode);
253        }
254        return hashcode;
255    }
256
// Returns true when `c` is one of the characters '0'..'9'.
template<typename T>
inline static bool IsDecimalDigitChar(const T c)
{
    const bool belowZero = c < '0';
    const bool aboveNine = c > '9';
    return !(belowZero || aboveNine);
}
262
263    static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c)
264    {
265        if (!IsDecimalDigitChar(c)) {
266            return false;
267        }
268        int charDate = c - '0';
269        *num = (*num) * 10 + charDate; // 10: decimal factor
270        return true;
271    }
272
273    bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const;
274
275    template<typename T>
276    static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed)
277    {
278        ASSERT(size >= 0);
279        if (hashSeed == 0) {
280            if (IsDecimalDigitChar(data[0]) && data[0] != '0') {
281                uint32_t num = data[0] - '0';
282                uint32_t i = 1;
283                do {
284                    if (i == size) {
285                        // compute mix hash
286                        if (num <= MAX_INTEGER_HASH_NUMBER) {
287                            *hash = MixHashcode(num, IS_INTEGER);
288                            return true;
289                        }
290                        return false;
291                    }
292                } while (ComputeIntegerHash(&num, data[i++]));
293            }
294            if (size == 1 && (data[0] == '0')) {
295                *hash = MixHashcode(0, IS_INTEGER);
296                return true;
297            }
298        } else {
299            if (IsDecimalDigitChar(data[0])) {
300                uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor
301                uint32_t i = 1;
302                do {
303                    if (i == size) {
304                        // compute mix hash
305                        if (num <= MAX_INTEGER_HASH_NUMBER) {
306                            *hash = MixHashcode(num, IS_INTEGER);
307                            return true;
308                        }
309                        return false;
310                    }
311                } while (ComputeIntegerHash(&num, data[i++]));
312            }
313        }
314        return false;
315    }
316
317    // not change this data structure.
318    // if string is not flat, this func has low efficiency.
319    uint32_t PUBLIC_API ComputeHashcode() const;
320    std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const;
321    uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const;
322
323    static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress);
324    static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length);
325
326    template<bool verify = true>
327    uint16_t At(int32_t index) const;
328
329    // require is LineString
330    void WriteData(uint32_t index, uint16_t src);
331
332    // can change left and right data structure
333    static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right);
334
335    static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
336        const JSHandle<EcmaString>& right, uint32_t offset);
337
338    // Check that two spans are equal. Should have the same length.
339    /* static */
340    template<typename T, typename T1>
341    static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2)
342    {
343        ASSERT(str1.Size() <= str2.Size());
344        size_t size = str1.Size();
345        if (!std::is_same_v<T, T1>) {
346            for (size_t i = 0; i < size; i++) {
347                auto left = static_cast<uint16_t>(str1[i]);
348                auto right = static_cast<uint16_t>(str2[i]);
349                if (left != right) {
350                    return false;
351                }
352            }
353            return true;
354        }
355
356        return !memcmp(str1.data(), str2.data(), size * sizeof(T));
357    }
358
359    // Converts utf8Data to utf16 and compare it with given utf16_data.
360    static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
361                                  uint32_t utf16Len);
362    // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence.
363    bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2);
364    // Compares strings by bytes, It doesn't check canonical unicode equivalence.
365    static PUBLIC_API bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1,
366        const JSHandle<EcmaString> &str2);
367    // Compares strings by bytes, It doesn't check canonical unicode equivalence.
368    static PUBLIC_API bool StringsAreEqual(EcmaString *str1, EcmaString *str2);
369    // Two strings have the same type of utf encoding format.
370    static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2);
371    static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2);
372    // Compares strings by bytes, It doesn't check canonical unicode equivalence.
373    // not change str1 data structure.
374    // if str1 is not flat, this func has low efficiency.
375    static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
376                                       bool canBeCompress);
377    // Compares strings by bytes, It doesn't check canonical unicode equivalence.
378    // not change str1 data structure.
379    // if str1 is not flat, this func has low efficiency.
380    static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len);
381
382    // can change receiver and search data structure
383    static int32_t IndexOf(const EcmaVM *vm,
384        const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
385
386    // can change receiver and search data structure
387    static int32_t LastIndexOf(const EcmaVM *vm,
388        const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
389
390    inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const
391    {
392        if (maxLength == 0) {
393            return 1; // maxLength was -1 at napi
394        }
395        size_t length = GetLength();
396        if (length > maxLength) {
397            return 0;
398        }
399        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
400        buf[maxLength - 1] = '\0';
401        // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8
402        return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1;  // add place for zero in the end
403    }
404
405    // It allows user to copy into buffer even if maxLength < length
406    inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const
407    {
408        if (maxLength == 0) {
409            return 1; // maxLength was -1 at napi
410        }
411        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
412        buf[maxLength - 1] = '\0';
413        return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1;
414    }
415
416    size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const
417    {
418        if (IsUtf16()) {
419            CVector<uint16_t> tmpBuf;
420            const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
421            if (length > bufLength) {
422                if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) {
423                    LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength";
424                    UNREACHABLE();
425                }
426                return bufLength;
427            }
428            if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
429                LOG_FULL(FATAL) << "memcpy_s failed";
430                UNREACHABLE();
431            }
432            return length;
433        }
434        CVector<uint8_t> tmpBuf;
435        const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf);
436        if (length > bufLength) {
437            return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength);
438        }
439        return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength);
440    }
441
442    // It allows user to copy into buffer even if maxLength < length
443    inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const
444    {
445        if (bufLength == 0) {
446            return 0;
447        }
448        // Returns a number representing a valid backrest length.
449        return CopyDataToUtf16(buf, targetLength, bufLength);
450    }
451
452    size_t WriteOneByte(uint8_t *buf, size_t maxLength) const
453    {
454        if (maxLength == 0) {
455            return 0;
456        }
457        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
458        buf[maxLength - 1] = '\0';
459        uint32_t length = GetLength();
460        if (!IsUtf16()) {
461            CVector<uint8_t> tmpBuf;
462            const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
463            if (length > maxLength) {
464                length = maxLength;
465            }
466            if (memcpy_s(buf, maxLength, data, length) != EOK) {
467                LOG_FULL(FATAL) << "memcpy_s failed when write one byte";
468                UNREACHABLE();
469            }
470            return length;
471        }
472
473        CVector<uint16_t> tmpBuf;
474        const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
475        if (length > maxLength) {
476            return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength);
477        }
478        return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength);
479    }
480
481    size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength,
482                              bool modify = true, bool isWriteBuffer = false) const
483    {
484        uint32_t len = GetLength();
485        if (start + length > len) {
486            return 0;
487        }
488        if (!IsUtf16()) {
489            if (length > std::numeric_limits<size_t>::max() / 2 - 1) {  // 2: half
490                LOG_FULL(FATAL) << " length is higher than half of size_t::max";
491                UNREACHABLE();
492            }
493            CVector<uint8_t> tmpBuf;
494            const uint8_t *data = GetUtf8DataFlat(this, tmpBuf) + start;
495            // Only copy maxLength number of chars into buffer if length > maxLength
496            auto dataLen = std::min(length, maxLength);
497            std::copy(data, data + dataLen, buf);
498            return dataLen;
499        }
500        CVector<uint16_t> tmpBuf;
501        const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
502        if (length > maxLength) {
503            return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start,
504                                                              modify, isWriteBuffer);
505        }
506        return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start,
507                                                          modify, isWriteBuffer);
508    }
509
510    inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const
511    {
512        uint32_t length = GetLength();
513        if (length > maxLength) {
514            return 0;
515        }
516        if (IsUtf16()) {
517            // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
518            CVector<uint16_t> tmpBuf;
519            const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
520            if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
521                LOG_FULL(FATAL) << "memcpy_s failed";
522                UNREACHABLE();
523            }
524            return length;
525        }
526        CVector<uint8_t> tmpBuf;
527        const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
528        return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, maxLength);
529    }
530
531    std::u16string ToU16String(uint32_t len = 0);
532
533    std::unique_ptr<uint8_t[]> ToOneByteDataForced()
534    {
535        uint8_t *buf = nullptr;
536        auto length = GetLength();
537        if (IsUtf16()) {
538            auto size = length * sizeof(uint16_t);
539            buf = new uint8_t[size]();
540            CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length);
541        } else {
542            buf = new uint8_t[length + 1]();
543            CopyDataUtf8(buf, length + 1);
544        }
545        return std::unique_ptr<uint8_t[]>(buf);
546    }
547
548    Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false)
549    {
550        Span<const uint8_t> str;
551        uint32_t strLen = GetLength();
552        if (UNLIKELY(IsUtf16())) {
553            CVector<uint16_t> tmpBuf;
554            const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
555            ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0);
556            size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1;
557            buf.reserve(len);
558            len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8);
559            str = Span<const uint8_t>(buf.data(), len);
560        } else {
561            const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
562            str = Span<const uint8_t>(data, strLen);
563        }
564        return str;
565    }
566
567    Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true)
568    {
569        Span<const uint8_t> str;
570        uint32_t strLen = GetLength();
571        if (UNLIKELY(IsUtf16())) {
572            CVector<uint16_t> tmpBuf;
573            const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
574            size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1;
575            buf.reserve(len);
576            len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify);
577            str = Span<const uint8_t>(buf.data(), len);
578        } else {
579            const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
580            str = Span<const uint8_t>(data, strLen);
581        }
582        return str;
583    }
584
585    inline Span<const uint8_t> FastToUtf8Span() const;
586
587    bool TryToGetInteger(uint32_t *result)
588    {
589        if (!IsInteger()) {
590            return false;
591        }
592        ASSERT(GetLength() <= MAX_CACHED_INTEGER_SIZE);
593        *result = GetIntegerCode();
594        return true;
595    }
596
597    // using integer number set into hash
598    inline bool TryToSetIntegerHash(int32_t num)
599    {
600        uint32_t hashcode = GetMixHashcode();
601        if (hashcode == 0 && GetLength() != 0) {
602            SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER);
603            return true;
604        }
605        return false;
606    }
607
608    void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
609
610    static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len);
611    static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len);
612    static bool CanBeCompressed(const EcmaString *string);
613
614    bool PUBLIC_API ToElementIndex(uint32_t *index);
615
616    bool ToInt(int32_t *index, bool *negative);
617
618    bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data);
619
620    bool PUBLIC_API ToTypedArrayIndex(uint32_t *index);
621
622    template<bool isLower>
623    static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src);
624
625    template<bool isLower>
626    static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
627
628    template<typename T>
629    static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode);
630
631    static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM);
632
633    // single char copy for loop
634    template<typename DstType, typename SrcType>
635    static void CopyChars(DstType *dst, SrcType *src, uint32_t count)
636    {
637        Span<SrcType> srcSp(src, count);
638        Span<DstType> dstSp(dst, count);
639        for (uint32_t i = 0; i < count; i++) {
640            dstSp[i] = srcSp[i];
641        }
642    }
643
644    // memory block copy
645    template<typename T>
646    static bool MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count);
647
648    // To change the hash algorithm of EcmaString, please modify EcmaString::CalculateConcatHashCode
649    // and EcmaStringHashHelper::ComputeHashForDataPlatform simultaneously!!
650    template <typename T>
651    static uint32_t ComputeHashForData(const T *data, size_t size,
652                                       uint32_t hashSeed)
653    {
654        if (size <= static_cast<size_t>(EcmaStringHash::MIN_SIZE_FOR_UNROLLING)) {
655            uint32_t hash = hashSeed;
656            for (uint32_t i = 0; i < size ; i++) {
657                hash = (hash << static_cast<uint32_t>(EcmaStringHash::HASH_SHIFT)) - hash + data[i];
658            }
659            return hash;
660        }
661        return EcmaStringHashHelper::ComputeHashForDataPlatform(data, size, hashSeed);
662    }
663
664    static bool IsASCIICharacter(uint16_t data)
665    {
666        if (data == 0) {
667            return false;
668        }
669        // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
670        return data <= base::utf_helper::UTF8_1B_MAX;
671    }
672
673    template<typename T1, typename T2>
674    static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max);
675
676    template<typename T1, typename T2>
677    static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos);
678
679    bool IsFlat() const;
680
681    bool IsLineString() const
682    {
683        return GetClass()->IsLineString();
684    }
685    bool IsConstantString() const
686    {
687        return GetClass()->IsConstantString();
688    }
689    bool IsSlicedString() const
690    {
691        return GetClass()->IsSlicedString();
692    }
693    bool IsTreeString() const
694    {
695        return GetClass()->IsTreeString();
696    }
697    bool NotTreeString() const
698    {
699        return !IsTreeString();
700    }
701    bool IsLineOrConstantString() const
702    {
703        auto hclass = GetClass();
704        return hclass->IsLineString() || hclass->IsConstantString();
705    }
706
707    JSType GetStringType() const
708    {
709        JSType type = GetClass()->GetObjectType();
710        ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST);
711        return type;
712    }
713
    // Static helper taking a source string and a caller-supplied buffer whose element type is `Char`.
    // NOTE(review): the definition is not in this part of the header - presumably it copies the characters
    // of `src` into `buf`, bounded by `maxLength`; confirm against the implementation.
    template <typename Char>
    static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength);

    // Variant of WriteToFlat that takes a `length` and a position `pos` instead of `maxLength`.
    // NOTE(review): presumably copies `length` characters of `src` starting at `pos`; confirm.
    template <typename Char>
    static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos);

    // Returns a pointer to utf8 data for `src`; `buf` is a caller-owned vector passed by reference.
    // NOTE(review): presumably `buf` is used as backing storage when `src` is not flat - confirm.
    static const uint8_t *PUBLIC_API GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf);

    // Returns a pointer to utf8 data for `src`. NOTE(review): going by the name, `src` must not be a
    // tree string - confirm the precondition against the definition.
    static const uint8_t *PUBLIC_API GetNonTreeUtf8Data(const EcmaString *src);

    // utf16 counterpart of GetUtf8DataFlat.
    static const uint16_t *PUBLIC_API GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf);

    // utf16 counterpart of GetNonTreeUtf8Data.
    static const uint16_t *PUBLIC_API GetNonTreeUtf16Data(const EcmaString *src);

    // Precondition: the string must not already be flat.
    static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type);

    // Flattens `string`; `type` selects the memory space and defaults to SHARED_OLD_SPACE.
    // NOTE(review): presumably `type` is only relevant when a new string has to be allocated - confirm.
    PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
                               MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);

    // Same parameters as Flatten, but the result is reported as a FlatStringInfo
    // (string pointer, start index and length).
    static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
                                            MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);

    // Flatten variant that takes a raw EcmaString pointer instead of a handle.
    // NOTE(review): the name suggests it must not trigger GC and is meant for snapshot code - confirm.
    static EcmaString *FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string);

    // Case conversion of `src`; the result is returned as an EcmaString pointer.
    static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);

    static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src);

    // Case conversion of `src` that additionally takes the ICU locale to use.
    static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);

    static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);

    // NOTE(review): how the "Try" variants differ from ToLower/ToUpper is not visible here - presumably
    // they take a fast path first; confirm against the definitions.
    static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);

    static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src);

    // Converts a utf8 string to lower case (`toLower` == true) or upper case (`toLower` == false).
    // NOTE(review): presumably `startIndex` is the first character that needs converting - confirm.
    static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
                                                 bool toLower, uint32_t startIndex = 0);
753};
754
755// The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data
756class LineEcmaString : public EcmaString {
757public:
758    static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16;
759    static constexpr uint32_t INIT_LENGTH_TIMES = 4;
760    // DATA_OFFSET: the string data stored after the string header.
761    // Data can be stored in utf8 or utf16 form according to compressed bit.
762    static constexpr size_t DATA_OFFSET = EcmaString::SIZE;  // DATA_OFFSET equal to Empty String size
763
764    CAST_CHECK(LineEcmaString, IsLineString);
765
766    DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength());
767
768    static LineEcmaString *Cast(EcmaString *str)
769    {
770        return static_cast<LineEcmaString *>(str);
771    }
772
773    static LineEcmaString *Cast(const EcmaString *str)
774    {
775        return LineEcmaString::Cast(const_cast<EcmaString *>(str));
776    }
777
778    static size_t ComputeSizeUtf8(uint32_t utf8Len)
779    {
780        return DATA_OFFSET + utf8Len;
781    }
782
783    static size_t ComputeSizeUtf16(uint32_t utf16Len)
784    {
785        return DATA_OFFSET + utf16Len * sizeof(uint16_t);
786    }
787
788    static size_t ObjectSize(EcmaString *str)
789    {
790        uint32_t length = str->GetLength();
791        return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length);
792    }
793
794    static size_t DataSize(EcmaString *str)
795    {
796        uint32_t length = str->GetLength();
797        return str->IsUtf16() ? length * sizeof(uint16_t) : length;
798    }
799
800    size_t GetPointerLength()
801    {
802        size_t byteSize = DataSize(this);
803        return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType);
804    }
805
806    uint16_t *GetData() const
807    {
808        return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET);
809    }
810
811    template<bool verify = true>
812    uint16_t Get(int32_t index) const
813    {
814        int32_t length = static_cast<int32_t>(GetLength());
815        if (verify) {
816            if ((index < 0) || (index >= length)) {
817                return 0;
818            }
819        }
820        if (!IsUtf16()) {
821            Span<const uint8_t> sp(GetDataUtf8(), length);
822            return sp[index];
823        }
824        Span<const uint16_t> sp(GetDataUtf16(), length);
825        return sp[index];
826    }
827
828    void Set(uint32_t index, uint16_t src)
829    {
830        ASSERT(index < GetLength());
831        if (IsUtf8()) {
832            // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
833            *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src);
834        } else {
835            // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
836            *(GetData() + index) = src;
837        }
838    }
839};
840static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0);
841
842class ConstantString : public EcmaString {
843public:
844    static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE;
845    // ConstantData is the pointer of const string in the pandafile.
846    // String in pandafile is encoded by the utf8 format.
847    // EntityId is normally the uint32_t index in the pandafile.
848    // When the pandafile is to be removed, EntityId will become -1.
849    // The real string data will be reloacted into bytearray and stored in RelocatedData.
850    // ConstantData will also point at data of bytearray data.
851    ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
852    ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET);
853    ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET);
854    DEFINE_ALIGN_SIZE(LAST_OFFSET);
855
856    CAST_CHECK(ConstantString, IsConstantString);
857    DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
858
859    static ConstantString *Cast(EcmaString *str)
860    {
861        return static_cast<ConstantString *>(str);
862    }
863
864    static ConstantString *Cast(const EcmaString *str)
865    {
866        return ConstantString::Cast(const_cast<EcmaString *>(str));
867    }
868
869    static size_t ObjectSize()
870    {
871        return ConstantString::SIZE;
872    }
873
874    uint32_t GetEntityIdU32() const
875    {
876        ASSERT(GetEntityId() >= 0);
877        return static_cast<uint32_t>(GetEntityId());
878    }
879
880    template<bool verify = true>
881    uint16_t Get(int32_t index) const
882    {
883        int32_t length = static_cast<int32_t>(GetLength());
884        if (verify) {
885            if ((index < 0) || (index >= length)) {
886                return 0;
887            }
888        }
889        ASSERT(IsUtf8());
890        Span<const uint8_t> sp(GetConstantData(), length);
891        return sp[index];
892    }
893};
894
895// The substrings of another string use SlicedString to describe.
896class SlicedString : public EcmaString {
897public:
898    static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13;
899    static constexpr size_t PARENT_OFFSET = EcmaString::SIZE;
900    ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET);
901    ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG);
902    ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE);
903
904    DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET);
905
906    CAST_CHECK(SlicedString, IsSlicedString);
907private:
908    friend class EcmaString;
909    static SlicedString *Cast(EcmaString *str)
910    {
911        return static_cast<SlicedString *>(str);
912    }
913
914    static SlicedString *Cast(const EcmaString *str)
915    {
916        return SlicedString::Cast(const_cast<EcmaString *>(str));
917    }
918
919    static size_t ObjectSize()
920    {
921        return SlicedString::SIZE;
922    }
923
924    // Minimum length for a sliced string
925    template<bool verify = true>
926    uint16_t Get(int32_t index) const
927    {
928        int32_t length = static_cast<int32_t>(GetLength());
929        if (verify) {
930            if ((index < 0) || (index >= length)) {
931                return 0;
932            }
933        }
934        EcmaString *parent = EcmaString::Cast(GetParent());
935        if (parent->IsLineString()) {
936            if (parent->IsUtf8()) {
937                Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length);
938                return sp[index];
939            }
940            Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length);
941            return sp[index];
942        }
943        Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length);
944        return sp[index];
945    }
946};
947
948class TreeEcmaString : public EcmaString {
949public:
950    // Minimum length for a tree string
951    static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13;
952
953    static constexpr size_t FIRST_OFFSET = EcmaString::SIZE;
954    ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET);
955    ACCESSORS(Second, SECOND_OFFSET, SIZE);
956
957    DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE);
958
959    CAST_CHECK(TreeEcmaString, IsTreeString);
960
961    static TreeEcmaString *Cast(EcmaString *str)
962    {
963        return static_cast<TreeEcmaString *>(str);
964    }
965
966    static TreeEcmaString *Cast(const EcmaString *str)
967    {
968        return TreeEcmaString::Cast(const_cast<EcmaString *>(str));
969    }
970
971    bool IsFlat() const
972    {
973        auto strSecond = EcmaString::Cast(GetSecond());
974        return strSecond->GetLength() == 0;
975    }
976
977    template<bool verify = true>
978    uint16_t Get(int32_t index) const
979    {
980        int32_t length = static_cast<int32_t>(GetLength());
981        if (verify) {
982            if ((index < 0) || (index >= length)) {
983                return 0;
984            }
985        }
986
987        if (IsFlat()) {
988            EcmaString *first = EcmaString::Cast(GetFirst());
989            return first->At<verify>(index);
990        }
991        EcmaString *string = const_cast<TreeEcmaString *>(this);
992        while (true) {
993            if (string->IsTreeString()) {
994                EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
995                if (static_cast<int32_t>(first->GetLength()) > index) {
996                    string = first;
997                } else {
998                    index -= static_cast<int32_t>(first->GetLength());
999                    string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond());
1000                }
1001            } else {
1002                return string->At<verify>(index);
1003            }
1004        }
1005        UNREACHABLE();
1006    }
1007};
1008
1009class FlatStringInfo {
1010public:
1011    FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string),
1012                                                                               startIndex_(startIndex),
1013                                                                               length_(length) {}
1014    bool IsUtf8() const
1015    {
1016        return string_->IsUtf8();
1017    }
1018
1019    bool IsUtf16() const
1020    {
1021        return string_->IsUtf16();
1022    }
1023
1024    EcmaString *GetString() const
1025    {
1026        return string_;
1027    }
1028
1029    void SetString(EcmaString *string)
1030    {
1031        string_ = string;
1032    }
1033
1034    uint32_t GetStartIndex() const
1035    {
1036        return startIndex_;
1037    }
1038
1039    void SetStartIndex(uint32_t index)
1040    {
1041        startIndex_ = index;
1042    }
1043
1044    uint32_t GetLength() const
1045    {
1046        return length_;
1047    }
1048
1049    const uint8_t *GetDataUtf8() const;
1050    const uint16_t *GetDataUtf16() const;
1051    uint8_t *GetDataUtf8Writable() const;
1052    uint16_t *GetDataUtf16Writable() const;
1053    std::u16string ToU16String(uint32_t len = 0);
1054private:
1055    EcmaString *string_ {nullptr};
1056    uint32_t startIndex_ {0};
1057    uint32_t length_ {0};
1058};
1059
1060// if you want to use functions of EcmaString, please not use directly,
1061// and use functions of EcmaStringAccessor alternatively.
1062// eg: EcmaString *str = ***; str->GetLength() ----->  EcmaStringAccessor(str).GetLength()
1063class PUBLIC_API EcmaStringAccessor {
1064public:
1065    explicit inline EcmaStringAccessor(EcmaString *string)
1066    {
1067        ASSERT(string != nullptr);
1068        string_ = string;
1069    }
1070
1071    explicit EcmaStringAccessor(TaggedObject *obj);
1072
1073    explicit EcmaStringAccessor(JSTaggedValue value);
1074
1075    explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle);
1076
1077    static uint32_t CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString,
1078                                               const JSHandle<EcmaString> &secondString)
1079    {
1080        return EcmaString::CalculateAllConcatHashCode(firstString, secondString);
1081    }
1082
1083    static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed);
1084
1085    static EcmaString *CreateEmptyString(const EcmaVM *vm)
1086    {
1087        return EcmaString::CreateEmptyString(vm);
1088    }
1089
1090    static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress,
1091                                      MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false,
1092                                      uint32_t idOffset = 0)
1093    {
1094        return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset);
1095    }
1096
1097    static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1098                                                         uint32_t offset, uint32_t utf8Len,
1099                                                         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1100    {
1101        return EcmaString::CreateFromUtf8CompressedSubString(vm, string, offset, utf8Len, type);
1102    }
1103
1104    static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length,
1105        bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0)
1106    {
1107        return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset);
1108    }
1109
1110    static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
1111        MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1112    {
1113        return EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type);
1114    }
1115
1116    static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
1117                                       bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1118    {
1119        return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type);
1120    }
1121
1122    static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle,
1123        const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1124    {
1125        return EcmaString::Concat(vm, str1Handle, str2Handle, type);
1126    }
1127
1128    static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
1129        uint32_t length, bool compressed)
1130    {
1131        return EcmaString::CopyStringToOldSpace(vm, original, length, compressed);
1132    }
1133
1134    // can change src data structure
1135    static EcmaString *FastSubString(const EcmaVM *vm,
1136        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
1137    {
1138        return EcmaString::FastSubString(vm, src, start, length);
1139    }
1140
1141    // get
1142    static EcmaString *GetSubString(const EcmaVM *vm,
1143        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
1144    {
1145        return EcmaString::GetSubString(vm, src, start, length);
1146    }
1147
1148    bool IsUtf8() const
1149    {
1150        return string_->IsUtf8();
1151    }
1152
1153    bool IsUtf16() const
1154    {
1155        return string_->IsUtf16();
1156    }
1157
1158    uint32_t GetLength() const
1159    {
1160        return string_->GetLength();
1161    }
1162
1163    // require is LineString
1164    inline size_t GetUtf8Length(bool isGetBufferSize = false) const;
1165
1166    size_t ObjectSize() const
1167    {
1168        if (string_->IsLineString()) {
1169            return LineEcmaString::ObjectSize(string_);
1170        } if (string_->IsConstantString()) {
1171            return ConstantString::ObjectSize();
1172        } else {
1173            return TreeEcmaString::SIZE;
1174        }
1175    }
1176
1177    // For TreeString, the calculation result is size of LineString correspondingly.
1178    size_t GetFlatStringSize() const
1179    {
1180        if (string_->IsConstantString()) {
1181            return ConstantString::ObjectSize();
1182        }
1183        return LineEcmaString::ObjectSize(string_);
1184    }
1185
1186    bool IsInternString() const
1187    {
1188        return string_->IsInternString();
1189    }
1190
1191    void SetInternString()
1192    {
1193        string_->SetIsInternString();
1194    }
1195
1196    void ClearInternString()
1197    {
1198        string_->ClearInternStringFlag();
1199    }
1200
1201    // require is LineString
1202    // It's Utf8 format, but without 0 in the end.
1203    inline const uint8_t *GetDataUtf8();
1204
1205    // require is LineString
1206    inline const uint16_t *GetDataUtf16();
1207
1208    // not change string data structure.
1209    // if string is not flat, this func has low efficiency.
1210    std::u16string ToU16String(uint32_t len = 0)
1211    {
1212        return string_->ToU16String(len);
1213    }
1214
1215    // not change string data structure.
1216    // if string is not flat, this func has low efficiency.
1217    std::unique_ptr<uint8_t[]> ToOneByteDataForced()
1218    {
1219        return string_->ToOneByteDataForced();
1220    }
1221
1222    // not change string data structure.
1223    // if string is not flat, this func has low efficiency.
1224    Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf)
1225    {
1226        return string_->ToUtf8Span(buf);
1227    }
1228
1229    // only for string is flat and using UTF8 encoding
1230    inline Span<const uint8_t> FastToUtf8Span();
1231
1232    // Using string's hash to figure out whether the string can be converted to integer
1233    inline bool TryToGetInteger(uint32_t *result)
1234    {
1235        return string_->TryToGetInteger(result);
1236    }
1237
1238    inline bool TryToSetIntegerHash(int32_t num)
1239    {
1240        return string_->TryToSetIntegerHash(num);
1241    }
1242
1243    // not change string data structure.
1244    // if string is not flat, this func has low efficiency.
1245    std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
1246
1247    // this function convert for Utf8
1248    CString Utf8ConvertToString();
1249
1250    std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
1251    // not change string data structure.
1252    // if string is not flat, this func has low efficiency.
1253    CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false);
1254
1255    // not change string data structure.
1256    // if string is not flat, this func has low efficiency.
1257    uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false)
1258    {
1259        return string_->WriteUtf8(buf, maxLength, isWriteBuffer);
1260    }
1261
1262    uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength)
1263    {
1264        return string_->WriteUtf16(buf, GetLength(), bufLength);
1265    }
1266
1267    uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength)
1268    {
1269        return string_->WriteOneByte(buf, maxLength);
1270    }
1271
1272    // not change string data structure.
1273    // if string is not flat, this func has low efficiency.
1274    uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const
1275    {
1276        return string_->CopyDataUtf16(buf, maxLength);
1277    }
1278
1279    template <typename Char>
1280    static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos)
1281    {
1282        src->WriteToFlatWithPos(src, buf, length, pos);
1283    }
1284
1285    template <typename Char>
1286    static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
1287    {
1288        src->WriteToFlat(src, buf, maxLength);
1289    }
1290
1291    // require dst is LineString
1292    // not change src data structure.
1293    // if src is not flat, this func has low efficiency.
1294    inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
1295
1296    // not change src data structure.
1297    // if src is not flat, this func has low efficiency.
1298    template<bool verify = true>
1299    uint16_t Get(uint32_t index) const
1300    {
1301        return string_->At<verify>(index);
1302    }
1303
1304    // require string is LineString.
1305    void Set(uint32_t index, uint16_t src)
1306    {
1307        return string_->WriteData(index, src);
1308    }
1309
1310    // not change src data structure.
1311    // if src is not flat, this func has low efficiency.
1312    uint32_t GetHashcode()
1313    {
1314        return string_->GetHashcode();
1315    }
1316
1317    uint32_t GetRawHashcode()
1318    {
1319        return string_->GetRawHashcode();
1320    }
1321
1322    // not change src data structure.
1323    // if src is not flat, this func has low efficiency.
1324    std::pair<uint32_t, bool> ComputeRawHashcode()
1325    {
1326        return string_->ComputeRawHashcode();
1327    }
1328
1329    uint32_t ComputeHashcode()
1330    {
1331        return string_->ComputeHashcode();
1332    }
1333
1334    uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger)
1335    {
1336        return string_->ComputeHashcode(rawHashSeed, isInteger);
1337    }
1338
1339    static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
1340    {
1341        return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
1342    }
1343
1344    static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
1345    {
1346        return EcmaString::ComputeHashcodeUtf16(utf16Data, length);
1347    }
1348
1349    // can change receiver and search data structure
1350    static int32_t IndexOf(const EcmaVM *vm,
1351        const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
1352    {
1353        return EcmaString::IndexOf(vm, receiver, search, pos);
1354    }
1355
1356    // can change receiver and search data structure
1357    static int32_t LastIndexOf(const EcmaVM *vm,
1358        const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
1359    {
1360        return EcmaString::LastIndexOf(vm, receiver, search, pos);
1361    }
1362
1363    // can change receiver and search data structure
1364    static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right)
1365    {
1366        return EcmaString::Compare(vm, left, right);
1367    }
1368
1369
1370    // can change receiver and search data structure
1371    static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
1372        const JSHandle<EcmaString>& right, uint32_t offset = 0)
1373    {
1374        return EcmaString::IsSubStringAt(vm, left, right, offset);
1375    }
1376
1377    // can change str1 and str2 data structure
1378    static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
1379    {
1380        return EcmaString::StringsAreEqual(vm, str1, str2);
1381    }
1382
1383    // not change str1 and str2 data structure.
1384    // if str1 or str2 is not flat, this func has low efficiency.
1385    static bool StringsAreEqual(EcmaString *str1, EcmaString *str2)
1386    {
1387        return EcmaString::StringsAreEqual(str1, str2);
1388    }
1389
1390    // not change str1 and str2 data structure.
1391    // if str1 or str2 is not flat, this func has low efficiency.
1392    static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2)
1393    {
1394        return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2);
1395    }
1396
1397    // not change str1 data structure.
1398    // if str1 is not flat, this func has low efficiency.
1399    static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
1400                                       bool canBeCompress)
1401    {
1402        return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress);
1403    }
1404
1405    // not change str1 data structure.
1406    // if str1 is not flat, this func has low efficiency.
1407    static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
1408    {
1409        return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len);
1410    }
1411
1412    // require str1 and str2 are LineString.
1413    // not change string data structure.
1414    // if string is not flat, this func has low efficiency.
1415    bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
1416    {
1417        return string_->EqualToSplicedString(str1, str2);
1418    }
1419
1420    static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
1421    {
1422        return EcmaString::CanBeCompressed(utf8Data, utf8Len);
1423    }
1424
1425    static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
1426    {
1427        return EcmaString::CanBeCompressed(utf16Data, utf16Len);
1428    }
1429
1430    // require string is LineString
1431    static bool CanBeCompressed(const EcmaString *string)
1432    {
1433        return EcmaString::CanBeCompressed(string);
1434    }
1435
1436    // not change string data structure.
1437    // if string is not flat, this func has low efficiency.
1438    bool ToElementIndex(uint32_t *index)
1439    {
1440        return string_->ToElementIndex(index);
1441    }
1442
1443    // not change string data structure.
1444    // if string is not flat, this func has low efficiency.
1445    bool ToInt(int32_t *index, bool *negative)
1446    {
1447        return string_->ToInt(index, negative);
1448    }
1449
1450    // not change string data structure.
1451    // if string is not flat, this func has low efficiency.
1452    bool PUBLIC_API ToTypedArrayIndex(uint32_t *index)
1453    {
1454        return string_->ToTypedArrayIndex(index);
1455    }
1456
1457    static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1458    {
1459        return EcmaString::ToLower(vm, src);
1460    }
1461
1462    static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1463    {
1464        return EcmaString::TryToLower(vm, src);
1465    }
1466
1467    static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1468    {
1469        return EcmaString::TryToUpper(vm, src);
1470    }
1471
1472    static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1473    {
1474        return EcmaString::ToUpper(vm, src);
1475    }
1476
1477    static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1478    {
1479        return EcmaString::ToLocaleLower(vm, src, locale);
1480    }
1481
1482    static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1483    {
1484        return EcmaString::ToLocaleUpper(vm, src, locale);
1485    }
1486
1487    static EcmaString *Trim(const JSThread *thread,
1488        const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM)
1489    {
1490        return EcmaString::Trim(thread, src, mode);
1491    }
1492
1493    static bool IsASCIICharacter(uint16_t data)
1494    {
1495        if (data == 0) {
1496            return false;
1497        }
1498        // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
1499        return data <= base::utf_helper::UTF8_1B_MAX;
1500    }
1501
1502    bool IsFlat() const
1503    {
1504        return string_->IsFlat();
1505    }
1506
1507    bool IsLineString() const
1508    {
1509        return string_->IsLineString();
1510    }
1511
1512    bool IsConstantString() const
1513    {
1514        return string_->IsConstantString();
1515    }
1516
1517    bool IsSlicedString() const
1518    {
1519        return string_->IsSlicedString();
1520    }
1521
1522    bool IsLineOrConstantString() const
1523    {
1524        return string_->IsLineOrConstantString();
1525    }
1526
1527    JSType GetStringType() const
1528    {
1529        return string_->GetStringType();
1530    }
1531
1532    bool IsTreeString() const
1533    {
1534        return string_->IsTreeString();
1535    }
1536
1537    bool NotTreeString() const
1538    {
1539        return string_->NotTreeString();
1540    }
1541
1542    // the returned string may be a linestring, constantstring, or slicestring!!
1543    PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1544        MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1545    {
1546        return EcmaString::Flatten(vm, string, type);
1547    }
1548
1549    static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1550        MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1551    {
1552        return EcmaString::FlattenAllString(vm, string, type);
1553    }
1554
1555    static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1556        MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1557    {
1558        return EcmaString::SlowFlatten(vm, string, type);
1559    }
1560
1561    static EcmaString *FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)
1562    {
1563        return EcmaString::FlattenNoGCForSnapshot(vm, string);
1564    }
1565
1566    static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1567    {
1568        return EcmaString::GetUtf8DataFlat(src, buf);
1569    }
1570
1571    static const uint8_t *GetNonTreeUtf8Data(const EcmaString *src)
1572    {
1573        return EcmaString::GetNonTreeUtf8Data(src);
1574    }
1575
1576    static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1577    {
1578        return EcmaString::GetUtf16DataFlat(src, buf);
1579    }
1580
1581    static const uint16_t *GetNonTreeUtf16Data(const EcmaString *src)
1582    {
1583        return EcmaString::GetNonTreeUtf16Data(src);
1584    }
1585
1586    static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str);
1587
1588private:
1589    EcmaString *string_ {nullptr};
1590};
1591}  // namespace ecmascript
1592}  // namespace panda
1593#endif  // ECMASCRIPT_STRING_H
1594