1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/ecma_string-inl.h"
17 
18 #include "ecmascript/ecma_string_table.h"
19 #include "ecmascript/platform/ecma_string_hash.h"
20 
21 namespace panda::ecmascript {
22 
// Bit masks that keep only the N least-significant bits of a value.
constexpr size_t LOW_3BITS = (static_cast<size_t>(1) << 3U) - 1U;  // 0x7
constexpr size_t LOW_4BITS = (static_cast<size_t>(1) << 4U) - 1U;  // 0xF
constexpr size_t LOW_5BITS = (static_cast<size_t>(1) << 5U) - 1U;  // 0x1F
constexpr size_t LOW_6BITS = (static_cast<size_t>(1) << 6U) - 1U;  // 0x3F

// UTF-16 surrogate boundaries: first high-surrogate code unit, first low-surrogate code unit,
// and the first code point above the Basic Multilingual Plane.
// NOTE(review): "RAIR" looks like a misspelling of "PAIR"; the name is kept because it may be
// referenced from code outside this section.
constexpr size_t H_SURROGATE_START = 0xD800;
constexpr size_t L_SURROGATE_START = 0xDC00;
constexpr size_t SURROGATE_RAIR_START = 0x10000;

// Bit positions; presumably shift distances for UTF-8/UTF-16 conversion code elsewhere in the file.
constexpr size_t OFFSET_6POS = 6;
constexpr size_t OFFSET_10POS = 10;
constexpr size_t OFFSET_12POS = 12;
constexpr size_t OFFSET_18POS = 18;
34 
Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)35 EcmaString *EcmaString::Concat(const EcmaVM *vm,
36     const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
37 {
38     ASSERT(IsSMemSpace(type));
39     // allocator may trig gc and move src, need to hold it
40     EcmaString *strLeft = *left;
41     EcmaString *strRight = *right;
42     uint32_t leftLength = strLeft->GetLength();
43     uint32_t rightLength = strRight->GetLength();
44     uint32_t newLength = leftLength + rightLength;
45     if (newLength == 0) {
46         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
47     }
48 
49     if (leftLength == 0) {
50         return strRight;
51     }
52     if (rightLength == 0) {
53         return strLeft;
54     }
55     // if the result string is small, make a LineString
56     bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
57     if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
58         ASSERT(strLeft->IsLineOrConstantString());
59         ASSERT(strRight->IsLineOrConstantString());
60         auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
61         // retrieve strings after gc
62         strLeft = *left;
63         strRight = *right;
64         if (compressed) {
65             // copy left part
66             Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
67             Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
68             EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
69             // copy right part
70             sp = sp.SubSpan(leftLength);
71             Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
72             EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
73         } else {
74             // copy left part
75             Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
76             if (strLeft->IsUtf8()) {
77                 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
78             } else {
79                 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
80                 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
81             }
82             // copy right part
83             sp = sp.SubSpan(leftLength);
84             if (strRight->IsUtf8()) {
85                 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
86             } else {
87                 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
88                 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
89             }
90         }
91         ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
92         return newString;
93     }
94     return CreateTreeString(vm, left, right, newLength, compressed);
95 }
96 
97 /* static */
CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, uint32_t length, bool compressed)98 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
99     uint32_t length, bool compressed)
100 {
101     if (original->IsConstantString()) {
102         return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
103     }
104     JSHandle<EcmaString> newString(vm->GetJSThread(),
105         CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
106     auto strOrigin = FlattenAllString(vm, original);
107     if (compressed) {
108         // copy
109         Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
110         Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
111         EcmaString::MemCopyChars(sp, length, srcSp, length);
112     } else {
113         // copy left part
114         Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
115         if (strOrigin.IsUtf8()) {
116             EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
117         } else {
118             Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
119             EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
120         }
121     }
122     ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
123     return *newString;
124 }
125 
126 /* static */
FastSubString(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)127 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
128     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
129 {
130     ASSERT((start + length) <= src->GetLength());
131     if (length == 0) {
132         return *vm->GetFactory()->GetEmptyString();
133     }
134     if (start == 0 && length == src->GetLength()) {
135         return *src;
136     }
137     if (src->IsUtf8()) {
138         return FastSubUtf8String(vm, src, start, length);
139     }
140     return FastSubUtf16String(vm, src, start, length);
141 }
142 
143 /* static */
GetSlicedString(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)144 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
145     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
146 {
147     ASSERT((start + length) <= src->GetLength());
148     JSHandle<SlicedString> slicedString(vm->GetJSThread(), CreateSlicedString(vm));
149     FlatStringInfo srcFlat = FlattenAllString(vm, src);
150     slicedString->SetLength(length, srcFlat.GetString()->IsUtf8());
151     slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString()));
152     slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
153     return *slicedString;
154 }
155 
156 /* static */
GetSubString(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)157 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
158     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
159 {
160     ASSERT((start + length) <= src->GetLength());
161     if (length == 1) {
162         JSThread *thread = vm->GetJSThread();
163         uint16_t res = EcmaStringAccessor(src).Get<false>(start);
164         if (EcmaStringAccessor::CanBeCompressed(&res, 1)) {
165             JSHandle<SingleCharTable> singleCharTable(thread, thread->GetSingleCharTable());
166             return EcmaString::Cast(singleCharTable->GetStringFromSingleCharTable(res).GetTaggedObject());
167         }
168     }
169     if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) {
170         if (start == 0 && length == src->GetLength()) {
171             return *src;
172         }
173         if (src->IsUtf16()) {
174             FlatStringInfo srcFlat = FlattenAllString(vm, src);
175             bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
176             if (canBeCompressed) {
177                 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
178                 srcFlat = FlattenAllString(vm, src);
179                 CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
180                 return *string;
181             }
182         }
183         return GetSlicedString(vm, src, start, length);
184     }
185     return FastSubString(vm, src, start, length);
186 }
187 
WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)188 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
189 {
190     ASSERT(IsLineString() && !IsConstantString());
191     if (IsUtf8()) {
192         ASSERT(src->IsUtf8());
193         CVector<uint8_t> buf;
194         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
195         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
196         if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
197             LOG_FULL(FATAL) << "memcpy_s failed";
198             UNREACHABLE();
199         }
200     } else if (src->IsUtf8()) {
201         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
202         CVector<uint8_t> buf;
203         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
204         Span<uint16_t> to(GetDataUtf16Writable() + start, length);
205         Span<const uint8_t> from(data, length);
206         for (uint32_t i = 0; i < length; i++) {
207             to[i] = from[i];
208         }
209     } else {
210         CVector<uint16_t> buf;
211         const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
212         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
213         if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
214             destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
215             LOG_FULL(FATAL) << "memcpy_s failed";
216             UNREACHABLE();
217         }
218     }
219 }
220 
221 template<typename T1, typename T2>
CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)222 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
223 {
224     for (int32_t i = 0; i < count; ++i) {
225         auto left = static_cast<int32_t>(lhsSp[i]);
226         auto right = static_cast<int32_t>(rhsSp[i]);
227         if (left != right) {
228             return left - right;
229         }
230     }
231     return 0;
232 }
233 
Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)234 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
235 {
236     if (*left == *right) {
237         return 0;
238     }
239     FlatStringInfo lhs = FlattenAllString(vm, left);
240     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
241     FlatStringInfo rhs = FlattenAllString(vm, right);
242     lhs.SetString(*string);
243     int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
244     int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
245     int32_t countDiff = lhsCount - rhsCount;
246     int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
247     if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
248         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
249         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
250         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
251         if (charDiff != 0) {
252             return charDiff;
253         }
254     } else if (!lhs.IsUtf16()) {
255         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
256         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
257         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
258         if (charDiff != 0) {
259             return charDiff;
260         }
261     } else if (!rhs.IsUtf16()) {
262         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
263         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
264         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
265         if (charDiff != 0) {
266             return charDiff;
267         }
268     } else {
269         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
270         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
271         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
272         if (charDiff != 0) {
273             return charDiff;
274         }
275     }
276     return countDiff;
277 }
278 
279 template<typename T1, typename T2>
IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset)280 bool IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset)
281 {
282     int rhsSize = static_cast<int>(rhsSp.size());
283     ASSERT(rhsSize + offset <= lhsSp.size());
284     for (int i = 0; i < rhsSize; ++i) {
285         auto left = static_cast<int32_t>(lhsSp[offset + static_cast<uint32_t>(i)]);
286         auto right = static_cast<int32_t>(rhsSp[i]);
287         if (left != right) {
288             return false;
289         }
290     }
291     return true;
292 }
293 
294 
295 /**
296  * left: text string
297  * right: pattern string
298  * example 1: IsSubStringAt("IsSubStringAt", "Is", 0) return true
299  * example 2: IsSubStringAt("IsSubStringAt", "It", 0) return false
300 */
IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right, uint32_t offset)301 bool EcmaString::IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
302     const JSHandle<EcmaString>& right, uint32_t offset)
303 {
304     FlatStringInfo lhs = FlattenAllString(vm, left);
305     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
306     FlatStringInfo rhs = FlattenAllString(vm, right);
307     lhs.SetString(*string);
308     int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
309     int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
310     if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
311         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
312         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
313         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
314     } else if (!lhs.IsUtf16()) {
315         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
316         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
317         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
318     } else if (!rhs.IsUtf16()) {
319         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
320         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
321         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
322     } else {
323         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
324         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
325         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
326     }
327     return false;
328 }
329 
330 /* static */
331 template<typename T1, typename T2>
IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)332 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
333 {
334     ASSERT(rhsSp.size() > 0);
335     auto first = static_cast<int32_t>(rhsSp[0]);
336     for (int32_t i = pos; i <= max; i++) {
337         if (static_cast<int32_t>(lhsSp[i]) != first) {
338             i++;
339             while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
340                 i++;
341             }
342         }
343         /* Found first character, now look at the rest of rhsSp */
344         if (i <= max) {
345             int j = i + 1;
346             int end = j + static_cast<int>(rhsSp.size()) - 1;
347 
348             for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
349             }
350             if (j == end) {
351                 /* Found whole string. */
352                 return i;
353             }
354         }
355     }
356     return -1;
357 }
358 
359 template<typename T1, typename T2>
LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)360 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
361 {
362     int rhsSize = static_cast<int>(rhsSp.size());
363     ASSERT(rhsSize > 0);
364     auto first = rhsSp[0];
365     for (int32_t i = pos; i >= 0; i--) {
366         if (lhsSp[i] != first) {
367             continue;
368         }
369         /* Found first character, now look at the rest of rhsSp */
370         int j = 1;
371         while (j < rhsSize) {
372             if (rhsSp[j] != lhsSp[i + j]) {
373                 break;
374             }
375             j++;
376         }
377         if (j == rhsSize) {
378             return i;
379         }
380     }
381     return -1;
382 }
383 
IndexOf(const EcmaVM *vm, const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)384 int32_t EcmaString::IndexOf(const EcmaVM *vm,
385     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
386 {
387     EcmaString *lhstring = *receiver;
388     EcmaString *rhstring = *search;
389     if (lhstring == nullptr || rhstring == nullptr) {
390         return -1;
391     }
392     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
393     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
394 
395     if (pos > lhsCount) {
396         return -1;
397     }
398 
399     if (rhsCount == 0) {
400         return pos;
401     }
402 
403     if (pos < 0) {
404         pos = 0;
405     }
406 
407     int32_t max = lhsCount - rhsCount;
408     if (max < 0) {
409         return -1;
410     }
411 
412     if (pos + rhsCount > lhsCount) {
413         return -1;
414     }
415 
416     FlatStringInfo lhs = FlattenAllString(vm, receiver);
417     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
418     FlatStringInfo rhs = FlattenAllString(vm, search);
419     lhs.SetString(*string);
420 
421     if (rhs.IsUtf8() && lhs.IsUtf8()) {
422         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
423         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
424         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
425     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
426         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
427         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
428         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
429     } else if (rhs.IsUtf16()) {
430         return -1;
431     } else {  // NOLINT(readability-else-after-return)
432         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
433         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
434         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
435     }
436 }
437 
LastIndexOf(const EcmaVM *vm, const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)438 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
439     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
440 {
441     EcmaString *lhstring = *receiver;
442     EcmaString *rhstring = *search;
443     if (lhstring == nullptr || rhstring == nullptr) {
444         return -1;
445     }
446 
447     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
448     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
449     if (lhsCount < rhsCount) {
450         return -1;
451     }
452 
453     if (pos < 0) {
454         pos = 0;
455     }
456 
457     if (pos > lhsCount) {
458         pos = lhsCount;
459     }
460 
461     if (pos + rhsCount > lhsCount) {
462         pos = lhsCount - rhsCount;
463     }
464 
465     if (rhsCount == 0) {
466         return pos;
467     }
468 
469     FlatStringInfo lhs = FlattenAllString(vm, receiver);
470     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
471     FlatStringInfo rhs = FlattenAllString(vm, search);
472     lhs.SetString(*string);
473     if (rhs.IsUtf8() && lhs.IsUtf8()) {
474         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
475         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
476         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
477     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
478         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
479         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
480         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
481     } else if (rhs.IsUtf16()) {
482         return -1;
483     } else {  // NOLINT(readability-else-after-return)
484         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
485         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
486         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
487     }
488 }
489 
ToU16String(uint32_t len)490 std::u16string EcmaString::ToU16String(uint32_t len)
491 {
492     uint32_t length = len > 0 ? len : GetLength();
493     std::u16string result;
494     if (IsUtf16()) {
495         CVector<uint16_t> buf;
496         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
497         result = base::StringHelper::Utf16ToU16String(data, length);
498     } else {
499         CVector<uint8_t> buf;
500         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
501         result = base::StringHelper::Utf8ToU16String(data, length);
502     }
503     return result;
504 }
505 
506 //static
CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString, const JSHandle<EcmaString> &secondString)507 uint32_t EcmaString::CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString,
508                                                 const JSHandle<EcmaString> &secondString)
509 {
510     uint32_t hashCode;
511     uint32_t firstLength = firstString->GetLength();
512     uint32_t secondLength = secondString->GetLength();
513     if ((firstLength + secondLength < MAX_ELEMENT_INDEX_LEN) &&
514         firstString->IsUtf8() && secondString->IsUtf8() &&
515         firstString->IsInteger() && secondString->IsInteger()) {
516             firstString->HashIntegerString(firstLength, &hashCode, 0);
517             secondString->HashIntegerString(secondLength, &hashCode, hashCode);
518             return hashCode;
519     }
520     hashCode = EcmaString::CalculateConcatHashCode(firstString, secondString);
521     hashCode = MixHashcode(hashCode, NOT_INTEGER);
522     return hashCode;
523 }
524 
525 // static
526 template<typename T1, typename T2>
CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst, const T2 *dataSecond, size_t sizeSecond)527 uint32_t EcmaString::CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst,
528                                                  const T2 *dataSecond, size_t sizeSecond)
529 {
530     uint32_t totalHash = 0;
531     constexpr uint32_t hashShift = static_cast<uint32_t>(EcmaStringHash::HASH_SHIFT);
532     constexpr uint32_t blockSize = static_cast<size_t>(EcmaStringHash::BLOCK_SIZE);
533     // The concatenated length of the two strings is less than MIN_SIZE_FOR_UNROLLING.
534     if (sizeFirst + sizeSecond <= static_cast<size_t>(EcmaStringHash::MIN_SIZE_FOR_UNROLLING)) {
535         for (uint32_t i = 0; i < sizeFirst; i++) {
536             totalHash = (totalHash << hashShift) - totalHash + dataFirst[i];
537         }
538         for (uint32_t i = 0; i < sizeSecond; i++) {
539             totalHash = (totalHash << hashShift) - totalHash + dataSecond[i];
540         }
541         return totalHash;
542     }
543     // Process the entire block of the first string.
544     uint32_t hash[blockSize] = {0};
545     uint32_t index = 0;
546     for (; index + blockSize <= sizeFirst; index += blockSize) {
547         hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index];
548         hash[1] = (hash[1] << hashShift) - hash[1] + dataFirst[index + 1]; // 1: the second element
549         hash[2] = (hash[2] << hashShift) - hash[2] + dataFirst[index + 2]; // 2: the third element
550         hash[3] = (hash[3] << hashShift) - hash[3] + dataFirst[index + 3]; // 3: the fourth element
551     }
552     // The remaining total string length is less than a whole block.
553     if ((sizeFirst % blockSize) + sizeSecond < blockSize) {
554         for (; index < sizeFirst; ++index) {
555             hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index];
556         }
557         index = 0;
558     } else {
559         //Calculate the non-integral block portion at the end of the first string.
560         for (; index < sizeFirst; ++index) {
561             hash[index % blockSize] = (hash[index % blockSize] << hashShift) -
562                                         hash[index % blockSize] + dataFirst[index];
563         }
564         //Calculate the portion of the second string
565         //that starts and aligns with an integral block at the end of the first string.
566         uint32_t wholeBlockRemain = (blockSize - sizeFirst % blockSize) % blockSize;
567         index = 0;
568         for (; index < wholeBlockRemain && index < sizeSecond; ++index) {
569             uint32_t nowHashIndex = sizeFirst % blockSize + index;
570             hash[nowHashIndex] = (hash[nowHashIndex] << hashShift) - hash[nowHashIndex] + dataSecond[index];
571         }
572         // Process the entire block of the Second string.
573         for (; index + blockSize <= sizeSecond; index += blockSize) {
574             hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index];
575             hash[1] = (hash[1] << hashShift) - hash[1] + dataSecond[index + 1]; // 1: the second element
576             hash[2] = (hash[2] << hashShift) - hash[2] + dataSecond[index + 2]; // 2: the third element
577             hash[3] = (hash[3] << hashShift) - hash[3] + dataSecond[index + 3]; // 3: the fourth element
578         }
579     }
580     for (; index < sizeSecond; ++index) {
581         hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index];
582     }
583     for (uint32_t i = 0; i < blockSize; ++i) {
584         totalHash = (totalHash << hashShift) - totalHash + hash[i];
585     }
586     return totalHash;
587 }
588 
589 // static
CalculateConcatHashCode(const JSHandle<EcmaString> &firstString, const JSHandle<EcmaString> &secondString)590 uint32_t EcmaString::CalculateConcatHashCode(const JSHandle<EcmaString> &firstString,
591                                              const JSHandle<EcmaString> &secondString)
592 {
593     bool isFirstStringUtf8 = EcmaStringAccessor(firstString).IsUtf8();
594     bool isSecondStringUtf8 = EcmaStringAccessor(secondString).IsUtf8();
595     EcmaString *firstStr = *firstString;
596     EcmaString *secondStr = *secondString;
597     CVector<uint8_t> bufFirstUint8;
598     CVector<uint8_t> bufSecondUint8;
599     CVector<uint16_t> bufFirstUint16;
600     CVector<uint16_t> bufSecondUint16;
601     if (isFirstStringUtf8 && isSecondStringUtf8) {
602         const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8);
603         const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8);
604         return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
605                                            dataSecond, secondStr->GetLength());
606     }
607     if (!isFirstStringUtf8 && isSecondStringUtf8) {
608         const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16);
609         const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8);
610         return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
611                                            dataSecond, secondStr->GetLength());
612     }
613     if (isFirstStringUtf8 && !isSecondStringUtf8) {
614         const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8);
615         const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16);
616         return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
617                                            dataSecond, secondStr->GetLength());
618     }
619     {
620         const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16);
621         const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16);
622         return  CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
623                                             dataSecond, secondStr->GetLength());
624     }
625 }
626 
627 // static
CanBeCompressed(const EcmaString *string)628 bool EcmaString::CanBeCompressed(const EcmaString *string)
629 {
630     ASSERT(string->IsLineOrConstantString());
631     if (string->IsUtf8()) {
632         return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
633     }
634     return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
635 }
636 
637 // static
CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)638 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
639 {
640     uint32_t index = 0;
641     for (; index + 4 <= utf8Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
642         // Check if all four characters in the current block are ASCII characters
643         if (!IsASCIICharacter(utf8Data[index]) ||
644             !IsASCIICharacter(utf8Data[index + 1]) || // 1: the second element of the block
645             !IsASCIICharacter(utf8Data[index + 2]) || // 2: the third element of the block
646             !IsASCIICharacter(utf8Data[index + 3])) { // 3: the fourth element of the block
647             return false;
648         }
649     }
650     // Check remaining characters if they are ASCII
651     for (; index < utf8Len; ++index) {
652         if (!IsASCIICharacter(utf8Data[index])) {
653             return false;
654         }
655     }
656     return true;
657 }
658 
659 /* static */
CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)660 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
661 {
662     uint32_t index = 0;
663     for (; index + 4 <= utf16Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
664         // Check if all four characters in the current block are ASCII characters
665         if (!IsASCIICharacter(utf16Data[index]) ||
666             !IsASCIICharacter(utf16Data[index + 1]) || // 1: the second element of the block
667             !IsASCIICharacter(utf16Data[index + 2]) || // 2: the third element of the block
668             !IsASCIICharacter(utf16Data[index + 3])) { // 3: the fourth element of the block
669             return false;
670         }
671     }
672     // Check remaining characters if they are ASCII
673     for (; index < utf16Len; ++index) {
674         if (!IsASCIICharacter(utf16Data[index])) {
675             return false;
676         }
677     }
678     return true;
679 }
680 
EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)681 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
682 {
683     ASSERT(NotTreeString());
684     ASSERT(str1->NotTreeString() && str2->NotTreeString());
685     if (GetLength() != str1->GetLength() + str2->GetLength()) {
686         return false;
687     }
688     if (IsUtf16()) {
689         CVector<uint16_t> buf;
690         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
691         if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) {
692             return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength());
693         }
694     } else {
695         CVector<uint8_t> buf;
696         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
697         if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) {
698             return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(),
699                                                       str2->GetLength(), this->IsUtf8());
700         }
701     }
702     return false;
703 }
704 
705 /* static */
StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)706 bool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)
707 {
708     CVector<uint16_t> bufLeftUft16;
709     CVector<uint16_t> bufRightUft16;
710     CVector<uint8_t> bufLeftUft8;
711     CVector<uint8_t> bufRightUft8;
712     int32_t lhsCount = static_cast<int32_t>(left->GetLength());
713     int32_t rhsCount = static_cast<int32_t>(right->GetLength());
714     if (!left->IsUtf16() && !right->IsUtf16()) {
715         const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
716         const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
717         Span<const uint8_t> lhsSp(data1, lhsCount);
718         Span<const uint8_t> rhsSp(data2, rhsCount);
719         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
720     } else if (!left->IsUtf16()) {
721         const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
722         const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
723         Span<const uint8_t> lhsSp(data1, lhsCount);
724         Span<const uint16_t> rhsSp(data2, rhsCount);
725         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
726     } else if (!right->IsUtf16()) {
727         const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
728         const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
729         Span<const uint16_t> lhsSp(data1, lhsCount);
730         Span<const uint8_t> rhsSp(data2, rhsCount);
731         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
732     } else {
733         const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
734         const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
735         Span<const uint16_t> lhsSp(data1, lhsCount);
736         Span<const uint16_t> rhsSp(data2, rhsCount);
737         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
738     }
739 }
740 
741 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)742 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
743 {
744     int32_t lhsCount = static_cast<int32_t>(left.GetLength());
745     int32_t rhsCount = static_cast<int32_t>(right.GetLength());
746     if (!left.IsUtf16() && !right.IsUtf16()) {
747         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
748         Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
749         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
750     } else if (!left.IsUtf16()) {
751         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
752         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
753         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
754     } else if (!right.IsUtf16()) {
755         Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
756         Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
757         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
758     } else {
759         Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
760         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
761         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
762     }
763 }
764 
StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)765 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
766 {
767     if (str1 == str2) {
768         return true;
769     }
770     if (str1->IsInternString() && str2->IsInternString()) {
771         return false;
772     }
773     uint32_t str1Len = str1->GetLength();
774     if (str1Len != str2->GetLength()) {
775         return false;
776     }
777     if (str1Len == 0) {
778         return true;
779     }
780 
781     uint32_t str1Hash;
782     uint32_t str2Hash;
783     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
784         if (str1Hash != str2Hash) {
785             return false;
786         }
787     }
788     FlatStringInfo str1Flat = FlattenAllString(vm, str1);
789     JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
790     FlatStringInfo str2Flat = FlattenAllString(vm, str2);
791     str1Flat.SetString(*string);
792     return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
793 }
794 
795 /* static */
StringsAreEqual(EcmaString *str1, EcmaString *str2)796 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
797 {
798     if (str1 == str2) {
799         return true;
800     }
801     uint32_t str1Len = str1->GetLength();
802     if (str1Len != str2->GetLength()) {
803         return false;
804     }
805     if (str1Len == 0) {
806         return true;
807     }
808 
809     uint32_t str1Hash;
810     uint32_t str2Hash;
811     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
812         if (str1Hash != str2Hash) {
813             return false;
814         }
815     }
816     return StringsAreEqualDiffUtfEncoding(str1, str2);
817 }
818 
819 /* static */
StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, bool canBeCompressToUtf8)820 bool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
821                                         bool canBeCompressToUtf8)
822 {
823     if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
824         return false;
825     }
826     if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
827         return false;
828     }
829     if (str1->IsUtf8()) {
830         CVector<uint8_t> buf;
831         Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen);
832         Span<const uint8_t> data2(dataAddr, dataLen);
833         return EcmaString::StringsAreEquals(data1, data2);
834     }
835     CVector<uint16_t> buf;
836     uint32_t length = str1->GetLength();
837     const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
838     return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
839 }
840 
841 /* static */
StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)842 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
843 {
844     uint32_t length = str1->GetLength();
845     if (length != utf16Len) {
846         return false;
847     }
848     if (str1->IsUtf8()) {
849         CVector<uint8_t> buf;
850         const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
851         return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
852     } else {
853         CVector<uint16_t> buf;
854         Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
855         Span<const uint16_t> data2(utf16Data, utf16Len);
856         return EcmaString::StringsAreEquals(data1, data2);
857     }
858 }
859 
860 template<typename T>
MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)861 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
862 {
863     ASSERT(dstMax >= count);
864     ASSERT(dst.Size() >= src.Size());
865     if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
866         LOG_FULL(FATAL) << "memcpy_s failed";
867         UNREACHABLE();
868     }
869     return true;
870 }
871 
HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const872 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
873 {
874     ASSERT(length >= 0);
875     Span<const uint8_t> str = FastToUtf8Span();
876     return HashIntegerString(str.data(), length, hash, hashSeed);
877 }
878 
ComputeHashcode() const879 uint32_t EcmaString::ComputeHashcode() const
880 {
881     auto [hash, isInteger] = ComputeRawHashcode();
882     return MixHashcode(hash, isInteger);
883 }
884 
885 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeRawHashcode() const886 std::pair<uint32_t, bool> EcmaString::ComputeRawHashcode() const
887 {
888     uint32_t hash = 0;
889     uint32_t length = GetLength();
890     if (length == 0) {
891         return {hash, false};
892     }
893 
894     if (IsUtf8()) {
895         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
896         if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) {
897             return {hash, true};
898         }
899         CVector<uint8_t> buf;
900         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
901         // String can not convert to integer number, using normal hashcode computing algorithm.
902         hash = this->ComputeHashForData(data, length, 0);
903         return {hash, false};
904     } else {
905         CVector<uint16_t> buf;
906         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
907         // If rawSeed has certain value, and second string uses UTF16 encoding,
908         // then merged string can not be small integer number.
909         hash = this->ComputeHashForData(data, length, 0);
910         return {hash, false};
911     }
912 }
913 
914 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const915 uint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const
916 {
917     uint32_t hash;
918     uint32_t length = GetLength();
919     if (length == 0) {
920         return MixHashcode(rawHashSeed, isInteger);
921     }
922 
923     if (IsUtf8()) {
924         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
925         if ((rawHashSeed == 0 || isInteger) &&
926              length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) {
927             return hash;
928         }
929         CVector<uint8_t> buf;
930         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
931         // String can not convert to integer number, using normal hashcode computing algorithm.
932         hash = this->ComputeHashForData(data, length, rawHashSeed);
933         return MixHashcode(hash, NOT_INTEGER);
934     } else {
935         CVector<uint16_t> buf;
936         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
937         // If rawSeed has certain value, and second string uses UTF16 encoding,
938         // then merged string can not be small integer number.
939         hash = this->ComputeHashForData(data, length, rawHashSeed);
940         return MixHashcode(hash, NOT_INTEGER);
941     }
942 }
943 
944 /* static */
ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)945 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
946 {
947     uint32_t mixHash = 0;
948     if (canBeCompress) {
949         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
950         if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
951             return mixHash;
952         }
953         uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
954         return MixHashcode(hash, NOT_INTEGER);
955     } else {
956         auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
957         CVector<uint16_t> tmpBuffer(utf16Len);
958         [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
959                                                                                utf16Len);
960         ASSERT(len == utf16Len);
961         uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
962         return MixHashcode(hash, NOT_INTEGER);
963     }
964     LOG_ECMA(FATAL) << "this branch is unreachable";
965     UNREACHABLE();
966 }
967 
968 /* static */
ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)969 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
970 {
971     uint32_t mixHash = 0;
972     // String length smaller than 10, try to compute integer hash.
973     if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) {
974         return mixHash;
975     }
976     uint32_t hash = ComputeHashForData(utf16Data, length, 0);
977     return MixHashcode(hash, NOT_INTEGER);
978 }
979 
// Returns utf8Len minus the number of trailing bytes that belong to a multi-byte sequence which cannot be
// complete within the buffer. The last three positions are inspected:
//   - byte at len-3 >= 0xF0: it announces a 4-byte sequence but only 3 bytes remain -> drop 3;
//   - byte at len-2 >= 0xE0: it announces 3+ bytes but only 2 remain               -> drop 2;
//   - byte at len-1 >= 0xC0: it announces 2+ bytes but only 1 remains              -> drop 1.
// The deepest matching position wins, so the checks run from the third-to-last byte towards the last.
static size_t FixUtf8Len(const uint8_t* utf8, size_t utf8Len)
{
    constexpr size_t ONE_BYTE_LENGTH = 1;
    constexpr size_t TWO_BYTES_LENGTH = 2;
    constexpr size_t THREE_BYTES_LENGTH = 3;
    if (utf8Len >= THREE_BYTES_LENGTH && utf8[utf8Len - THREE_BYTES_LENGTH] >= 0xF0) {
        return utf8Len - THREE_BYTES_LENGTH;
    }
    if (utf8Len >= TWO_BYTES_LENGTH && utf8[utf8Len - TWO_BYTES_LENGTH] >= 0xE0) {
        return utf8Len - TWO_BYTES_LENGTH;
    }
    if (utf8Len >= ONE_BYTE_LENGTH && utf8[utf8Len - ONE_BYTE_LENGTH] >= 0xC0) {
        return utf8Len - ONE_BYTE_LENGTH;
    }
    return utf8Len;
}
1000 
1001 
1002 /* static */
IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data, uint32_t utf16Len)1003 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len,
1004                                    const uint16_t *utf16Data, uint32_t utf16Len)
1005 {
1006     size_t safeUtf8Len = FixUtf8Len(utf8Data, utf8Len);
1007     const uint8_t *utf8End = utf8Data + utf8Len;
1008     const uint8_t *utf8SafeEnd = utf8Data + safeUtf8Len;
1009     const uint16_t *utf16End = utf16Data + utf16Len;
1010     while (utf8Data < utf8SafeEnd && utf16Data < utf16End) {
1011         uint8_t src = *utf8Data;
1012         switch (src & 0xF0) {
1013             case 0xF0: {
1014                 const uint8_t c2 = *(++utf8Data);
1015                 const uint8_t c3 = *(++utf8Data);
1016                 const uint8_t c4 = *(++utf8Data);
1017                 uint32_t codePoint = ((src & LOW_3BITS) << OFFSET_18POS) | ((c2 & LOW_6BITS) << OFFSET_12POS) |
1018                                      ((c3 & LOW_6BITS) << OFFSET_6POS) | (c4 & LOW_6BITS);
1019                 if (codePoint >= SURROGATE_RAIR_START) {
1020                     if (utf16Data >= utf16End - 1) {
1021                         return false;
1022                     }
1023                     codePoint -= SURROGATE_RAIR_START;
1024                     if (*utf16Data++ != static_cast<uint16_t>((codePoint >> OFFSET_10POS) | H_SURROGATE_START)) {
1025                         return false;
1026                     } else if (*utf16Data++ != static_cast<uint16_t>((codePoint & 0x3FF) | L_SURROGATE_START)) {
1027                         return false;
1028                     }
1029                 } else {
1030                     if (*utf16Data++ != static_cast<uint16_t>(codePoint)) {
1031                         return false;
1032                     }
1033                 }
1034                 utf8Data++;
1035                 break;
1036             }
1037             case 0xE0: {
1038                 const uint8_t c2 = *(++utf8Data);
1039                 const uint8_t c3 = *(++utf8Data);
1040                 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_4BITS) << OFFSET_12POS) |
1041                     ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS))) {
1042                     return false;
1043                 }
1044                 utf8Data++;
1045                 break;
1046             }
1047             case 0xD0:
1048             case 0xC0: {
1049                 const uint8_t c2 = *(++utf8Data);
1050                 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_5BITS) << OFFSET_6POS) | (c2 & LOW_6BITS))) {
1051                     return false;
1052                 }
1053                 utf8Data++;
1054                 break;
1055             }
1056             default:
1057                 do {
1058                     if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
1059                         return false;
1060                     }
1061                 } while (utf8Data < utf8SafeEnd && utf16Data < utf16End && *utf8Data < 0x80);
1062                 break;
1063         }
1064     }
1065     // The remain chars should be treated as single byte char.
1066     while (utf8Data < utf8End && utf16Data < utf16End) {
1067         if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
1068             return false;
1069         }
1070     }
1071     return utf8Data == utf8End && utf16Data == utf16End;
1072 }
1073 
ToElementIndex(uint32_t *index)1074 bool EcmaString::ToElementIndex(uint32_t *index)
1075 {
1076     uint32_t len = GetLength();
1077     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
1078         return false;
1079     }
1080     if (UNLIKELY(IsUtf16())) {
1081         return false;
1082     }
1083 
1084     // fast path: get integer from string's hash value
1085     if (TryToGetInteger(index)) {
1086         return true;
1087     }
1088 
1089     CVector<uint8_t> buf;
1090     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1091     uint32_t c = data[0];
1092     uint64_t n = 0;
1093     if (c == '0') {
1094         *index = 0;
1095         return len == 1;
1096     }
1097     uint32_t loopStart = 0;
1098     if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) {
1099         *index = n;
1100         return true;
1101     }
1102     return false;
1103 }
1104 
ToInt(int32_t *index, bool *negative)1105 bool EcmaString::ToInt(int32_t *index, bool *negative)
1106 {
1107     uint32_t len = GetLength();
1108     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
1109         return false;
1110     }
1111     if (UNLIKELY(IsUtf16())) {
1112         return false;
1113     }
1114     CVector<uint8_t> buf;
1115     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1116     uint32_t c = data[0];
1117     uint32_t loopStart = 0;
1118     uint64_t n = 0;
1119     if (c == '0') {
1120         *index = 0;
1121         return len == 1;
1122     }
1123     if (c == '-' && len > 1) {
1124         *negative = true;
1125         loopStart = 1;
1126     }
1127 
1128     if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
1129         *index = *negative ? -n : n;
1130         return true;
1131     }
1132     return false;
1133 }
1134 
ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)1135 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
1136 {
1137     uint64_t n = 0;
1138     uint32_t len = GetLength();
1139     if (UNLIKELY(loopStart >= len)) {
1140         return false;
1141     }
1142     for (uint32_t i = loopStart; i < len; i++) {
1143         uint32_t c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1144         if (c < '0' || c > '9') {
1145             return false;
1146         }
1147         // NOLINTNEXTLINE(readability-magic-numbers)
1148         n = n * 10 + (c - '0');  // 10: decimal factor
1149     }
1150     *index = n;
1151     return true;
1152 }
1153 
ToTypedArrayIndex(uint32_t *index)1154 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
1155 {
1156     uint32_t len = GetLength();
1157     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
1158         return false;
1159     }
1160     if (UNLIKELY(IsUtf16())) {
1161         return false;
1162     }
1163 
1164     CVector<uint8_t> buf;
1165     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1166     uint32_t c = data[0];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1167     uint64_t n = 0;
1168     if (c == '0') {
1169         *index = 0;
1170         return len == 1;
1171     }
1172     if (c > '0' && c <= '9') {
1173         n = c - '0';
1174         for (uint32_t i = 1; i < len; i++) {
1175             c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1176             if (c >= '0' && c <= '9') {
1177                 // NOLINTNEXTLINE(readability-magic-numbers)
1178                 n = n * 10 + (c - '0');  // 10: decimal factor
1179             } else if (c == '.') {
1180                 n = JSObject::MAX_ELEMENT_INDEX;
1181                 break;
1182             } else {
1183                 return false;
1184             }
1185         }
1186         if (n < JSObject::MAX_ELEMENT_INDEX) {
1187             *index = n;
1188             return true;
1189         } else {
1190             *index = JSObject::MAX_ELEMENT_INDEX;
1191             return true;
1192         }
1193     } else if (c == '-') {
1194         *index = JSObject::MAX_ELEMENT_INDEX;
1195         return true;
1196     }
1197     return false;
1198 }
1199 
1200 template<typename T>
TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)1201 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
1202 {
1203     uint32_t srcLen = src->GetLength();
1204     int32_t start = 0;
1205     int32_t end = static_cast<int32_t>(srcLen) - 1;
1206 
1207     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
1208         start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
1209     }
1210     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
1211         end = base::StringHelper::GetEnd(data, start, srcLen);
1212     }
1213     EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
1214     return res;
1215 }
1216 
1217 /* static */
ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)1218 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1219 {
1220     auto srcFlat = FlattenAllString(vm, src);
1221     uint32_t srcLength = srcFlat.GetLength();
1222     auto factory = vm->GetFactory();
1223     if (srcFlat.IsUtf16()) {
1224         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1225         std::string res = base::StringHelper::ToLower(u16str);
1226         return *(factory->NewFromStdString(res));
1227     } else {
1228         return ConvertUtf8ToLowerOrUpper(vm, src, true);
1229     }
1230 }
1231 
1232 /* static */
TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)1233 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1234 {
1235     auto srcFlat = FlattenAllString(vm, src);
1236     uint32_t srcLength = srcFlat.GetLength();
1237     const char start = 'A';
1238     const char end = 'Z';
1239     uint32_t upperIndex = srcLength;
1240     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1241     for (uint32_t index = 0; index < srcLength; ++index) {
1242         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1243             upperIndex = index;
1244             break;
1245         }
1246     }
1247     if (upperIndex == srcLength) {
1248         return *src;
1249     }
1250     return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
1251 }
1252 
1253 /* static */
TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)1254 EcmaString *EcmaString::TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1255 {
1256     auto srcFlat = FlattenAllString(vm, src);
1257     uint32_t srcLength = srcFlat.GetLength();
1258     const char start = 'a';
1259     const char end = 'z';
1260     uint32_t lowerIndex = srcLength;
1261     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1262     for (uint32_t index = 0; index < srcLength; ++index) {
1263         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1264             lowerIndex = index;
1265             break;
1266         }
1267     }
1268     if (lowerIndex == srcLength) {
1269         return *src;
1270     }
1271     return ConvertUtf8ToLowerOrUpper(vm, src, false, lowerIndex);
1272 }
1273 
1274 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, bool toLower, uint32_t startIndex)1275 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
1276                                                   bool toLower, uint32_t startIndex)
1277 {
1278     const char start = toLower ? 'A' : 'a';
1279     const char end = toLower ? 'Z' : 'z';
1280     uint32_t srcLength = src->GetLength();
1281     JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
1282     auto srcFlat = FlattenAllString(vm, src);
1283     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1284     auto newStringPtr = newString->GetDataUtf8Writable();
1285     if (startIndex > 0) {
1286         if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
1287             LOG_FULL(FATAL) << "memcpy_s failed";
1288             UNREACHABLE();
1289         }
1290     }
1291     for (uint32_t index = startIndex; index < srcLength; ++index) {
1292         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1293             *(newStringPtr + index) = data[index] ^ (1 << 5);   // 1 and 5 means lower to upper or upper to lower
1294         } else {
1295             *(newStringPtr + index) = data[index];
1296         }
1297     }
1298     return *newString;
1299 }
1300 
1301 /* static */
ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)1302 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1303 {
1304     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1305     uint32_t srcLength = srcFlat.GetLength();
1306     auto factory = vm->GetFactory();
1307     if (srcFlat.IsUtf16()) {
1308         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1309         std::string res = base::StringHelper::ToUpper(u16str);
1310         return *(factory->NewFromStdString(res));
1311     } else {
1312         return ConvertUtf8ToLowerOrUpper(vm, src, false);
1313     }
1314 }
1315 
1316 /* static */
ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)1317 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1318 {
1319     auto factory = vm->GetFactory();
1320     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1321     std::u16string utf16 = srcFlat.ToU16String();
1322     std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
1323     return *(factory->NewFromStdString(res));
1324 }
1325 
1326 /* static */
ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)1327 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1328 {
1329     auto factory = vm->GetFactory();
1330     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1331     std::u16string utf16 = srcFlat.ToU16String();
1332     std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
1333     return *(factory->NewFromStdString(res));
1334 }
1335 
Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)1336 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
1337 {
1338     FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
1339     uint32_t srcLen = srcFlat.GetLength();
1340     if (UNLIKELY(srcLen == 0)) {
1341         return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
1342     }
1343     if (srcFlat.IsUtf8()) {
1344         Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
1345         return TrimBody(thread, src, data, mode);
1346     } else {
1347         Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
1348         return TrimBody(thread, src, data, mode);
1349     }
1350 }
1351 
SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)1352 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1353 {
1354     ASSERT(string->IsTreeString() || string->IsSlicedString());
1355     ASSERT(IsSMemSpace(type));
1356     auto thread = vm->GetJSThread();
1357     uint32_t length = string->GetLength();
1358     EcmaString *result = nullptr;
1359     if (string->IsUtf8()) {
1360         result = CreateLineStringWithSpaceType(vm, length, true, type);
1361         WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
1362     } else {
1363         result = CreateLineStringWithSpaceType(vm, length, false, type);
1364         WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
1365     }
1366     if (string->IsTreeString()) {
1367         JSHandle<TreeEcmaString> tree(string);
1368         ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0);
1369         tree->SetFirst(thread, JSTaggedValue(result));
1370         tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1371     }
1372     return result;
1373 }
1374 
Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)1375 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1376 {
1377     EcmaString *s = *string;
1378     if (!s->IsTreeString()) {
1379         return s;
1380     }
1381     JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1382     if (!tree->IsFlat()) {
1383         return SlowFlatten(vm, string, type);
1384     }
1385     return EcmaString::Cast(tree->GetFirst());
1386 }
1387 
FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)1388 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1389 {
1390     ASSERT(IsSMemSpace(type));
1391     EcmaString *s = *string;
1392     uint32_t startIndex = 0;
1393     if (s->IsLineOrConstantString()) {
1394         return FlatStringInfo(s, startIndex, s->GetLength());
1395     }
1396     if (string->IsTreeString()) {
1397         JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1398         if (!tree->IsFlat()) {
1399             s = SlowFlatten(vm, string, type);
1400         } else {
1401             s = EcmaString::Cast(tree->GetFirst());
1402         }
1403     } else if (string->IsSlicedString()) {
1404         s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent());
1405         startIndex = SlicedString::Cast(*string)->GetStartIndex();
1406     }
1407     return FlatStringInfo(s, startIndex, string->GetLength());
1408 }
1409 
FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)1410 EcmaString *EcmaString::FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)
1411 {
1412     DISALLOW_GARBAGE_COLLECTION;
1413     if (string->IsLineOrConstantString()) {
1414         return string;
1415     }
1416     if (string->IsTreeString()) {
1417         TreeEcmaString *tree = TreeEcmaString::Cast(string);
1418         if (tree->IsFlat()) {
1419             string = EcmaString::Cast(tree->GetFirst());
1420         } else {
1421             uint32_t length = tree->GetLength();
1422             EcmaString *result = nullptr;
1423             if (tree->IsUtf8()) {
1424                 result = CreateLineStringNoGC(vm, length, true);
1425                 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
1426             } else {
1427                 result = CreateLineStringNoGC(vm, length, false);
1428                 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
1429             }
1430             tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
1431             tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1432             return result;
1433         }
1434     } else if (string->IsSlicedString()) {
1435         SlicedString *str = SlicedString::Cast(string);
1436         uint32_t length = str->GetLength();
1437         EcmaString *result = nullptr;
1438         if (str->IsUtf8()) {
1439             result = CreateLineStringNoGC(vm, length, true);
1440             WriteToFlat<uint8_t>(str, result->GetDataUtf8Writable(), length);
1441         } else {
1442             result = CreateLineStringNoGC(vm, length, false);
1443             WriteToFlat<uint16_t>(str, result->GetDataUtf16Writable(), length);
1444         }
1445         return result;
1446     }
1447     return string;
1448 }
1449 
GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)1450 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1451 {
1452     ASSERT(src->IsUtf8());
1453     uint32_t length = src->GetLength();
1454     EcmaString *string = const_cast<EcmaString *>(src);
1455     if (string->IsTreeString()) {
1456         if (string->IsFlat()) {
1457             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1458         } else {
1459             buf.reserve(length);
1460             WriteToFlat(string, buf.data(), length);
1461             return buf.data();
1462         }
1463     } else if (string->IsSlicedString()) {
1464         SlicedString *str = SlicedString::Cast(string);
1465         return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1466     }
1467     return string->GetDataUtf8();
1468 }
1469 
GetNonTreeUtf8Data(const EcmaString *src)1470 const uint8_t *EcmaString::GetNonTreeUtf8Data(const EcmaString *src)
1471 {
1472     ASSERT(src->IsUtf8());
1473     ASSERT(!src->IsTreeString());
1474     EcmaString *string = const_cast<EcmaString *>(src);
1475     if (string->IsSlicedString()) {
1476         SlicedString *str = SlicedString::Cast(string);
1477         return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1478     }
1479     ASSERT(src->IsLineOrConstantString());
1480     return string->GetDataUtf8();
1481 }
1482 
GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)1483 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1484 {
1485     ASSERT(src->IsUtf16());
1486     uint32_t length = src->GetLength();
1487     EcmaString *string = const_cast<EcmaString *>(src);
1488     if (string->IsTreeString()) {
1489         if (string->IsFlat()) {
1490             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1491         } else {
1492             buf.reserve(length);
1493             WriteToFlat(string, buf.data(), length);
1494             return buf.data();
1495         }
1496     } else if (string->IsSlicedString()) {
1497         SlicedString *str = SlicedString::Cast(string);
1498         return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1499     }
1500     return string->GetDataUtf16();
1501 }
1502 
GetNonTreeUtf16Data(const EcmaString *src)1503 const uint16_t *EcmaString::GetNonTreeUtf16Data(const EcmaString *src)
1504 {
1505     ASSERT(src->IsUtf16());
1506     ASSERT(!src->IsTreeString());
1507     EcmaString *string = const_cast<EcmaString *>(src);
1508     if (string->IsSlicedString()) {
1509         SlicedString *str = SlicedString::Cast(string);
1510         return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1511     }
1512     ASSERT(src->IsLineOrConstantString());
1513     return string->GetDataUtf16();
1514 }
1515 
ToU16String(uint32_t len)1516 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1517 {
1518     uint32_t length = len > 0 ? len : GetLength();
1519     std::u16string result;
1520     if (IsUtf16()) {
1521         const uint16_t *data = this->GetDataUtf16();
1522         result = base::StringHelper::Utf16ToU16String(data, length);
1523     } else {
1524         const uint8_t *data = this->GetDataUtf8();
1525         result = base::StringHelper::Utf8ToU16String(data, length);
1526     }
1527     return result;
1528 }
1529 
EcmaStringAccessor(TaggedObject *obj)1530 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
1531 {
1532     ASSERT(obj != nullptr);
1533     string_ = EcmaString::Cast(obj);
1534 }
1535 
EcmaStringAccessor(JSTaggedValue value)1536 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
1537 {
1538     ASSERT(value.IsString());
1539     string_ = EcmaString::Cast(value.GetTaggedObject());
1540 }
1541 
EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)1542 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1543     : string_(*strHandle)
1544 {
1545 }
1546 
ToStdString(StringConvertedUsage usage)1547 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1548 {
1549     if (string_ == nullptr) {
1550         return "";
1551     }
1552     bool modify = (usage != StringConvertedUsage::PRINT);
1553     CVector<uint8_t> buf;
1554     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1555     std::string res;
1556     res.reserve(sp.size());
1557     for (const auto &c : sp) {
1558         res.push_back(c);
1559     }
1560     return res;
1561 }
1562 
Utf8ConvertToString()1563 CString EcmaStringAccessor::Utf8ConvertToString()
1564 {
1565     if (string_ == nullptr) {
1566         return CString("");
1567     }
1568     if (IsUtf8()) {
1569         std::string stdStr;
1570         if (IsLineString()) {
1571             return base::StringHelper::Utf8ToString(GetDataUtf8(), GetLength()).c_str();
1572         }
1573         CVector<uint8_t> buf;
1574         const uint8_t *data = EcmaString::GetUtf8DataFlat(string_, buf);
1575         return base::StringHelper::Utf8ToString(data, GetLength()).c_str();
1576     } else {
1577         return ToCString();
1578     }
1579 }
1580 
DebuggerToStdString(StringConvertedUsage usage)1581 std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
1582 {
1583     if (string_ == nullptr) {
1584         return "";
1585     }
1586 
1587     bool modify = (usage != StringConvertedUsage::PRINT);
1588     CVector<uint8_t> buf;
1589     Span<const uint8_t> sp = string_->DebuggerToUtf8Span(buf, modify);
1590     std::string res;
1591     res.reserve(sp.size());
1592     for (const auto &c : sp) {
1593         res.push_back(c);
1594     }
1595     return res;
1596 }
1597 
ToCString(StringConvertedUsage usage, bool cesu8)1598 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage, bool cesu8)
1599 {
1600     if (string_ == nullptr) {
1601         return "";
1602     }
1603     bool modify = (usage != StringConvertedUsage::PRINT);
1604     CVector<uint8_t> buf;
1605     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1606     CString res;
1607     res.reserve(sp.size());
1608     for (const auto &c : sp) {
1609         res.push_back(c);
1610     }
1611     return res;
1612 }
1613 
1614 // static
CreateLineString(const EcmaVM *vm, size_t length, bool compressed)1615 EcmaString *EcmaStringAccessor::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
1616 {
1617     return EcmaString::CreateLineString(vm, length, compressed);
1618 }
1619 }  // namespace panda::ecmascript
1620