1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/ecma_string-inl.h"
17
18 #include "ecmascript/ecma_string_table.h"
19 #include "ecmascript/platform/ecma_string_hash.h"
20
21 namespace panda::ecmascript {
22
// Bit masks selecting the low 3, 4, 5 and 6 bits of a value.
constexpr size_t LOW_3BITS = 0x7;
constexpr size_t LOW_4BITS = 0xF;
constexpr size_t LOW_5BITS = 0x1F;
constexpr size_t LOW_6BITS = 0x3F;
// 0xDC00 is the first code unit of the UTF-16 low (trail) surrogate range.
constexpr size_t L_SURROGATE_START = 0xDC00;
// 0xD800 is the first code unit of the UTF-16 high (lead) surrogate range.
constexpr size_t H_SURROGATE_START = 0xD800;
// 0x10000 is the first code point that UTF-16 encodes as a surrogate pair.
// NOTE(review): "RAIR" looks like a typo for "PAIR"; the users of this name are not visible here.
constexpr size_t SURROGATE_RAIR_START = 0x10000;
// Bit shift amounts (18, 12, 10 and 6 positions).
// NOTE(review): presumably combined with the masks above by UTF conversion code elsewhere in the file - confirm.
constexpr size_t OFFSET_18POS = 18;
constexpr size_t OFFSET_12POS = 12;
constexpr size_t OFFSET_10POS = 10;
constexpr size_t OFFSET_6POS = 6;
34
Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)35 EcmaString *EcmaString::Concat(const EcmaVM *vm,
36 const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
37 {
38 ASSERT(IsSMemSpace(type));
39 // allocator may trig gc and move src, need to hold it
40 EcmaString *strLeft = *left;
41 EcmaString *strRight = *right;
42 uint32_t leftLength = strLeft->GetLength();
43 uint32_t rightLength = strRight->GetLength();
44 uint32_t newLength = leftLength + rightLength;
45 if (newLength == 0) {
46 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
47 }
48
49 if (leftLength == 0) {
50 return strRight;
51 }
52 if (rightLength == 0) {
53 return strLeft;
54 }
55 // if the result string is small, make a LineString
56 bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
57 if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
58 ASSERT(strLeft->IsLineOrConstantString());
59 ASSERT(strRight->IsLineOrConstantString());
60 auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
61 // retrieve strings after gc
62 strLeft = *left;
63 strRight = *right;
64 if (compressed) {
65 // copy left part
66 Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
67 Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
68 EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
69 // copy right part
70 sp = sp.SubSpan(leftLength);
71 Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
72 EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
73 } else {
74 // copy left part
75 Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
76 if (strLeft->IsUtf8()) {
77 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
78 } else {
79 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
80 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
81 }
82 // copy right part
83 sp = sp.SubSpan(leftLength);
84 if (strRight->IsUtf8()) {
85 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
86 } else {
87 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
88 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
89 }
90 }
91 ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
92 return newString;
93 }
94 return CreateTreeString(vm, left, right, newLength, compressed);
95 }
96
97 /* static */
CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original, uint32_t length, bool compressed)98 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
99 uint32_t length, bool compressed)
100 {
101 if (original->IsConstantString()) {
102 return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
103 }
104 JSHandle<EcmaString> newString(vm->GetJSThread(),
105 CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
106 auto strOrigin = FlattenAllString(vm, original);
107 if (compressed) {
108 // copy
109 Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
110 Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
111 EcmaString::MemCopyChars(sp, length, srcSp, length);
112 } else {
113 // copy left part
114 Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
115 if (strOrigin.IsUtf8()) {
116 EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
117 } else {
118 Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
119 EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
120 }
121 }
122 ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
123 return *newString;
124 }
125
126 /* static */
FastSubString(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)127 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
128 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
129 {
130 ASSERT((start + length) <= src->GetLength());
131 if (length == 0) {
132 return *vm->GetFactory()->GetEmptyString();
133 }
134 if (start == 0 && length == src->GetLength()) {
135 return *src;
136 }
137 if (src->IsUtf8()) {
138 return FastSubUtf8String(vm, src, start, length);
139 }
140 return FastSubUtf16String(vm, src, start, length);
141 }
142
143 /* static */
GetSlicedString(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)144 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
145 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
146 {
147 ASSERT((start + length) <= src->GetLength());
148 JSHandle<SlicedString> slicedString(vm->GetJSThread(), CreateSlicedString(vm));
149 FlatStringInfo srcFlat = FlattenAllString(vm, src);
150 slicedString->SetLength(length, srcFlat.GetString()->IsUtf8());
151 slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString()));
152 slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
153 return *slicedString;
154 }
155
156 /* static */
GetSubString(const EcmaVM *vm, const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)157 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
158 const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
159 {
160 ASSERT((start + length) <= src->GetLength());
161 if (length == 1) {
162 JSThread *thread = vm->GetJSThread();
163 uint16_t res = EcmaStringAccessor(src).Get<false>(start);
164 if (EcmaStringAccessor::CanBeCompressed(&res, 1)) {
165 JSHandle<SingleCharTable> singleCharTable(thread, thread->GetSingleCharTable());
166 return EcmaString::Cast(singleCharTable->GetStringFromSingleCharTable(res).GetTaggedObject());
167 }
168 }
169 if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) {
170 if (start == 0 && length == src->GetLength()) {
171 return *src;
172 }
173 if (src->IsUtf16()) {
174 FlatStringInfo srcFlat = FlattenAllString(vm, src);
175 bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
176 if (canBeCompressed) {
177 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
178 srcFlat = FlattenAllString(vm, src);
179 CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
180 return *string;
181 }
182 }
183 return GetSlicedString(vm, src, start, length);
184 }
185 return FastSubString(vm, src, start, length);
186 }
187
WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)188 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
189 {
190 ASSERT(IsLineString() && !IsConstantString());
191 if (IsUtf8()) {
192 ASSERT(src->IsUtf8());
193 CVector<uint8_t> buf;
194 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
195 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
196 if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
197 LOG_FULL(FATAL) << "memcpy_s failed";
198 UNREACHABLE();
199 }
200 } else if (src->IsUtf8()) {
201 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
202 CVector<uint8_t> buf;
203 const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
204 Span<uint16_t> to(GetDataUtf16Writable() + start, length);
205 Span<const uint8_t> from(data, length);
206 for (uint32_t i = 0; i < length; i++) {
207 to[i] = from[i];
208 }
209 } else {
210 CVector<uint16_t> buf;
211 const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
212 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
213 if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
214 destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
215 LOG_FULL(FATAL) << "memcpy_s failed";
216 UNREACHABLE();
217 }
218 }
219 }
220
221 template<typename T1, typename T2>
CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)222 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
223 {
224 for (int32_t i = 0; i < count; ++i) {
225 auto left = static_cast<int32_t>(lhsSp[i]);
226 auto right = static_cast<int32_t>(rhsSp[i]);
227 if (left != right) {
228 return left - right;
229 }
230 }
231 return 0;
232 }
233
Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)234 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
235 {
236 if (*left == *right) {
237 return 0;
238 }
239 FlatStringInfo lhs = FlattenAllString(vm, left);
240 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
241 FlatStringInfo rhs = FlattenAllString(vm, right);
242 lhs.SetString(*string);
243 int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
244 int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
245 int32_t countDiff = lhsCount - rhsCount;
246 int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
247 if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
248 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
249 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
250 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
251 if (charDiff != 0) {
252 return charDiff;
253 }
254 } else if (!lhs.IsUtf16()) {
255 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
256 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
257 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
258 if (charDiff != 0) {
259 return charDiff;
260 }
261 } else if (!rhs.IsUtf16()) {
262 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
263 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
264 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
265 if (charDiff != 0) {
266 return charDiff;
267 }
268 } else {
269 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
270 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
271 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
272 if (charDiff != 0) {
273 return charDiff;
274 }
275 }
276 return countDiff;
277 }
278
279 template<typename T1, typename T2>
IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset)280 bool IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset)
281 {
282 int rhsSize = static_cast<int>(rhsSp.size());
283 ASSERT(rhsSize + offset <= lhsSp.size());
284 for (int i = 0; i < rhsSize; ++i) {
285 auto left = static_cast<int32_t>(lhsSp[offset + static_cast<uint32_t>(i)]);
286 auto right = static_cast<int32_t>(rhsSp[i]);
287 if (left != right) {
288 return false;
289 }
290 }
291 return true;
292 }
293
294
295 /**
296 * left: text string
297 * right: pattern string
298 * example 1: IsSubStringAt("IsSubStringAt", "Is", 0) return true
299 * example 2: IsSubStringAt("IsSubStringAt", "It", 0) return false
300 */
IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right, uint32_t offset)301 bool EcmaString::IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
302 const JSHandle<EcmaString>& right, uint32_t offset)
303 {
304 FlatStringInfo lhs = FlattenAllString(vm, left);
305 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
306 FlatStringInfo rhs = FlattenAllString(vm, right);
307 lhs.SetString(*string);
308 int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
309 int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
310 if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
311 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
312 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
313 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
314 } else if (!lhs.IsUtf16()) {
315 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
316 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
317 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
318 } else if (!rhs.IsUtf16()) {
319 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
320 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
321 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
322 } else {
323 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
324 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
325 return IsSubStringAtSpan(lhsSp, rhsSp, offset);
326 }
327 return false;
328 }
329
330 /* static */
331 template<typename T1, typename T2>
IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)332 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
333 {
334 ASSERT(rhsSp.size() > 0);
335 auto first = static_cast<int32_t>(rhsSp[0]);
336 for (int32_t i = pos; i <= max; i++) {
337 if (static_cast<int32_t>(lhsSp[i]) != first) {
338 i++;
339 while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
340 i++;
341 }
342 }
343 /* Found first character, now look at the rest of rhsSp */
344 if (i <= max) {
345 int j = i + 1;
346 int end = j + static_cast<int>(rhsSp.size()) - 1;
347
348 for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
349 }
350 if (j == end) {
351 /* Found whole string. */
352 return i;
353 }
354 }
355 }
356 return -1;
357 }
358
359 template<typename T1, typename T2>
LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)360 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
361 {
362 int rhsSize = static_cast<int>(rhsSp.size());
363 ASSERT(rhsSize > 0);
364 auto first = rhsSp[0];
365 for (int32_t i = pos; i >= 0; i--) {
366 if (lhsSp[i] != first) {
367 continue;
368 }
369 /* Found first character, now look at the rest of rhsSp */
370 int j = 1;
371 while (j < rhsSize) {
372 if (rhsSp[j] != lhsSp[i + j]) {
373 break;
374 }
375 j++;
376 }
377 if (j == rhsSize) {
378 return i;
379 }
380 }
381 return -1;
382 }
383
IndexOf(const EcmaVM *vm, const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)384 int32_t EcmaString::IndexOf(const EcmaVM *vm,
385 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
386 {
387 EcmaString *lhstring = *receiver;
388 EcmaString *rhstring = *search;
389 if (lhstring == nullptr || rhstring == nullptr) {
390 return -1;
391 }
392 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
393 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
394
395 if (pos > lhsCount) {
396 return -1;
397 }
398
399 if (rhsCount == 0) {
400 return pos;
401 }
402
403 if (pos < 0) {
404 pos = 0;
405 }
406
407 int32_t max = lhsCount - rhsCount;
408 if (max < 0) {
409 return -1;
410 }
411
412 if (pos + rhsCount > lhsCount) {
413 return -1;
414 }
415
416 FlatStringInfo lhs = FlattenAllString(vm, receiver);
417 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
418 FlatStringInfo rhs = FlattenAllString(vm, search);
419 lhs.SetString(*string);
420
421 if (rhs.IsUtf8() && lhs.IsUtf8()) {
422 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
423 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
424 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
425 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
426 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
427 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
428 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
429 } else if (rhs.IsUtf16()) {
430 return -1;
431 } else { // NOLINT(readability-else-after-return)
432 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
433 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
434 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
435 }
436 }
437
LastIndexOf(const EcmaVM *vm, const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)438 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
439 const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
440 {
441 EcmaString *lhstring = *receiver;
442 EcmaString *rhstring = *search;
443 if (lhstring == nullptr || rhstring == nullptr) {
444 return -1;
445 }
446
447 int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
448 int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
449 if (lhsCount < rhsCount) {
450 return -1;
451 }
452
453 if (pos < 0) {
454 pos = 0;
455 }
456
457 if (pos > lhsCount) {
458 pos = lhsCount;
459 }
460
461 if (pos + rhsCount > lhsCount) {
462 pos = lhsCount - rhsCount;
463 }
464
465 if (rhsCount == 0) {
466 return pos;
467 }
468
469 FlatStringInfo lhs = FlattenAllString(vm, receiver);
470 JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
471 FlatStringInfo rhs = FlattenAllString(vm, search);
472 lhs.SetString(*string);
473 if (rhs.IsUtf8() && lhs.IsUtf8()) {
474 Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
475 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
476 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
477 } else if (rhs.IsUtf16() && lhs.IsUtf16()) { // NOLINT(readability-else-after-return)
478 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
479 Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
480 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
481 } else if (rhs.IsUtf16()) {
482 return -1;
483 } else { // NOLINT(readability-else-after-return)
484 Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
485 Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
486 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
487 }
488 }
489
ToU16String(uint32_t len)490 std::u16string EcmaString::ToU16String(uint32_t len)
491 {
492 uint32_t length = len > 0 ? len : GetLength();
493 std::u16string result;
494 if (IsUtf16()) {
495 CVector<uint16_t> buf;
496 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
497 result = base::StringHelper::Utf16ToU16String(data, length);
498 } else {
499 CVector<uint8_t> buf;
500 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
501 result = base::StringHelper::Utf8ToU16String(data, length);
502 }
503 return result;
504 }
505
506 //static
CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString, const JSHandle<EcmaString> &secondString)507 uint32_t EcmaString::CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString,
508 const JSHandle<EcmaString> &secondString)
509 {
510 uint32_t hashCode;
511 uint32_t firstLength = firstString->GetLength();
512 uint32_t secondLength = secondString->GetLength();
513 if ((firstLength + secondLength < MAX_ELEMENT_INDEX_LEN) &&
514 firstString->IsUtf8() && secondString->IsUtf8() &&
515 firstString->IsInteger() && secondString->IsInteger()) {
516 firstString->HashIntegerString(firstLength, &hashCode, 0);
517 secondString->HashIntegerString(secondLength, &hashCode, hashCode);
518 return hashCode;
519 }
520 hashCode = EcmaString::CalculateConcatHashCode(firstString, secondString);
521 hashCode = MixHashcode(hashCode, NOT_INTEGER);
522 return hashCode;
523 }
524
525 // static
/**
 * Hashes the character sequence dataFirst[0..sizeFirst) followed by dataSecond[0..sizeSecond)
 * without materialising the concatenation.
 *
 * Every step has the form h = (h << hashShift) - h + c, i.e. h * (2^hashShift - 1) + c.
 * Short inputs are hashed in one sequential pass. Longer inputs are split across `blockSize`
 * lane accumulators, with leftover characters folded into lane 0, and the lanes are finally
 * combined with the same step.
 * NOTE(review): presumably this must produce the same value as the hash of the already
 * concatenated string computed elsewhere - confirm before changing any statement order.
 */
template<typename T1, typename T2>
uint32_t EcmaString::CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst,
                                                 const T2 *dataSecond, size_t sizeSecond)
{
    uint32_t totalHash = 0;
    constexpr uint32_t hashShift = static_cast<uint32_t>(EcmaStringHash::HASH_SHIFT);
    // NOTE(review): the value is cast to size_t but stored as uint32_t.
    constexpr uint32_t blockSize = static_cast<size_t>(EcmaStringHash::BLOCK_SIZE);
    // The concatenated length of the two strings does not exceed MIN_SIZE_FOR_UNROLLING:
    // hash both parts sequentially with a single accumulator.
    if (sizeFirst + sizeSecond <= static_cast<size_t>(EcmaStringHash::MIN_SIZE_FOR_UNROLLING)) {
        for (uint32_t i = 0; i < sizeFirst; i++) {
            totalHash = (totalHash << hashShift) - totalHash + dataFirst[i];
        }
        for (uint32_t i = 0; i < sizeSecond; i++) {
            totalHash = (totalHash << hashShift) - totalHash + dataSecond[i];
        }
        return totalHash;
    }
    // Process the entire block of the first string.
    // The unrolled bodies below address lanes 0..3 explicitly.
    uint32_t hash[blockSize] = {0};
    uint32_t index = 0;
    for (; index + blockSize <= sizeFirst; index += blockSize) {
        hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index];
        hash[1] = (hash[1] << hashShift) - hash[1] + dataFirst[index + 1]; // 1: the second element
        hash[2] = (hash[2] << hashShift) - hash[2] + dataFirst[index + 2]; // 2: the third element
        hash[3] = (hash[3] << hashShift) - hash[3] + dataFirst[index + 3]; // 3: the fourth element
    }
    // The remaining total string length is less than a whole block.
    if ((sizeFirst % blockSize) + sizeSecond < blockSize) {
        // Fold the tail of the first string into lane 0; the second string is folded into
        // lane 0 by the loop after this if/else, starting from its first character.
        for (; index < sizeFirst; ++index) {
            hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index];
        }
        index = 0;
    } else {
        // Calculate the non-integral block portion at the end of the first string.
        for (; index < sizeFirst; ++index) {
            hash[index % blockSize] = (hash[index % blockSize] << hashShift) -
                hash[index % blockSize] + dataFirst[index];
        }
        // Calculate the portion of the second string
        // that starts and aligns with an integral block at the end of the first string.
        uint32_t wholeBlockRemain = (blockSize - sizeFirst % blockSize) % blockSize;
        index = 0;
        for (; index < wholeBlockRemain && index < sizeSecond; ++index) {
            uint32_t nowHashIndex = sizeFirst % blockSize + index;
            hash[nowHashIndex] = (hash[nowHashIndex] << hashShift) - hash[nowHashIndex] + dataSecond[index];
        }
        // Process the entire block of the Second string.
        for (; index + blockSize <= sizeSecond; index += blockSize) {
            hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index];
            hash[1] = (hash[1] << hashShift) - hash[1] + dataSecond[index + 1]; // 1: the second element
            hash[2] = (hash[2] << hashShift) - hash[2] + dataSecond[index + 2]; // 2: the third element
            hash[3] = (hash[3] << hashShift) - hash[3] + dataSecond[index + 3]; // 3: the fourth element
        }
    }
    // Whatever is left of the second string goes into lane 0.
    for (; index < sizeSecond; ++index) {
        hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index];
    }
    // Combine the lane accumulators into the final hash.
    for (uint32_t i = 0; i < blockSize; ++i) {
        totalHash = (totalHash << hashShift) - totalHash + hash[i];
    }
    return totalHash;
}
588
589 // static
CalculateConcatHashCode(const JSHandle<EcmaString> &firstString, const JSHandle<EcmaString> &secondString)590 uint32_t EcmaString::CalculateConcatHashCode(const JSHandle<EcmaString> &firstString,
591 const JSHandle<EcmaString> &secondString)
592 {
593 bool isFirstStringUtf8 = EcmaStringAccessor(firstString).IsUtf8();
594 bool isSecondStringUtf8 = EcmaStringAccessor(secondString).IsUtf8();
595 EcmaString *firstStr = *firstString;
596 EcmaString *secondStr = *secondString;
597 CVector<uint8_t> bufFirstUint8;
598 CVector<uint8_t> bufSecondUint8;
599 CVector<uint16_t> bufFirstUint16;
600 CVector<uint16_t> bufSecondUint16;
601 if (isFirstStringUtf8 && isSecondStringUtf8) {
602 const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8);
603 const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8);
604 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
605 dataSecond, secondStr->GetLength());
606 }
607 if (!isFirstStringUtf8 && isSecondStringUtf8) {
608 const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16);
609 const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8);
610 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
611 dataSecond, secondStr->GetLength());
612 }
613 if (isFirstStringUtf8 && !isSecondStringUtf8) {
614 const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8);
615 const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16);
616 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
617 dataSecond, secondStr->GetLength());
618 }
619 {
620 const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16);
621 const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16);
622 return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
623 dataSecond, secondStr->GetLength());
624 }
625 }
626
627 // static
CanBeCompressed(const EcmaString *string)628 bool EcmaString::CanBeCompressed(const EcmaString *string)
629 {
630 ASSERT(string->IsLineOrConstantString());
631 if (string->IsUtf8()) {
632 return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
633 }
634 return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
635 }
636
637 // static
CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)638 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
639 {
640 uint32_t index = 0;
641 for (; index + 4 <= utf8Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
642 // Check if all four characters in the current block are ASCII characters
643 if (!IsASCIICharacter(utf8Data[index]) ||
644 !IsASCIICharacter(utf8Data[index + 1]) || // 1: the second element of the block
645 !IsASCIICharacter(utf8Data[index + 2]) || // 2: the third element of the block
646 !IsASCIICharacter(utf8Data[index + 3])) { // 3: the fourth element of the block
647 return false;
648 }
649 }
650 // Check remaining characters if they are ASCII
651 for (; index < utf8Len; ++index) {
652 if (!IsASCIICharacter(utf8Data[index])) {
653 return false;
654 }
655 }
656 return true;
657 }
658
659 /* static */
CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)660 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
661 {
662 uint32_t index = 0;
663 for (; index + 4 <= utf16Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
664 // Check if all four characters in the current block are ASCII characters
665 if (!IsASCIICharacter(utf16Data[index]) ||
666 !IsASCIICharacter(utf16Data[index + 1]) || // 1: the second element of the block
667 !IsASCIICharacter(utf16Data[index + 2]) || // 2: the third element of the block
668 !IsASCIICharacter(utf16Data[index + 3])) { // 3: the fourth element of the block
669 return false;
670 }
671 }
672 // Check remaining characters if they are ASCII
673 for (; index < utf16Len; ++index) {
674 if (!IsASCIICharacter(utf16Data[index])) {
675 return false;
676 }
677 }
678 return true;
679 }
680
EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)681 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
682 {
683 ASSERT(NotTreeString());
684 ASSERT(str1->NotTreeString() && str2->NotTreeString());
685 if (GetLength() != str1->GetLength() + str2->GetLength()) {
686 return false;
687 }
688 if (IsUtf16()) {
689 CVector<uint16_t> buf;
690 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
691 if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) {
692 return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength());
693 }
694 } else {
695 CVector<uint8_t> buf;
696 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
697 if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) {
698 return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(),
699 str2->GetLength(), this->IsUtf8());
700 }
701 }
702 return false;
703 }
704
705 /* static */
StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)706 bool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)
707 {
708 CVector<uint16_t> bufLeftUft16;
709 CVector<uint16_t> bufRightUft16;
710 CVector<uint8_t> bufLeftUft8;
711 CVector<uint8_t> bufRightUft8;
712 int32_t lhsCount = static_cast<int32_t>(left->GetLength());
713 int32_t rhsCount = static_cast<int32_t>(right->GetLength());
714 if (!left->IsUtf16() && !right->IsUtf16()) {
715 const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
716 const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
717 Span<const uint8_t> lhsSp(data1, lhsCount);
718 Span<const uint8_t> rhsSp(data2, rhsCount);
719 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
720 } else if (!left->IsUtf16()) {
721 const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
722 const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
723 Span<const uint8_t> lhsSp(data1, lhsCount);
724 Span<const uint16_t> rhsSp(data2, rhsCount);
725 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
726 } else if (!right->IsUtf16()) {
727 const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
728 const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
729 Span<const uint16_t> lhsSp(data1, lhsCount);
730 Span<const uint8_t> rhsSp(data2, rhsCount);
731 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
732 } else {
733 const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
734 const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
735 Span<const uint16_t> lhsSp(data1, lhsCount);
736 Span<const uint16_t> rhsSp(data2, rhsCount);
737 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
738 }
739 }
740
741 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)742 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
743 {
744 int32_t lhsCount = static_cast<int32_t>(left.GetLength());
745 int32_t rhsCount = static_cast<int32_t>(right.GetLength());
746 if (!left.IsUtf16() && !right.IsUtf16()) {
747 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
748 Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
749 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
750 } else if (!left.IsUtf16()) {
751 Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
752 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
753 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
754 } else if (!right.IsUtf16()) {
755 Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
756 Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
757 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
758 } else {
759 Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
760 Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
761 return EcmaString::StringsAreEquals(lhsSp, rhsSp);
762 }
763 }
764
StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)765 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
766 {
767 if (str1 == str2) {
768 return true;
769 }
770 if (str1->IsInternString() && str2->IsInternString()) {
771 return false;
772 }
773 uint32_t str1Len = str1->GetLength();
774 if (str1Len != str2->GetLength()) {
775 return false;
776 }
777 if (str1Len == 0) {
778 return true;
779 }
780
781 uint32_t str1Hash;
782 uint32_t str2Hash;
783 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
784 if (str1Hash != str2Hash) {
785 return false;
786 }
787 }
788 FlatStringInfo str1Flat = FlattenAllString(vm, str1);
789 JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
790 FlatStringInfo str2Flat = FlattenAllString(vm, str2);
791 str1Flat.SetString(*string);
792 return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
793 }
794
795 /* static */
StringsAreEqual(EcmaString *str1, EcmaString *str2)796 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
797 {
798 if (str1 == str2) {
799 return true;
800 }
801 uint32_t str1Len = str1->GetLength();
802 if (str1Len != str2->GetLength()) {
803 return false;
804 }
805 if (str1Len == 0) {
806 return true;
807 }
808
809 uint32_t str1Hash;
810 uint32_t str2Hash;
811 if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
812 if (str1Hash != str2Hash) {
813 return false;
814 }
815 }
816 return StringsAreEqualDiffUtfEncoding(str1, str2);
817 }
818
819 /* static */
StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen, bool canBeCompressToUtf8)820 bool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
821 bool canBeCompressToUtf8)
822 {
823 if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
824 return false;
825 }
826 if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
827 return false;
828 }
829 if (str1->IsUtf8()) {
830 CVector<uint8_t> buf;
831 Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen);
832 Span<const uint8_t> data2(dataAddr, dataLen);
833 return EcmaString::StringsAreEquals(data1, data2);
834 }
835 CVector<uint16_t> buf;
836 uint32_t length = str1->GetLength();
837 const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
838 return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
839 }
840
841 /* static */
StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)842 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
843 {
844 uint32_t length = str1->GetLength();
845 if (length != utf16Len) {
846 return false;
847 }
848 if (str1->IsUtf8()) {
849 CVector<uint8_t> buf;
850 const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
851 return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
852 } else {
853 CVector<uint16_t> buf;
854 Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
855 Span<const uint16_t> data2(utf16Data, utf16Len);
856 return EcmaString::StringsAreEquals(data1, data2);
857 }
858 }
859
860 template<typename T>
MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)861 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
862 {
863 ASSERT(dstMax >= count);
864 ASSERT(dst.Size() >= src.Size());
865 if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
866 LOG_FULL(FATAL) << "memcpy_s failed";
867 UNREACHABLE();
868 }
869 return true;
870 }
871
HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const872 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
873 {
874 ASSERT(length >= 0);
875 Span<const uint8_t> str = FastToUtf8Span();
876 return HashIntegerString(str.data(), length, hash, hashSeed);
877 }
878
ComputeHashcode() const879 uint32_t EcmaString::ComputeHashcode() const
880 {
881 auto [hash, isInteger] = ComputeRawHashcode();
882 return MixHashcode(hash, isInteger);
883 }
884
885 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeRawHashcode() const886 std::pair<uint32_t, bool> EcmaString::ComputeRawHashcode() const
887 {
888 uint32_t hash = 0;
889 uint32_t length = GetLength();
890 if (length == 0) {
891 return {hash, false};
892 }
893
894 if (IsUtf8()) {
895 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
896 if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) {
897 return {hash, true};
898 }
899 CVector<uint8_t> buf;
900 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
901 // String can not convert to integer number, using normal hashcode computing algorithm.
902 hash = this->ComputeHashForData(data, length, 0);
903 return {hash, false};
904 } else {
905 CVector<uint16_t> buf;
906 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
907 // If rawSeed has certain value, and second string uses UTF16 encoding,
908 // then merged string can not be small integer number.
909 hash = this->ComputeHashForData(data, length, 0);
910 return {hash, false};
911 }
912 }
913
914 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const915 uint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const
916 {
917 uint32_t hash;
918 uint32_t length = GetLength();
919 if (length == 0) {
920 return MixHashcode(rawHashSeed, isInteger);
921 }
922
923 if (IsUtf8()) {
924 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
925 if ((rawHashSeed == 0 || isInteger) &&
926 length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) {
927 return hash;
928 }
929 CVector<uint8_t> buf;
930 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
931 // String can not convert to integer number, using normal hashcode computing algorithm.
932 hash = this->ComputeHashForData(data, length, rawHashSeed);
933 return MixHashcode(hash, NOT_INTEGER);
934 } else {
935 CVector<uint16_t> buf;
936 const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
937 // If rawSeed has certain value, and second string uses UTF16 encoding,
938 // then merged string can not be small integer number.
939 hash = this->ComputeHashForData(data, length, rawHashSeed);
940 return MixHashcode(hash, NOT_INTEGER);
941 }
942 }
943
944 /* static */
ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)945 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
946 {
947 uint32_t mixHash = 0;
948 if (canBeCompress) {
949 // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
950 if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
951 return mixHash;
952 }
953 uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
954 return MixHashcode(hash, NOT_INTEGER);
955 } else {
956 auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
957 CVector<uint16_t> tmpBuffer(utf16Len);
958 [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
959 utf16Len);
960 ASSERT(len == utf16Len);
961 uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
962 return MixHashcode(hash, NOT_INTEGER);
963 }
964 LOG_ECMA(FATAL) << "this branch is unreachable";
965 UNREACHABLE();
966 }
967
968 /* static */
ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)969 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
970 {
971 uint32_t mixHash = 0;
972 // String length smaller than 10, try to compute integer hash.
973 if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) {
974 return mixHash;
975 }
976 uint32_t hash = ComputeHashForData(utf16Data, length, 0);
977 return MixHashcode(hash, NOT_INTEGER);
978 }
979
// Returns the length of the prefix of `utf8` that does not end in a truncated multi-byte sequence.
// A byte sitting k positions from the end (k = 1, 2, 3) whose value is at least 0xC0, 0xE0 or 0xF0
// respectively announces more following bytes than remain, so those k bytes are cut off. When several
// positions qualify, the one farthest from the end decides.
static size_t FixUtf8Len(const uint8_t* utf8, size_t utf8Len)
{
    // MIN_LEAD[k - 1] is the smallest byte value that is too long a sequence start at distance k.
    constexpr uint8_t MIN_LEAD[] = {0xC0, 0xE0, 0xF0};
    constexpr size_t MAX_TAIL = 3;
    for (size_t back = MAX_TAIL; back >= 1; --back) {
        if (utf8Len >= back && utf8[utf8Len - back] >= MIN_LEAD[back - 1]) {
            return utf8Len - back;
        }
    }
    return utf8Len;
}
1000
1001
/* static */
// Compares a UTF-8 byte sequence with a UTF-16 code-unit sequence without materialising a converted copy.
// The UTF-8 side is decoded sequence by sequence and each decoded value is checked against the next
// UTF-16 unit(s).
// @param utf8Data  start of the UTF-8 bytes
// @param utf8Len   number of UTF-8 bytes
// @param utf16Data start of the UTF-16 code units
// @param utf16Len  number of UTF-16 code units
// @return true only when every decoded value matches and both inputs are consumed completely
bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len,
                                   const uint16_t *utf16Data, uint32_t utf16Len)
{
    // Multi-byte decoding is limited to the prefix FixUtf8Len() accepts, so the unchecked reads of
    // continuation bytes below are not driven by a lead byte sitting in the last 1-3 positions.
    size_t safeUtf8Len = FixUtf8Len(utf8Data, utf8Len);
    const uint8_t *utf8End = utf8Data + utf8Len;
    const uint8_t *utf8SafeEnd = utf8Data + safeUtf8Len;
    const uint16_t *utf16End = utf16Data + utf16Len;
    while (utf8Data < utf8SafeEnd && utf16Data < utf16End) {
        uint8_t src = *utf8Data;
        // The high nibble of the lead byte selects the sequence length.
        switch (src & 0xF0) {
            case 0xF0: {
                // Four-byte sequence: lead byte plus three following bytes.
                const uint8_t c2 = *(++utf8Data);
                const uint8_t c3 = *(++utf8Data);
                const uint8_t c4 = *(++utf8Data);
                uint32_t codePoint = ((src & LOW_3BITS) << OFFSET_18POS) | ((c2 & LOW_6BITS) << OFFSET_12POS) |
                                     ((c3 & LOW_6BITS) << OFFSET_6POS) | (c4 & LOW_6BITS);
                if (codePoint >= SURROGATE_RAIR_START) {
                    // Values from 0x10000 up must match a surrogate pair, so two UTF-16 units are needed.
                    if (utf16Data >= utf16End - 1) {
                        return false;
                    }
                    codePoint -= SURROGATE_RAIR_START;
                    // High surrogate carries the upper bits, low surrogate the lower 10 bits.
                    if (*utf16Data++ != static_cast<uint16_t>((codePoint >> OFFSET_10POS) | H_SURROGATE_START)) {
                        return false;
                    } else if (*utf16Data++ != static_cast<uint16_t>((codePoint & 0x3FF) | L_SURROGATE_START)) {
                        return false;
                    }
                } else {
                    // A four-byte form that decodes below 0x10000 is compared as a single unit.
                    if (*utf16Data++ != static_cast<uint16_t>(codePoint)) {
                        return false;
                    }
                }
                // Step past the last byte of the sequence.
                utf8Data++;
                break;
            }
            case 0xE0: {
                // Three-byte sequence: 4 + 6 + 6 payload bits form one UTF-16 unit.
                const uint8_t c2 = *(++utf8Data);
                const uint8_t c3 = *(++utf8Data);
                if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_4BITS) << OFFSET_12POS) |
                    ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS))) {
                    return false;
                }
                utf8Data++;
                break;
            }
            case 0xD0:
            case 0xC0: {
                // Two-byte sequence: 5 + 6 payload bits form one UTF-16 unit.
                const uint8_t c2 = *(++utf8Data);
                if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_5BITS) << OFFSET_6POS) | (c2 & LOW_6BITS))) {
                    return false;
                }
                utf8Data++;
                break;
            }
            default:
                // Any other lead byte is compared as a single byte; the loop then keeps consuming
                // the following run of bytes below 0x80 without going back through the switch.
                do {
                    if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
                        return false;
                    }
                } while (utf8Data < utf8SafeEnd && utf16Data < utf16End && *utf8Data < 0x80);
                break;
        }
    }
    // The remain chars should be treated as single byte char.
    // (These are the bytes FixUtf8Len() excluded from multi-byte decoding.)
    while (utf8Data < utf8End && utf16Data < utf16End) {
        if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
            return false;
        }
    }
    // Equal only if neither side has anything left over.
    return utf8Data == utf8End && utf16Data == utf16End;
}
1073
ToElementIndex(uint32_t *index)1074 bool EcmaString::ToElementIndex(uint32_t *index)
1075 {
1076 uint32_t len = GetLength();
1077 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
1078 return false;
1079 }
1080 if (UNLIKELY(IsUtf16())) {
1081 return false;
1082 }
1083
1084 // fast path: get integer from string's hash value
1085 if (TryToGetInteger(index)) {
1086 return true;
1087 }
1088
1089 CVector<uint8_t> buf;
1090 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1091 uint32_t c = data[0];
1092 uint64_t n = 0;
1093 if (c == '0') {
1094 *index = 0;
1095 return len == 1;
1096 }
1097 uint32_t loopStart = 0;
1098 if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) {
1099 *index = n;
1100 return true;
1101 }
1102 return false;
1103 }
1104
ToInt(int32_t *index, bool *negative)1105 bool EcmaString::ToInt(int32_t *index, bool *negative)
1106 {
1107 uint32_t len = GetLength();
1108 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
1109 return false;
1110 }
1111 if (UNLIKELY(IsUtf16())) {
1112 return false;
1113 }
1114 CVector<uint8_t> buf;
1115 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1116 uint32_t c = data[0];
1117 uint32_t loopStart = 0;
1118 uint64_t n = 0;
1119 if (c == '0') {
1120 *index = 0;
1121 return len == 1;
1122 }
1123 if (c == '-' && len > 1) {
1124 *negative = true;
1125 loopStart = 1;
1126 }
1127
1128 if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
1129 *index = *negative ? -n : n;
1130 return true;
1131 }
1132 return false;
1133 }
1134
ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)1135 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
1136 {
1137 uint64_t n = 0;
1138 uint32_t len = GetLength();
1139 if (UNLIKELY(loopStart >= len)) {
1140 return false;
1141 }
1142 for (uint32_t i = loopStart; i < len; i++) {
1143 uint32_t c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1144 if (c < '0' || c > '9') {
1145 return false;
1146 }
1147 // NOLINTNEXTLINE(readability-magic-numbers)
1148 n = n * 10 + (c - '0'); // 10: decimal factor
1149 }
1150 *index = n;
1151 return true;
1152 }
1153
ToTypedArrayIndex(uint32_t *index)1154 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
1155 {
1156 uint32_t len = GetLength();
1157 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
1158 return false;
1159 }
1160 if (UNLIKELY(IsUtf16())) {
1161 return false;
1162 }
1163
1164 CVector<uint8_t> buf;
1165 const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1166 uint32_t c = data[0]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1167 uint64_t n = 0;
1168 if (c == '0') {
1169 *index = 0;
1170 return len == 1;
1171 }
1172 if (c > '0' && c <= '9') {
1173 n = c - '0';
1174 for (uint32_t i = 1; i < len; i++) {
1175 c = data[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1176 if (c >= '0' && c <= '9') {
1177 // NOLINTNEXTLINE(readability-magic-numbers)
1178 n = n * 10 + (c - '0'); // 10: decimal factor
1179 } else if (c == '.') {
1180 n = JSObject::MAX_ELEMENT_INDEX;
1181 break;
1182 } else {
1183 return false;
1184 }
1185 }
1186 if (n < JSObject::MAX_ELEMENT_INDEX) {
1187 *index = n;
1188 return true;
1189 } else {
1190 *index = JSObject::MAX_ELEMENT_INDEX;
1191 return true;
1192 }
1193 } else if (c == '-') {
1194 *index = JSObject::MAX_ELEMENT_INDEX;
1195 return true;
1196 }
1197 return false;
1198 }
1199
1200 template<typename T>
TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)1201 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
1202 {
1203 uint32_t srcLen = src->GetLength();
1204 int32_t start = 0;
1205 int32_t end = static_cast<int32_t>(srcLen) - 1;
1206
1207 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
1208 start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
1209 }
1210 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
1211 end = base::StringHelper::GetEnd(data, start, srcLen);
1212 }
1213 EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
1214 return res;
1215 }
1216
1217 /* static */
ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)1218 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1219 {
1220 auto srcFlat = FlattenAllString(vm, src);
1221 uint32_t srcLength = srcFlat.GetLength();
1222 auto factory = vm->GetFactory();
1223 if (srcFlat.IsUtf16()) {
1224 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1225 std::string res = base::StringHelper::ToLower(u16str);
1226 return *(factory->NewFromStdString(res));
1227 } else {
1228 return ConvertUtf8ToLowerOrUpper(vm, src, true);
1229 }
1230 }
1231
1232 /* static */
TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)1233 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1234 {
1235 auto srcFlat = FlattenAllString(vm, src);
1236 uint32_t srcLength = srcFlat.GetLength();
1237 const char start = 'A';
1238 const char end = 'Z';
1239 uint32_t upperIndex = srcLength;
1240 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1241 for (uint32_t index = 0; index < srcLength; ++index) {
1242 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1243 upperIndex = index;
1244 break;
1245 }
1246 }
1247 if (upperIndex == srcLength) {
1248 return *src;
1249 }
1250 return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
1251 }
1252
1253 /* static */
TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)1254 EcmaString *EcmaString::TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1255 {
1256 auto srcFlat = FlattenAllString(vm, src);
1257 uint32_t srcLength = srcFlat.GetLength();
1258 const char start = 'a';
1259 const char end = 'z';
1260 uint32_t lowerIndex = srcLength;
1261 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1262 for (uint32_t index = 0; index < srcLength; ++index) {
1263 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1264 lowerIndex = index;
1265 break;
1266 }
1267 }
1268 if (lowerIndex == srcLength) {
1269 return *src;
1270 }
1271 return ConvertUtf8ToLowerOrUpper(vm, src, false, lowerIndex);
1272 }
1273
1274 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, bool toLower, uint32_t startIndex)1275 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
1276 bool toLower, uint32_t startIndex)
1277 {
1278 const char start = toLower ? 'A' : 'a';
1279 const char end = toLower ? 'Z' : 'z';
1280 uint32_t srcLength = src->GetLength();
1281 JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
1282 auto srcFlat = FlattenAllString(vm, src);
1283 Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1284 auto newStringPtr = newString->GetDataUtf8Writable();
1285 if (startIndex > 0) {
1286 if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
1287 LOG_FULL(FATAL) << "memcpy_s failed";
1288 UNREACHABLE();
1289 }
1290 }
1291 for (uint32_t index = startIndex; index < srcLength; ++index) {
1292 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1293 *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower
1294 } else {
1295 *(newStringPtr + index) = data[index];
1296 }
1297 }
1298 return *newString;
1299 }
1300
1301 /* static */
ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)1302 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1303 {
1304 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1305 uint32_t srcLength = srcFlat.GetLength();
1306 auto factory = vm->GetFactory();
1307 if (srcFlat.IsUtf16()) {
1308 std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1309 std::string res = base::StringHelper::ToUpper(u16str);
1310 return *(factory->NewFromStdString(res));
1311 } else {
1312 return ConvertUtf8ToLowerOrUpper(vm, src, false);
1313 }
1314 }
1315
1316 /* static */
ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)1317 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1318 {
1319 auto factory = vm->GetFactory();
1320 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1321 std::u16string utf16 = srcFlat.ToU16String();
1322 std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
1323 return *(factory->NewFromStdString(res));
1324 }
1325
1326 /* static */
ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)1327 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1328 {
1329 auto factory = vm->GetFactory();
1330 FlatStringInfo srcFlat = FlattenAllString(vm, src);
1331 std::u16string utf16 = srcFlat.ToU16String();
1332 std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
1333 return *(factory->NewFromStdString(res));
1334 }
1335
Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)1336 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
1337 {
1338 FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
1339 uint32_t srcLen = srcFlat.GetLength();
1340 if (UNLIKELY(srcLen == 0)) {
1341 return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
1342 }
1343 if (srcFlat.IsUtf8()) {
1344 Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
1345 return TrimBody(thread, src, data, mode);
1346 } else {
1347 Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
1348 return TrimBody(thread, src, data, mode);
1349 }
1350 }
1351
SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)1352 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1353 {
1354 ASSERT(string->IsTreeString() || string->IsSlicedString());
1355 ASSERT(IsSMemSpace(type));
1356 auto thread = vm->GetJSThread();
1357 uint32_t length = string->GetLength();
1358 EcmaString *result = nullptr;
1359 if (string->IsUtf8()) {
1360 result = CreateLineStringWithSpaceType(vm, length, true, type);
1361 WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
1362 } else {
1363 result = CreateLineStringWithSpaceType(vm, length, false, type);
1364 WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
1365 }
1366 if (string->IsTreeString()) {
1367 JSHandle<TreeEcmaString> tree(string);
1368 ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0);
1369 tree->SetFirst(thread, JSTaggedValue(result));
1370 tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1371 }
1372 return result;
1373 }
1374
Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)1375 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1376 {
1377 EcmaString *s = *string;
1378 if (!s->IsTreeString()) {
1379 return s;
1380 }
1381 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1382 if (!tree->IsFlat()) {
1383 return SlowFlatten(vm, string, type);
1384 }
1385 return EcmaString::Cast(tree->GetFirst());
1386 }
1387
FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)1388 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1389 {
1390 ASSERT(IsSMemSpace(type));
1391 EcmaString *s = *string;
1392 uint32_t startIndex = 0;
1393 if (s->IsLineOrConstantString()) {
1394 return FlatStringInfo(s, startIndex, s->GetLength());
1395 }
1396 if (string->IsTreeString()) {
1397 JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1398 if (!tree->IsFlat()) {
1399 s = SlowFlatten(vm, string, type);
1400 } else {
1401 s = EcmaString::Cast(tree->GetFirst());
1402 }
1403 } else if (string->IsSlicedString()) {
1404 s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent());
1405 startIndex = SlicedString::Cast(*string)->GetStartIndex();
1406 }
1407 return FlatStringInfo(s, startIndex, string->GetLength());
1408 }
1409
FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)1410 EcmaString *EcmaString::FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)
1411 {
1412 DISALLOW_GARBAGE_COLLECTION;
1413 if (string->IsLineOrConstantString()) {
1414 return string;
1415 }
1416 if (string->IsTreeString()) {
1417 TreeEcmaString *tree = TreeEcmaString::Cast(string);
1418 if (tree->IsFlat()) {
1419 string = EcmaString::Cast(tree->GetFirst());
1420 } else {
1421 uint32_t length = tree->GetLength();
1422 EcmaString *result = nullptr;
1423 if (tree->IsUtf8()) {
1424 result = CreateLineStringNoGC(vm, length, true);
1425 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
1426 } else {
1427 result = CreateLineStringNoGC(vm, length, false);
1428 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
1429 }
1430 tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
1431 tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1432 return result;
1433 }
1434 } else if (string->IsSlicedString()) {
1435 SlicedString *str = SlicedString::Cast(string);
1436 uint32_t length = str->GetLength();
1437 EcmaString *result = nullptr;
1438 if (str->IsUtf8()) {
1439 result = CreateLineStringNoGC(vm, length, true);
1440 WriteToFlat<uint8_t>(str, result->GetDataUtf8Writable(), length);
1441 } else {
1442 result = CreateLineStringNoGC(vm, length, false);
1443 WriteToFlat<uint16_t>(str, result->GetDataUtf16Writable(), length);
1444 }
1445 return result;
1446 }
1447 return string;
1448 }
1449
GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)1450 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1451 {
1452 ASSERT(src->IsUtf8());
1453 uint32_t length = src->GetLength();
1454 EcmaString *string = const_cast<EcmaString *>(src);
1455 if (string->IsTreeString()) {
1456 if (string->IsFlat()) {
1457 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1458 } else {
1459 buf.reserve(length);
1460 WriteToFlat(string, buf.data(), length);
1461 return buf.data();
1462 }
1463 } else if (string->IsSlicedString()) {
1464 SlicedString *str = SlicedString::Cast(string);
1465 return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1466 }
1467 return string->GetDataUtf8();
1468 }
1469
GetNonTreeUtf8Data(const EcmaString *src)1470 const uint8_t *EcmaString::GetNonTreeUtf8Data(const EcmaString *src)
1471 {
1472 ASSERT(src->IsUtf8());
1473 ASSERT(!src->IsTreeString());
1474 EcmaString *string = const_cast<EcmaString *>(src);
1475 if (string->IsSlicedString()) {
1476 SlicedString *str = SlicedString::Cast(string);
1477 return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1478 }
1479 ASSERT(src->IsLineOrConstantString());
1480 return string->GetDataUtf8();
1481 }
1482
GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)1483 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1484 {
1485 ASSERT(src->IsUtf16());
1486 uint32_t length = src->GetLength();
1487 EcmaString *string = const_cast<EcmaString *>(src);
1488 if (string->IsTreeString()) {
1489 if (string->IsFlat()) {
1490 string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1491 } else {
1492 buf.reserve(length);
1493 WriteToFlat(string, buf.data(), length);
1494 return buf.data();
1495 }
1496 } else if (string->IsSlicedString()) {
1497 SlicedString *str = SlicedString::Cast(string);
1498 return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1499 }
1500 return string->GetDataUtf16();
1501 }
1502
GetNonTreeUtf16Data(const EcmaString *src)1503 const uint16_t *EcmaString::GetNonTreeUtf16Data(const EcmaString *src)
1504 {
1505 ASSERT(src->IsUtf16());
1506 ASSERT(!src->IsTreeString());
1507 EcmaString *string = const_cast<EcmaString *>(src);
1508 if (string->IsSlicedString()) {
1509 SlicedString *str = SlicedString::Cast(string);
1510 return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1511 }
1512 ASSERT(src->IsLineOrConstantString());
1513 return string->GetDataUtf16();
1514 }
1515
ToU16String(uint32_t len)1516 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1517 {
1518 uint32_t length = len > 0 ? len : GetLength();
1519 std::u16string result;
1520 if (IsUtf16()) {
1521 const uint16_t *data = this->GetDataUtf16();
1522 result = base::StringHelper::Utf16ToU16String(data, length);
1523 } else {
1524 const uint8_t *data = this->GetDataUtf8();
1525 result = base::StringHelper::Utf8ToU16String(data, length);
1526 }
1527 return result;
1528 }
1529
// Wraps the string stored in `obj`.
// `obj` must not be null (checked by ASSERT); it is converted with EcmaString::Cast.
EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
{
    ASSERT(obj != nullptr);
    string_ = EcmaString::Cast(obj);
}
1535
// Wraps the string held by `value`.
// `value` must satisfy IsString() (checked by ASSERT); its tagged object is converted with EcmaString::Cast.
EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
{
    ASSERT(value.IsString());
    string_ = EcmaString::Cast(value.GetTaggedObject());
}
1541
// Wraps the string a handle currently refers to.
// Only the raw pointer obtained by dereferencing the handle is stored, not the handle itself.
EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
    : string_(*strHandle)
{
}
1546
ToStdString(StringConvertedUsage usage)1547 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1548 {
1549 if (string_ == nullptr) {
1550 return "";
1551 }
1552 bool modify = (usage != StringConvertedUsage::PRINT);
1553 CVector<uint8_t> buf;
1554 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1555 std::string res;
1556 res.reserve(sp.size());
1557 for (const auto &c : sp) {
1558 res.push_back(c);
1559 }
1560 return res;
1561 }
1562
Utf8ConvertToString()1563 CString EcmaStringAccessor::Utf8ConvertToString()
1564 {
1565 if (string_ == nullptr) {
1566 return CString("");
1567 }
1568 if (IsUtf8()) {
1569 std::string stdStr;
1570 if (IsLineString()) {
1571 return base::StringHelper::Utf8ToString(GetDataUtf8(), GetLength()).c_str();
1572 }
1573 CVector<uint8_t> buf;
1574 const uint8_t *data = EcmaString::GetUtf8DataFlat(string_, buf);
1575 return base::StringHelper::Utf8ToString(data, GetLength()).c_str();
1576 } else {
1577 return ToCString();
1578 }
1579 }
1580
DebuggerToStdString(StringConvertedUsage usage)1581 std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
1582 {
1583 if (string_ == nullptr) {
1584 return "";
1585 }
1586
1587 bool modify = (usage != StringConvertedUsage::PRINT);
1588 CVector<uint8_t> buf;
1589 Span<const uint8_t> sp = string_->DebuggerToUtf8Span(buf, modify);
1590 std::string res;
1591 res.reserve(sp.size());
1592 for (const auto &c : sp) {
1593 res.push_back(c);
1594 }
1595 return res;
1596 }
1597
ToCString(StringConvertedUsage usage, bool cesu8)1598 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage, bool cesu8)
1599 {
1600 if (string_ == nullptr) {
1601 return "";
1602 }
1603 bool modify = (usage != StringConvertedUsage::PRINT);
1604 CVector<uint8_t> buf;
1605 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1606 CString res;
1607 res.reserve(sp.size());
1608 for (const auto &c : sp) {
1609 res.push_back(c);
1610 }
1611 return res;
1612 }
1613
// static
// Accessor-level entry point for creating a line string; forwards all arguments unchanged to
// EcmaString::CreateLineString and returns its result.
EcmaString *EcmaStringAccessor::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
{
    return EcmaString::CreateLineString(vm, length, compressed);
}
1619 } // namespace panda::ecmascript
1620