1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/builtins/builtins-utils-inl.h"
6#include "src/builtins/builtins.h"
7#include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
8#include "src/logging/counters.h"
9#include "src/numbers/conversions.h"
10#include "src/objects/objects-inl.h"
11#ifdef V8_INTL_SUPPORT
12#include "src/objects/intl-objects.h"
13#endif
14#include "src/base/strings.h"
15#include "src/regexp/regexp-utils.h"
16#include "src/strings/string-builder-inl.h"
17#include "src/strings/string-case.h"
18#include "src/strings/unicode-inl.h"
19#include "src/strings/unicode.h"
20
21namespace v8 {
22namespace internal {
23
24namespace {  // for String.fromCodePoint
25
26bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
27  if (!value->IsNumber() &&
28      !Object::ToNumber(isolate, value).ToHandle(&value)) {
29    return false;
30  }
31
32  if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
33      value->Number()) {
34    return false;
35  }
36
37  if (value->Number() < 0 || value->Number() > 0x10FFFF) {
38    return false;
39  }
40
41  return true;
42}
43
44static constexpr base::uc32 kInvalidCodePoint = static_cast<base::uc32>(-1);
45
46base::uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
47  Handle<Object> value = args.at(1 + index);
48  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
49      isolate, value, Object::ToNumber(isolate, value), kInvalidCodePoint);
50  if (!IsValidCodePoint(isolate, value)) {
51    isolate->Throw(*isolate->factory()->NewRangeError(
52        MessageTemplate::kInvalidCodePoint, value));
53    return kInvalidCodePoint;
54  }
55  return DoubleToUint32(value->Number());
56}
57
58}  // namespace
59
60// ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
61BUILTIN(StringFromCodePoint) {
62  HandleScope scope(isolate);
63  int const length = args.length() - 1;
64  if (length == 0) return ReadOnlyRoots(isolate).empty_string();
65  DCHECK_LT(0, length);
66
67  // Optimistically assume that the resulting String contains only one byte
68  // characters.
69  std::vector<uint8_t> one_byte_buffer;
70  one_byte_buffer.reserve(length);
71  base::uc32 code = 0;
72  int index;
73  for (index = 0; index < length; index++) {
74    code = NextCodePoint(isolate, args, index);
75    if (code == kInvalidCodePoint) {
76      return ReadOnlyRoots(isolate).exception();
77    }
78    if (code > String::kMaxOneByteCharCode) {
79      break;
80    }
81    one_byte_buffer.push_back(code);
82  }
83
84  if (index == length) {
85    RETURN_RESULT_OR_FAILURE(
86        isolate, isolate->factory()->NewStringFromOneByte(base::Vector<uint8_t>(
87                     one_byte_buffer.data(), one_byte_buffer.size())));
88  }
89
90  std::vector<base::uc16> two_byte_buffer;
91  two_byte_buffer.reserve(length - index);
92
93  while (true) {
94    if (code <=
95        static_cast<base::uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
96      two_byte_buffer.push_back(code);
97    } else {
98      two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
99      two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
100    }
101
102    if (++index == length) {
103      break;
104    }
105    code = NextCodePoint(isolate, args, index);
106    if (code == kInvalidCodePoint) {
107      return ReadOnlyRoots(isolate).exception();
108    }
109  }
110
111  Handle<SeqTwoByteString> result;
112  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
113      isolate, result,
114      isolate->factory()->NewRawTwoByteString(
115          static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
116
117  DisallowGarbageCollection no_gc;
118  CopyChars(result->GetChars(no_gc), one_byte_buffer.data(),
119            one_byte_buffer.size());
120  CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(),
121            two_byte_buffer.data(), two_byte_buffer.size());
122
123  return *result;
124}
125
126// ES6 section 21.1.3.9
127// String.prototype.lastIndexOf ( searchString [ , position ] )
128BUILTIN(StringPrototypeLastIndexOf) {
129  HandleScope handle_scope(isolate);
130  return String::LastIndexOf(isolate, args.receiver(),
131                             args.atOrUndefined(isolate, 1),
132                             args.atOrUndefined(isolate, 2));
133}
134
135// ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
136//
137// This function is implementation specific.  For now, we do not
138// do anything locale specific.
139BUILTIN(StringPrototypeLocaleCompare) {
140  HandleScope handle_scope(isolate);
141
142  isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
143  static const char* const kMethod = "String.prototype.localeCompare";
144
145#ifdef V8_INTL_SUPPORT
146  TO_THIS_STRING(str1, kMethod);
147  Handle<String> str2;
148  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
149      isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
150  base::Optional<int> result = Intl::StringLocaleCompare(
151      isolate, str1, str2, args.atOrUndefined(isolate, 2),
152      args.atOrUndefined(isolate, 3), kMethod);
153  if (!result.has_value()) {
154    DCHECK(isolate->has_pending_exception());
155    return ReadOnlyRoots(isolate).exception();
156  }
157  return Smi::FromInt(result.value());
158#else
159  DCHECK_LE(2, args.length());
160
161  TO_THIS_STRING(str1, kMethod);
162  Handle<String> str2;
163  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
164                                     Object::ToString(isolate, args.at(1)));
165
166  if (str1.is_identical_to(str2)) return Smi::zero();  // Equal.
167  int str1_length = str1->length();
168  int str2_length = str2->length();
169
170  // Decide trivial cases without flattening.
171  if (str1_length == 0) {
172    if (str2_length == 0) return Smi::zero();  // Equal.
173    return Smi::FromInt(-str2_length);
174  } else {
175    if (str2_length == 0) return Smi::FromInt(str1_length);
176  }
177
178  int end = str1_length < str2_length ? str1_length : str2_length;
179
180  // No need to flatten if we are going to find the answer on the first
181  // character. At this point we know there is at least one character
182  // in each string, due to the trivial case handling above.
183  int d = str1->Get(0) - str2->Get(0);
184  if (d != 0) return Smi::FromInt(d);
185
186  str1 = String::Flatten(isolate, str1);
187  str2 = String::Flatten(isolate, str2);
188
189  DisallowGarbageCollection no_gc;
190  String::FlatContent flat1 = str1->GetFlatContent(no_gc);
191  String::FlatContent flat2 = str2->GetFlatContent(no_gc);
192
193  for (int i = 0; i < end; i++) {
194    if (flat1.Get(i) != flat2.Get(i)) {
195      return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
196    }
197  }
198
199  return Smi::FromInt(str1_length - str2_length);
200#endif  // !V8_INTL_SUPPORT
201}
202
203#ifndef V8_INTL_SUPPORT
204// ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
205//
206// Simply checks the argument is valid and returns the string itself.
207// If internationalization is enabled, then intl.js will override this function
208// and provide the proper functionality, so this is just a fallback.
209BUILTIN(StringPrototypeNormalize) {
210  HandleScope handle_scope(isolate);
211  TO_THIS_STRING(string, "String.prototype.normalize");
212
213  Handle<Object> form_input = args.atOrUndefined(isolate, 1);
214  if (form_input->IsUndefined(isolate)) return *string;
215
216  Handle<String> form;
217  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
218                                     Object::ToString(isolate, form_input));
219
220  if (!(String::Equals(isolate, form, isolate->factory()->NFC_string()) ||
221        String::Equals(isolate, form, isolate->factory()->NFD_string()) ||
222        String::Equals(isolate, form, isolate->factory()->NFKC_string()) ||
223        String::Equals(isolate, form, isolate->factory()->NFKD_string()))) {
224    Handle<String> valid_forms =
225        isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
226    THROW_NEW_ERROR_RETURN_FAILURE(
227        isolate,
228        NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
229  }
230
231  return *string;
232}
233#endif  // !V8_INTL_SUPPORT
234
235
236#ifndef V8_INTL_SUPPORT
237namespace {
238
239inline bool ToUpperOverflows(base::uc32 character) {
240  // y with umlauts and the micro sign are the only characters that stop
241  // fitting into one-byte when converting to uppercase.
242  static const base::uc32 yuml_code = 0xFF;
243  static const base::uc32 micro_code = 0xB5;
244  return (character == yuml_code || character == micro_code);
245}
246
247template <class Converter>
248V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper(
249    Isolate* isolate, String string, SeqString result, int result_length,
250    unibrow::Mapping<Converter, 128>* mapping) {
251  DisallowGarbageCollection no_gc;
252  // We try this twice, once with the assumption that the result is no longer
253  // than the input and, if that assumption breaks, again with the exact
254  // length.  This may not be pretty, but it is nicer than what was here before
255  // and I hereby claim my vaffel-is.
256  //
257  // NOTE: This assumes that the upper/lower case of an ASCII
258  // character is also ASCII.  This is currently the case, but it
259  // might break in the future if we implement more context and locale
260  // dependent upper/lower conversions.
261  bool has_changed_character = false;
262
263  // Convert all characters to upper case, assuming that they will fit
264  // in the buffer
265  StringCharacterStream stream(string);
266  unibrow::uchar chars[Converter::kMaxWidth];
267  // We can assume that the string is not empty
268  base::uc32 current = stream.GetNext();
269  bool ignore_overflow = Converter::kIsToLower || result.IsSeqTwoByteString();
270  for (int i = 0; i < result_length;) {
271    bool has_next = stream.HasMore();
272    base::uc32 next = has_next ? stream.GetNext() : 0;
273    int char_length = mapping->get(current, next, chars);
274    if (char_length == 0) {
275      // The case conversion of this character is the character itself.
276      result.Set(i, current);
277      i++;
278    } else if (char_length == 1 &&
279               (ignore_overflow || !ToUpperOverflows(current))) {
280      // Common case: converting the letter resulted in one character.
281      DCHECK(static_cast<base::uc32>(chars[0]) != current);
282      result.Set(i, chars[0]);
283      has_changed_character = true;
284      i++;
285    } else if (result_length == string.length()) {
286      bool overflows = ToUpperOverflows(current);
287      // We've assumed that the result would be as long as the
288      // input but here is a character that converts to several
289      // characters.  No matter, we calculate the exact length
290      // of the result and try the whole thing again.
291      //
292      // Note that this leaves room for optimization.  We could just
293      // memcpy what we already have to the result string.  Also,
294      // the result string is the last object allocated we could
295      // "realloc" it and probably, in the vast majority of cases,
296      // extend the existing string to be able to hold the full
297      // result.
298      int next_length = 0;
299      if (has_next) {
300        next_length = mapping->get(next, 0, chars);
301        if (next_length == 0) next_length = 1;
302      }
303      int current_length = i + char_length + next_length;
304      while (stream.HasMore()) {
305        current = stream.GetNext();
306        overflows |= ToUpperOverflows(current);
307        // NOTE: we use 0 as the next character here because, while
308        // the next character may affect what a character converts to,
309        // it does not in any case affect the length of what it convert
310        // to.
311        int char_length = mapping->get(current, 0, chars);
312        if (char_length == 0) char_length = 1;
313        current_length += char_length;
314        if (current_length > String::kMaxLength) {
315          AllowGarbageCollection allocate_error_and_return;
316          THROW_NEW_ERROR_RETURN_FAILURE(isolate,
317                                         NewInvalidStringLengthError());
318        }
319      }
320      // Try again with the real length.  Return signed if we need
321      // to allocate a two-byte string for to uppercase.
322      return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
323                                             : Smi::FromInt(current_length);
324    } else {
325      for (int j = 0; j < char_length; j++) {
326        result.Set(i, chars[j]);
327        i++;
328      }
329      has_changed_character = true;
330    }
331    current = next;
332  }
333  if (has_changed_character) {
334    return result;
335  } else {
336    // If we didn't actually change anything in doing the conversion
337    // we simple return the result and let the converted string
338    // become garbage; there is no reason to keep two identical strings
339    // alive.
340    return string;
341  }
342}
343
344template <class Converter>
345V8_WARN_UNUSED_RESULT static Object ConvertCase(
346    Handle<String> s, Isolate* isolate,
347    unibrow::Mapping<Converter, 128>* mapping) {
348  s = String::Flatten(isolate, s);
349  int length = s->length();
350  // Assume that the string is not empty; we need this assumption later
351  if (length == 0) return *s;
352
353  // Simpler handling of ASCII strings.
354  //
355  // NOTE: This assumes that the upper/lower case of an ASCII
356  // character is also ASCII.  This is currently the case, but it
357  // might break in the future if we implement more context and locale
358  // dependent upper/lower conversions.
359  if (String::IsOneByteRepresentationUnderneath(*s)) {
360    // Same length as input.
361    Handle<SeqOneByteString> result =
362        isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
363    DisallowGarbageCollection no_gc;
364    String::FlatContent flat_content = s->GetFlatContent(no_gc);
365    DCHECK(flat_content.IsFlat());
366    bool has_changed_character = false;
367    int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
368        reinterpret_cast<char*>(result->GetChars(no_gc)),
369        reinterpret_cast<const char*>(flat_content.ToOneByteVector().begin()),
370        length, &has_changed_character);
371    // If not ASCII, we discard the result and take the 2 byte path.
372    if (index_to_first_unprocessed == length)
373      return has_changed_character ? *result : *s;
374  }
375
376  Handle<SeqString> result;  // Same length as input.
377  if (s->IsOneByteRepresentation()) {
378    result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
379  } else {
380    result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
381  }
382
383  Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
384  if (answer.IsException(isolate) || answer.IsString()) return answer;
385
386  DCHECK(answer.IsSmi());
387  length = Smi::ToInt(answer);
388  if (s->IsOneByteRepresentation() && length > 0) {
389    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
390        isolate, result, isolate->factory()->NewRawOneByteString(length));
391  } else {
392    if (length < 0) length = -length;
393    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
394        isolate, result, isolate->factory()->NewRawTwoByteString(length));
395  }
396  return ConvertCaseHelper(isolate, *s, *result, length, mapping);
397}
398
399}  // namespace
400
401BUILTIN(StringPrototypeToLocaleLowerCase) {
402  HandleScope scope(isolate);
403  TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase");
404  return ConvertCase(string, isolate,
405                     isolate->runtime_state()->to_lower_mapping());
406}
407
408BUILTIN(StringPrototypeToLocaleUpperCase) {
409  HandleScope scope(isolate);
410  TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase");
411  return ConvertCase(string, isolate,
412                     isolate->runtime_state()->to_upper_mapping());
413}
414
415BUILTIN(StringPrototypeToLowerCase) {
416  HandleScope scope(isolate);
417  TO_THIS_STRING(string, "String.prototype.toLowerCase");
418  return ConvertCase(string, isolate,
419                     isolate->runtime_state()->to_lower_mapping());
420}
421
422BUILTIN(StringPrototypeToUpperCase) {
423  HandleScope scope(isolate);
424  TO_THIS_STRING(string, "String.prototype.toUpperCase");
425  return ConvertCase(string, isolate,
426                     isolate->runtime_state()->to_upper_mapping());
427}
428#endif  // !V8_INTL_SUPPORT
429
430// ES6 #sec-string.prototype.raw
431BUILTIN(StringRaw) {
432  HandleScope scope(isolate);
433  Handle<Object> templ = args.atOrUndefined(isolate, 1);
434  const uint32_t argc = args.length();
435  Handle<String> raw_string =
436      isolate->factory()->NewStringFromAsciiChecked("raw");
437
438  Handle<Object> cooked;
439  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
440                                     Object::ToObject(isolate, templ));
441
442  Handle<Object> raw;
443  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
444      isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
445  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
446                                     Object::ToObject(isolate, raw));
447  Handle<Object> raw_len;
448  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
449      isolate, raw_len,
450      Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
451
452  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
453                                     Object::ToLength(isolate, raw_len));
454
455  IncrementalStringBuilder result_builder(isolate);
456  // Intentional spec violation: we ignore {length} values >= 2^32, because
457  // assuming non-empty chunks they would generate too-long strings anyway.
458  const double raw_len_number = raw_len->Number();
459  const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max()
460                              ? std::numeric_limits<uint32_t>::max()
461                              : static_cast<uint32_t>(raw_len_number);
462  if (length > 0) {
463    Handle<Object> first_element;
464    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
465                                       Object::GetElement(isolate, raw, 0));
466
467    Handle<String> first_string;
468    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
469        isolate, first_string, Object::ToString(isolate, first_element));
470    result_builder.AppendString(first_string);
471
472    for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
473      if (arg_i < argc) {
474        Handle<String> argument_string;
475        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
476            isolate, argument_string,
477            Object::ToString(isolate, args.at(arg_i)));
478        result_builder.AppendString(argument_string);
479      }
480
481      Handle<Object> element;
482      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
483                                         Object::GetElement(isolate, raw, i));
484
485      Handle<String> element_string;
486      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
487                                         Object::ToString(isolate, element));
488      result_builder.AppendString(element_string);
489    }
490  }
491
492  RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
493}
494
495}  // namespace internal
496}  // namespace v8
497