1// Copyright 2016 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "src/builtins/builtins-utils-inl.h" 6#include "src/builtins/builtins.h" 7#include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop. 8#include "src/logging/counters.h" 9#include "src/numbers/conversions.h" 10#include "src/objects/objects-inl.h" 11#ifdef V8_INTL_SUPPORT 12#include "src/objects/intl-objects.h" 13#endif 14#include "src/base/strings.h" 15#include "src/regexp/regexp-utils.h" 16#include "src/strings/string-builder-inl.h" 17#include "src/strings/string-case.h" 18#include "src/strings/unicode-inl.h" 19#include "src/strings/unicode.h" 20 21namespace v8 { 22namespace internal { 23 24namespace { // for String.fromCodePoint 25 26bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) { 27 if (!value->IsNumber() && 28 !Object::ToNumber(isolate, value).ToHandle(&value)) { 29 return false; 30 } 31 32 if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() != 33 value->Number()) { 34 return false; 35 } 36 37 if (value->Number() < 0 || value->Number() > 0x10FFFF) { 38 return false; 39 } 40 41 return true; 42} 43 44static constexpr base::uc32 kInvalidCodePoint = static_cast<base::uc32>(-1); 45 46base::uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) { 47 Handle<Object> value = args.at(1 + index); 48 ASSIGN_RETURN_ON_EXCEPTION_VALUE( 49 isolate, value, Object::ToNumber(isolate, value), kInvalidCodePoint); 50 if (!IsValidCodePoint(isolate, value)) { 51 isolate->Throw(*isolate->factory()->NewRangeError( 52 MessageTemplate::kInvalidCodePoint, value)); 53 return kInvalidCodePoint; 54 } 55 return DoubleToUint32(value->Number()); 56} 57 58} // namespace 59 60// ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints ) 61BUILTIN(StringFromCodePoint) { 62 HandleScope scope(isolate); 63 int const length = args.length() - 1; 64 if (length == 0) return ReadOnlyRoots(isolate).empty_string(); 65 DCHECK_LT(0, length); 66 67 // Optimistically assume that the resulting String contains only one byte 68 // characters. 69 std::vector<uint8_t> one_byte_buffer; 70 one_byte_buffer.reserve(length); 71 base::uc32 code = 0; 72 int index; 73 for (index = 0; index < length; index++) { 74 code = NextCodePoint(isolate, args, index); 75 if (code == kInvalidCodePoint) { 76 return ReadOnlyRoots(isolate).exception(); 77 } 78 if (code > String::kMaxOneByteCharCode) { 79 break; 80 } 81 one_byte_buffer.push_back(code); 82 } 83 84 if (index == length) { 85 RETURN_RESULT_OR_FAILURE( 86 isolate, isolate->factory()->NewStringFromOneByte(base::Vector<uint8_t>( 87 one_byte_buffer.data(), one_byte_buffer.size()))); 88 } 89 90 std::vector<base::uc16> two_byte_buffer; 91 two_byte_buffer.reserve(length - index); 92 93 while (true) { 94 if (code <= 95 static_cast<base::uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { 96 two_byte_buffer.push_back(code); 97 } else { 98 two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code)); 99 two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code)); 100 } 101 102 if (++index == length) { 103 break; 104 } 105 code = NextCodePoint(isolate, args, index); 106 if (code == kInvalidCodePoint) { 107 return ReadOnlyRoots(isolate).exception(); 108 } 109 } 110 111 Handle<SeqTwoByteString> result; 112 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 113 isolate, result, 114 isolate->factory()->NewRawTwoByteString( 115 static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size()))); 116 117 DisallowGarbageCollection no_gc; 118 CopyChars(result->GetChars(no_gc), one_byte_buffer.data(), 119 one_byte_buffer.size()); 120 CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(), 121 two_byte_buffer.data(), two_byte_buffer.size()); 122 123 return *result; 124} 125 126// ES6 section 21.1.3.9 127// String.prototype.lastIndexOf ( searchString [ , position ] ) 128BUILTIN(StringPrototypeLastIndexOf) { 129 HandleScope handle_scope(isolate); 130 return String::LastIndexOf(isolate, args.receiver(), 131 args.atOrUndefined(isolate, 1), 132 args.atOrUndefined(isolate, 2)); 133} 134 135// ES6 section 21.1.3.10 String.prototype.localeCompare ( that ) 136// 137// This function is implementation specific. For now, we do not 138// do anything locale specific. 139BUILTIN(StringPrototypeLocaleCompare) { 140 HandleScope handle_scope(isolate); 141 142 isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare); 143 static const char* const kMethod = "String.prototype.localeCompare"; 144 145#ifdef V8_INTL_SUPPORT 146 TO_THIS_STRING(str1, kMethod); 147 Handle<String> str2; 148 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 149 isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1))); 150 base::Optional<int> result = Intl::StringLocaleCompare( 151 isolate, str1, str2, args.atOrUndefined(isolate, 2), 152 args.atOrUndefined(isolate, 3), kMethod); 153 if (!result.has_value()) { 154 DCHECK(isolate->has_pending_exception()); 155 return ReadOnlyRoots(isolate).exception(); 156 } 157 return Smi::FromInt(result.value()); 158#else 159 DCHECK_LE(2, args.length()); 160 161 TO_THIS_STRING(str1, kMethod); 162 Handle<String> str2; 163 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2, 164 Object::ToString(isolate, args.at(1))); 165 166 if (str1.is_identical_to(str2)) return Smi::zero(); // Equal. 167 int str1_length = str1->length(); 168 int str2_length = str2->length(); 169 170 // Decide trivial cases without flattening. 171 if (str1_length == 0) { 172 if (str2_length == 0) return Smi::zero(); // Equal. 173 return Smi::FromInt(-str2_length); 174 } else { 175 if (str2_length == 0) return Smi::FromInt(str1_length); 176 } 177 178 int end = str1_length < str2_length ? str1_length : str2_length; 179 180 // No need to flatten if we are going to find the answer on the first 181 // character. At this point we know there is at least one character 182 // in each string, due to the trivial case handling above. 183 int d = str1->Get(0) - str2->Get(0); 184 if (d != 0) return Smi::FromInt(d); 185 186 str1 = String::Flatten(isolate, str1); 187 str2 = String::Flatten(isolate, str2); 188 189 DisallowGarbageCollection no_gc; 190 String::FlatContent flat1 = str1->GetFlatContent(no_gc); 191 String::FlatContent flat2 = str2->GetFlatContent(no_gc); 192 193 for (int i = 0; i < end; i++) { 194 if (flat1.Get(i) != flat2.Get(i)) { 195 return Smi::FromInt(flat1.Get(i) - flat2.Get(i)); 196 } 197 } 198 199 return Smi::FromInt(str1_length - str2_length); 200#endif // !V8_INTL_SUPPORT 201} 202 203#ifndef V8_INTL_SUPPORT 204// ES6 section 21.1.3.12 String.prototype.normalize ( [form] ) 205// 206// Simply checks the argument is valid and returns the string itself. 207// If internationalization is enabled, then intl.js will override this function 208// and provide the proper functionality, so this is just a fallback. 209BUILTIN(StringPrototypeNormalize) { 210 HandleScope handle_scope(isolate); 211 TO_THIS_STRING(string, "String.prototype.normalize"); 212 213 Handle<Object> form_input = args.atOrUndefined(isolate, 1); 214 if (form_input->IsUndefined(isolate)) return *string; 215 216 Handle<String> form; 217 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form, 218 Object::ToString(isolate, form_input)); 219 220 if (!(String::Equals(isolate, form, isolate->factory()->NFC_string()) || 221 String::Equals(isolate, form, isolate->factory()->NFD_string()) || 222 String::Equals(isolate, form, isolate->factory()->NFKC_string()) || 223 String::Equals(isolate, form, isolate->factory()->NFKD_string()))) { 224 Handle<String> valid_forms = 225 isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD"); 226 THROW_NEW_ERROR_RETURN_FAILURE( 227 isolate, 228 NewRangeError(MessageTemplate::kNormalizationForm, valid_forms)); 229 } 230 231 return *string; 232} 233#endif // !V8_INTL_SUPPORT 234 235 236#ifndef V8_INTL_SUPPORT 237namespace { 238 239inline bool ToUpperOverflows(base::uc32 character) { 240 // y with umlauts and the micro sign are the only characters that stop 241 // fitting into one-byte when converting to uppercase. 242 static const base::uc32 yuml_code = 0xFF; 243 static const base::uc32 micro_code = 0xB5; 244 return (character == yuml_code || character == micro_code); 245} 246 247template <class Converter> 248V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper( 249 Isolate* isolate, String string, SeqString result, int result_length, 250 unibrow::Mapping<Converter, 128>* mapping) { 251 DisallowGarbageCollection no_gc; 252 // We try this twice, once with the assumption that the result is no longer 253 // than the input and, if that assumption breaks, again with the exact 254 // length. This may not be pretty, but it is nicer than what was here before 255 // and I hereby claim my vaffel-is. 256 // 257 // NOTE: This assumes that the upper/lower case of an ASCII 258 // character is also ASCII. This is currently the case, but it 259 // might break in the future if we implement more context and locale 260 // dependent upper/lower conversions. 261 bool has_changed_character = false; 262 263 // Convert all characters to upper case, assuming that they will fit 264 // in the buffer 265 StringCharacterStream stream(string); 266 unibrow::uchar chars[Converter::kMaxWidth]; 267 // We can assume that the string is not empty 268 base::uc32 current = stream.GetNext(); 269 bool ignore_overflow = Converter::kIsToLower || result.IsSeqTwoByteString(); 270 for (int i = 0; i < result_length;) { 271 bool has_next = stream.HasMore(); 272 base::uc32 next = has_next ? stream.GetNext() : 0; 273 int char_length = mapping->get(current, next, chars); 274 if (char_length == 0) { 275 // The case conversion of this character is the character itself. 276 result.Set(i, current); 277 i++; 278 } else if (char_length == 1 && 279 (ignore_overflow || !ToUpperOverflows(current))) { 280 // Common case: converting the letter resulted in one character. 281 DCHECK(static_cast<base::uc32>(chars[0]) != current); 282 result.Set(i, chars[0]); 283 has_changed_character = true; 284 i++; 285 } else if (result_length == string.length()) { 286 bool overflows = ToUpperOverflows(current); 287 // We've assumed that the result would be as long as the 288 // input but here is a character that converts to several 289 // characters. No matter, we calculate the exact length 290 // of the result and try the whole thing again. 291 // 292 // Note that this leaves room for optimization. We could just 293 // memcpy what we already have to the result string. Also, 294 // the result string is the last object allocated we could 295 // "realloc" it and probably, in the vast majority of cases, 296 // extend the existing string to be able to hold the full 297 // result. 298 int next_length = 0; 299 if (has_next) { 300 next_length = mapping->get(next, 0, chars); 301 if (next_length == 0) next_length = 1; 302 } 303 int current_length = i + char_length + next_length; 304 while (stream.HasMore()) { 305 current = stream.GetNext(); 306 overflows |= ToUpperOverflows(current); 307 // NOTE: we use 0 as the next character here because, while 308 // the next character may affect what a character converts to, 309 // it does not in any case affect the length of what it convert 310 // to. 311 int char_length = mapping->get(current, 0, chars); 312 if (char_length == 0) char_length = 1; 313 current_length += char_length; 314 if (current_length > String::kMaxLength) { 315 AllowGarbageCollection allocate_error_and_return; 316 THROW_NEW_ERROR_RETURN_FAILURE(isolate, 317 NewInvalidStringLengthError()); 318 } 319 } 320 // Try again with the real length. Return signed if we need 321 // to allocate a two-byte string for to uppercase. 322 return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length) 323 : Smi::FromInt(current_length); 324 } else { 325 for (int j = 0; j < char_length; j++) { 326 result.Set(i, chars[j]); 327 i++; 328 } 329 has_changed_character = true; 330 } 331 current = next; 332 } 333 if (has_changed_character) { 334 return result; 335 } else { 336 // If we didn't actually change anything in doing the conversion 337 // we simple return the result and let the converted string 338 // become garbage; there is no reason to keep two identical strings 339 // alive. 340 return string; 341 } 342} 343 344template <class Converter> 345V8_WARN_UNUSED_RESULT static Object ConvertCase( 346 Handle<String> s, Isolate* isolate, 347 unibrow::Mapping<Converter, 128>* mapping) { 348 s = String::Flatten(isolate, s); 349 int length = s->length(); 350 // Assume that the string is not empty; we need this assumption later 351 if (length == 0) return *s; 352 353 // Simpler handling of ASCII strings. 354 // 355 // NOTE: This assumes that the upper/lower case of an ASCII 356 // character is also ASCII. This is currently the case, but it 357 // might break in the future if we implement more context and locale 358 // dependent upper/lower conversions. 359 if (String::IsOneByteRepresentationUnderneath(*s)) { 360 // Same length as input. 361 Handle<SeqOneByteString> result = 362 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); 363 DisallowGarbageCollection no_gc; 364 String::FlatContent flat_content = s->GetFlatContent(no_gc); 365 DCHECK(flat_content.IsFlat()); 366 bool has_changed_character = false; 367 int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>( 368 reinterpret_cast<char*>(result->GetChars(no_gc)), 369 reinterpret_cast<const char*>(flat_content.ToOneByteVector().begin()), 370 length, &has_changed_character); 371 // If not ASCII, we discard the result and take the 2 byte path. 372 if (index_to_first_unprocessed == length) 373 return has_changed_character ? *result : *s; 374 } 375 376 Handle<SeqString> result; // Same length as input. 377 if (s->IsOneByteRepresentation()) { 378 result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); 379 } else { 380 result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked(); 381 } 382 383 Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping); 384 if (answer.IsException(isolate) || answer.IsString()) return answer; 385 386 DCHECK(answer.IsSmi()); 387 length = Smi::ToInt(answer); 388 if (s->IsOneByteRepresentation() && length > 0) { 389 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 390 isolate, result, isolate->factory()->NewRawOneByteString(length)); 391 } else { 392 if (length < 0) length = -length; 393 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 394 isolate, result, isolate->factory()->NewRawTwoByteString(length)); 395 } 396 return ConvertCaseHelper(isolate, *s, *result, length, mapping); 397} 398 399} // namespace 400 401BUILTIN(StringPrototypeToLocaleLowerCase) { 402 HandleScope scope(isolate); 403 TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase"); 404 return ConvertCase(string, isolate, 405 isolate->runtime_state()->to_lower_mapping()); 406} 407 408BUILTIN(StringPrototypeToLocaleUpperCase) { 409 HandleScope scope(isolate); 410 TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase"); 411 return ConvertCase(string, isolate, 412 isolate->runtime_state()->to_upper_mapping()); 413} 414 415BUILTIN(StringPrototypeToLowerCase) { 416 HandleScope scope(isolate); 417 TO_THIS_STRING(string, "String.prototype.toLowerCase"); 418 return ConvertCase(string, isolate, 419 isolate->runtime_state()->to_lower_mapping()); 420} 421 422BUILTIN(StringPrototypeToUpperCase) { 423 HandleScope scope(isolate); 424 TO_THIS_STRING(string, "String.prototype.toUpperCase"); 425 return ConvertCase(string, isolate, 426 isolate->runtime_state()->to_upper_mapping()); 427} 428#endif // !V8_INTL_SUPPORT 429 430// ES6 #sec-string.prototype.raw 431BUILTIN(StringRaw) { 432 HandleScope scope(isolate); 433 Handle<Object> templ = args.atOrUndefined(isolate, 1); 434 const uint32_t argc = args.length(); 435 Handle<String> raw_string = 436 isolate->factory()->NewStringFromAsciiChecked("raw"); 437 438 Handle<Object> cooked; 439 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked, 440 Object::ToObject(isolate, templ)); 441 442 Handle<Object> raw; 443 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 444 isolate, raw, Object::GetProperty(isolate, cooked, raw_string)); 445 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw, 446 Object::ToObject(isolate, raw)); 447 Handle<Object> raw_len; 448 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 449 isolate, raw_len, 450 Object::GetProperty(isolate, raw, isolate->factory()->length_string())); 451 452 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len, 453 Object::ToLength(isolate, raw_len)); 454 455 IncrementalStringBuilder result_builder(isolate); 456 // Intentional spec violation: we ignore {length} values >= 2^32, because 457 // assuming non-empty chunks they would generate too-long strings anyway. 458 const double raw_len_number = raw_len->Number(); 459 const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max() 460 ? std::numeric_limits<uint32_t>::max() 461 : static_cast<uint32_t>(raw_len_number); 462 if (length > 0) { 463 Handle<Object> first_element; 464 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element, 465 Object::GetElement(isolate, raw, 0)); 466 467 Handle<String> first_string; 468 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 469 isolate, first_string, Object::ToString(isolate, first_element)); 470 result_builder.AppendString(first_string); 471 472 for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) { 473 if (arg_i < argc) { 474 Handle<String> argument_string; 475 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 476 isolate, argument_string, 477 Object::ToString(isolate, args.at(arg_i))); 478 result_builder.AppendString(argument_string); 479 } 480 481 Handle<Object> element; 482 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element, 483 Object::GetElement(isolate, raw, i)); 484 485 Handle<String> element_string; 486 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string, 487 Object::ToString(isolate, element)); 488 result_builder.AppendString(element_string); 489 } 490 } 491 492 RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish()); 493} 494 495} // namespace internal 496} // namespace v8 497