1// Copyright 2013 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_INTL_SUPPORT
6#error Internationalization is expected to be enabled.
7#endif  // V8_INTL_SUPPORT
8
9#include "src/objects/intl-objects.h"
10
11#include <algorithm>
12#include <memory>
13#include <string>
14#include <vector>
15
16#include "src/api/api-inl.h"
17#include "src/base/strings.h"
18#include "src/date/date.h"
19#include "src/execution/isolate.h"
20#include "src/execution/local-isolate.h"
21#include "src/handles/global-handles.h"
22#include "src/heap/factory.h"
23#include "src/objects/js-collator-inl.h"
24#include "src/objects/js-date-time-format-inl.h"
25#include "src/objects/js-locale-inl.h"
26#include "src/objects/js-locale.h"
27#include "src/objects/js-number-format-inl.h"
28#include "src/objects/managed-inl.h"
29#include "src/objects/objects-inl.h"
30#include "src/objects/option-utils.h"
31#include "src/objects/property-descriptor.h"
32#include "src/objects/smi.h"
33#include "src/objects/string.h"
34#include "src/strings/string-case.h"
35#include "unicode/basictz.h"
36#include "unicode/brkiter.h"
37#include "unicode/calendar.h"
38#include "unicode/coll.h"
39#include "unicode/datefmt.h"
40#include "unicode/decimfmt.h"
41#include "unicode/formattedvalue.h"
42#include "unicode/localebuilder.h"
43#include "unicode/localematcher.h"
44#include "unicode/locid.h"
45#include "unicode/normalizer2.h"
46#include "unicode/numberformatter.h"
47#include "unicode/numfmt.h"
48#include "unicode/numsys.h"
49#include "unicode/timezone.h"
50#include "unicode/ures.h"
51#include "unicode/ustring.h"
52#include "unicode/uvernum.h"  // U_ICU_VERSION_MAJOR_NUM
53
54#define XSTR(s) STR(s)
55#define STR(s) #s
56static_assert(
57    V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM,
58    "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up");
59#undef STR
60#undef XSTR
61
62namespace v8 {
63namespace internal {
64
65namespace {
66
// Lookup table mapping each Latin-1 code unit (the array index, 0x00-0xFF) to
// its lowercase form. 'A'-'Z' (0x41-0x5A) map to 'a'-'z' (0x61-0x7A), and
// 0xC0-0xDE map to 0xE0-0xFE except for 0xD7, which maps to itself. Every
// other entry is the identity.
constexpr uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
    0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23,
    0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
    0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73,
    0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83,
    0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B,
    0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3,
    0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB,
    0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3,
    0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
    0xFC, 0xFD, 0xFE, 0xFF,
};
91
92inline constexpr uint16_t ToLatin1Lower(uint16_t ch) {
93  return static_cast<uint16_t>(kToLower[ch]);
94}
95
96// Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
97inline constexpr uint16_t ToLatin1Upper(uint16_t ch) {
98  DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
99  return ch &
100         ~((IsAsciiLower(ch) || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) << 5);
101}
102
103template <typename Char>
104bool ToUpperFastASCII(const base::Vector<const Char>& src,
105                      Handle<SeqOneByteString> result) {
106  // Do a faster loop for the case where all the characters are ASCII.
107  uint16_t ored = 0;
108  int32_t index = 0;
109  for (auto it = src.begin(); it != src.end(); ++it) {
110    uint16_t ch = static_cast<uint16_t>(*it);
111    ored |= ch;
112    result->SeqOneByteStringSet(index++, ToAsciiUpper(ch));
113  }
114  return !(ored & ~0x7F);
115}
116
// U+00DF (lowercase sharp-s). It gets special treatment when uppercasing
// because it converts to the two characters "SS".
const uint16_t sharp_s = 0xDF;
118
119template <typename Char>
120bool ToUpperOneByte(const base::Vector<const Char>& src, uint8_t* dest,
121                    int* sharp_s_count) {
122  // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
123
124  // There are two special cases.
125  //  1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
126  //  2. Lower case sharp-S converts to "SS" (two characters)
127  *sharp_s_count = 0;
128  for (auto it = src.begin(); it != src.end(); ++it) {
129    uint16_t ch = static_cast<uint16_t>(*it);
130    if (V8_UNLIKELY(ch == sharp_s)) {
131      ++(*sharp_s_count);
132      continue;
133    }
134    if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
135      // Since this upper-cased character does not fit in an 8-bit string, we
136      // need to take the 16-bit path.
137      return false;
138    }
139    *dest++ = ToLatin1Upper(ch);
140  }
141
142  return true;
143}
144
145template <typename Char>
146void ToUpperWithSharpS(const base::Vector<const Char>& src,
147                       Handle<SeqOneByteString> result) {
148  int32_t dest_index = 0;
149  for (auto it = src.begin(); it != src.end(); ++it) {
150    uint16_t ch = static_cast<uint16_t>(*it);
151    if (ch == sharp_s) {
152      result->SeqOneByteStringSet(dest_index++, 'S');
153      result->SeqOneByteStringSet(dest_index++, 'S');
154    } else {
155      result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
156    }
157  }
158}
159
160inline int FindFirstUpperOrNonAscii(String s, int length) {
161  for (int index = 0; index < length; ++index) {
162    uint16_t ch = s.Get(index);
163    if (V8_UNLIKELY(IsAsciiUpper(ch) || ch & ~0x7F)) {
164      return index;
165    }
166  }
167  return length;
168}
169
170const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
171                                    std::unique_ptr<base::uc16[]>* dest,
172                                    int32_t length) {
173  DCHECK(flat.IsFlat());
174  if (flat.IsOneByte()) {
175    if (!*dest) {
176      dest->reset(NewArray<base::uc16>(length));
177      CopyChars(dest->get(), flat.ToOneByteVector().begin(), length);
178    }
179    return reinterpret_cast<const UChar*>(dest->get());
180  } else {
181    return reinterpret_cast<const UChar*>(flat.ToUC16Vector().begin());
182  }
183}
184
185template <typename T>
186MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
187                   Handle<Object> locales, Handle<Object> options,
188                   const char* method_name) {
189  Handle<Map> map;
190  ASSIGN_RETURN_ON_EXCEPTION(
191      isolate, map,
192      JSFunction::GetDerivedMap(isolate, constructor, constructor), T);
193  return T::New(isolate, map, locales, options, method_name);
194}
195}  // namespace
196
197const uint8_t* Intl::ToLatin1LowerTable() { return &kToLower[0]; }
198
199icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
200                                            Handle<String> string, int offset) {
201  DCHECK(string->IsFlat());
202  DisallowGarbageCollection no_gc;
203  std::unique_ptr<base::uc16[]> sap;
204  // Short one-byte strings can be expanded on the stack to avoid allocating a
205  // temporary buffer.
206  constexpr int kShortStringSize = 80;
207  UChar short_string_buffer[kShortStringSize];
208  const UChar* uchar_buffer = nullptr;
209  const String::FlatContent& flat = string->GetFlatContent(no_gc);
210  int32_t length = string->length();
211  DCHECK_LE(offset, length);
212  if (flat.IsOneByte() && length <= kShortStringSize) {
213    CopyChars(short_string_buffer, flat.ToOneByteVector().begin(), length);
214    uchar_buffer = short_string_buffer;
215  } else {
216    uchar_buffer = GetUCharBufferFromFlat(flat, &sap, length);
217  }
218  return icu::UnicodeString(uchar_buffer + offset, length - offset);
219}
220
221namespace {
222
223icu::StringPiece ToICUStringPiece(Isolate* isolate, Handle<String> string,
224                                  int offset = 0) {
225  DCHECK(string->IsFlat());
226  DisallowGarbageCollection no_gc;
227
228  const String::FlatContent& flat = string->GetFlatContent(no_gc);
229  if (!flat.IsOneByte()) return icu::StringPiece();
230
231  int32_t length = string->length();
232  DCHECK_LT(offset, length);
233  const char* char_buffer =
234      reinterpret_cast<const char*>(flat.ToOneByteVector().begin());
235  if (!String::IsAscii(char_buffer, length)) {
236    return icu::StringPiece();
237  }
238
239  return icu::StringPiece(char_buffer + offset, length - offset);
240}
241
// Converts the case of |s| with ICU's u_strToUpper (when |is_to_upper|) or
// u_strToLower for the locale |lang|, writing into a freshly allocated
// two-byte string. Returns the empty string for empty input, and returns early
// through ASSIGN_RETURN_ON_EXCEPTION if allocating the result string fails.
// |s| is expected to be flat (DCHECKed inside the loop).
MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s,
                                      bool is_to_upper, const char* lang) {
  auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
  int32_t src_length = s->length();
  int32_t dest_length = src_length;
  UErrorCode status;
  Handle<SeqTwoByteString> result;
  // Holds the widened copy of a one-byte source; filled on the first
  // iteration by GetUCharBufferFromFlat and reused on the second.
  std::unique_ptr<base::uc16[]> sap;

  if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle();

  // This is not a real loop. It'll be executed only once (no overflow) or
  // twice (overflow).
  for (int i = 0; i < 2; ++i) {
    // Case conversion can increase the string length (e.g. sharp-S => SS) so
    // that we have to handle RangeError exceptions here.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, result, isolate->factory()->NewRawTwoByteString(dest_length),
        String);
    // The no-GC scope starts only after the allocation above; the flat
    // content and the raw destination pointer are re-fetched each iteration.
    DisallowGarbageCollection no_gc;
    DCHECK(s->IsFlat());
    String::FlatContent flat = s->GetFlatContent(no_gc);
    const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
    status = U_ZERO_ERROR;
    // The converter's return value becomes the new |dest_length|, which sizes
    // the allocation of the second iteration after an overflow.
    dest_length =
        case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)),
                       dest_length, src, src_length, lang, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) break;
  }

  // In most cases, the output will fill the destination buffer completely
  // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
  // Only in rare cases, it'll be shorter than the destination buffer and
  // |result| has to be truncated.
  DCHECK(U_SUCCESS(status));
  if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
    DCHECK(dest_length == result->length());
    return result;
  }
  DCHECK(dest_length < result->length());
  return SeqString::Truncate(result, dest_length);
}
284
285}  // namespace
286
287// A stripped-down version of ConvertToLower that can only handle flat one-byte
288// strings and does not allocate. Note that {src} could still be, e.g., a
289// one-byte sliced string with a two-byte parent string.
290// Called from TF builtins.
291String Intl::ConvertOneByteToLower(String src, String dst) {
292  DCHECK_EQ(src.length(), dst.length());
293  DCHECK(src.IsOneByteRepresentation());
294  DCHECK(src.IsFlat());
295  DCHECK(dst.IsSeqOneByteString());
296
297  DisallowGarbageCollection no_gc;
298
299  const int length = src.length();
300  String::FlatContent src_flat = src.GetFlatContent(no_gc);
301  uint8_t* dst_data = SeqOneByteString::cast(dst).GetChars(no_gc);
302
303  if (src_flat.IsOneByte()) {
304    const uint8_t* src_data = src_flat.ToOneByteVector().begin();
305
306    bool has_changed_character = false;
307    int index_to_first_unprocessed =
308        FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
309                               reinterpret_cast<const char*>(src_data), length,
310                               &has_changed_character);
311
312    if (index_to_first_unprocessed == length) {
313      return has_changed_character ? dst : src;
314    }
315
316    // If not ASCII, we keep the result up to index_to_first_unprocessed and
317    // process the rest.
318    for (int index = index_to_first_unprocessed; index < length; ++index) {
319      dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
320    }
321  } else {
322    DCHECK(src_flat.IsTwoByte());
323    int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
324    if (index_to_first_unprocessed == length) return src;
325
326    const uint16_t* src_data = src_flat.ToUC16Vector().begin();
327    CopyChars(dst_data, src_data, index_to_first_unprocessed);
328    for (int index = index_to_first_unprocessed; index < length; ++index) {
329      dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
330    }
331  }
332
333  return dst;
334}
335
336MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {
337  if (!s->IsOneByteRepresentation()) {
338    // Use a slower implementation for strings with characters beyond U+00FF.
339    return LocaleConvertCase(isolate, s, false, "");
340  }
341
342  int length = s->length();
343
344  // We depend here on the invariant that the length of a Latin1
345  // string is invariant under ToLowerCase, and the result always
346  // fits in the Latin1 range in the *root locale*. It does not hold
347  // for ToUpperCase even in the root locale.
348
349  // Scan the string for uppercase and non-ASCII characters for strings
350  // shorter than a machine-word without any memory allocation overhead.
351  // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
352  // to two parts, one for scanning the prefix with no change and the other for
353  // handling ASCII-only characters.
354
355  bool is_short = length < static_cast<int>(sizeof(uintptr_t));
356  if (is_short) {
357    bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
358    if (is_lower_ascii) return s;
359  }
360
361  Handle<SeqOneByteString> result =
362      isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
363
364  return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate);
365}
366
// Uppercases |s|. Non-empty strings with a one-byte representation take a
// Latin-1 fast path that produces a one-byte result. Everything else, and any
// input containing a character whose uppercase form lies beyond the Latin-1
// range, goes through LocaleConvertCase.
MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {
  int32_t length = s->length();
  if (s->IsOneByteRepresentation() && length > 0) {
    Handle<SeqOneByteString> result =
        isolate->factory()->NewRawOneByteString(length).ToHandleChecked();

    DCHECK(s->IsFlat());
    // Both are set by ToUpperOneByte below on every path that does not return
    // early.
    int sharp_s_count;
    bool is_result_single_byte;
    {
      DisallowGarbageCollection no_gc;
      String::FlatContent flat = s->GetFlatContent(no_gc);
      uint8_t* dest = result->GetChars(no_gc);
      if (flat.IsOneByte()) {
        base::Vector<const uint8_t> src = flat.ToOneByteVector();
        bool has_changed_character = false;
        int index_to_first_unprocessed = FastAsciiConvert<false>(
            reinterpret_cast<char*>(result->GetChars(no_gc)),
            reinterpret_cast<const char*>(src.begin()), length,
            &has_changed_character);
        // The whole input was processed by the ASCII converter: hand back the
        // original string when nothing changed.
        if (index_to_first_unprocessed == length) {
          return has_changed_character ? result : s;
        }
        // If not ASCII, we keep the result up to index_to_first_unprocessed and
        // process the rest.
        is_result_single_byte =
            ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
                           dest + index_to_first_unprocessed, &sharp_s_count);
      } else {
        DCHECK(flat.IsTwoByte());
        base::Vector<const uint16_t> src = flat.ToUC16Vector();
        if (ToUpperFastASCII(src, result)) return result;
        // Not pure ASCII: redo the conversion from the start of |src|,
        // overwriting what ToUpperFastASCII wrote into |result|.
        is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
      }
    }

    // Go to the full Unicode path if there are characters whose uppercase
    // is beyond the Latin-1 range (cannot be represented in OneByteString).
    if (V8_UNLIKELY(!is_result_single_byte)) {
      return LocaleConvertCase(isolate, s, true, "");
    }

    if (sharp_s_count == 0) return result;

    // We have sharp_s_count sharp-s characters, but the result is still
    // in the Latin-1 range.
    // Each sharp-s expands to two characters, so a longer string is allocated
    // and the conversion is redone from the source into it.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, result,
        isolate->factory()->NewRawOneByteString(length + sharp_s_count),
        String);
    DisallowGarbageCollection no_gc;
    String::FlatContent flat = s->GetFlatContent(no_gc);
    if (flat.IsOneByte()) {
      ToUpperWithSharpS(flat.ToOneByteVector(), result);
    } else {
      ToUpperWithSharpS(flat.ToUC16Vector(), result);
    }

    return result;
  }

  return LocaleConvertCase(isolate, s, true, "");
}
430
431std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
432  // Ugly hack. ICU doesn't expose numbering system in any way, so we have
433  // to assume that for given locale NumberingSystem constructor produces the
434  // same digits as NumberFormat/Calendar would.
435  UErrorCode status = U_ZERO_ERROR;
436  std::unique_ptr<icu::NumberingSystem> numbering_system(
437      icu::NumberingSystem::createInstance(icu_locale, status));
438  if (U_SUCCESS(status) && !numbering_system->isAlgorithmic()) {
439    return numbering_system->getName();
440  }
441  return "latn";
442}
443
444namespace {
445
446Maybe<icu::Locale> CreateICULocale(const std::string& bcp47_locale) {
447  DisallowGarbageCollection no_gc;
448
449  // Convert BCP47 into ICU locale format.
450  UErrorCode status = U_ZERO_ERROR;
451
452  icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status);
453  DCHECK(U_SUCCESS(status));
454  if (icu_locale.isBogus()) {
455    return Nothing<icu::Locale>();
456  }
457
458  return Just(icu_locale);
459}
460
461}  // anonymous namespace
462
463// static
464
465MaybeHandle<String> Intl::ToString(Isolate* isolate,
466                                   const icu::UnicodeString& string) {
467  return isolate->factory()->NewStringFromTwoByte(base::Vector<const uint16_t>(
468      reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length()));
469}
470
// Creates a V8 string from the part of |string| between |begin| and |end|,
// as selected by icu::UnicodeString::tempSubStringBetween, by delegating to
// the whole-string overload.
MaybeHandle<String> Intl::ToString(Isolate* isolate,
                                   const icu::UnicodeString& string,
                                   int32_t begin, int32_t end) {
  return Intl::ToString(isolate, string.tempSubStringBetween(begin, end));
}
476
477namespace {
478
479Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
480                                 int index, Handle<String> field_type_string,
481                                 Handle<String> value) {
482  // let element = $array[$index] = {
483  //   type: $field_type_string,
484  //   value: $value
485  // }
486  // return element;
487  Factory* factory = isolate->factory();
488  Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
489  JSObject::AddProperty(isolate, element, factory->type_string(),
490                        field_type_string, NONE);
491
492  JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE);
493  // TODO(victorgomes): Temporarily forcing a fatal error here in case of
494  // overflow, until Intl::AddElement can handle exceptions.
495  if (JSObject::AddDataElement(array, index, element, NONE).IsNothing()) {
496    FATAL("Fatal JavaScript invalid size error when adding element");
497    UNREACHABLE();
498  }
499  return element;
500}
501
502}  // namespace
503
// Stores a {type, value} part object at |array|[|index|]. The work is done by
// InnerAddElement; the element it returns is not needed here.
void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                      Handle<String> field_type_string, Handle<String> value) {
  // Same as $array[$index] = {type: $field_type_string, value: $value};
  InnerAddElement(isolate, array, index, field_type_string, value);
}
509
510void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
511                      Handle<String> field_type_string, Handle<String> value,
512                      Handle<String> additional_property_name,
513                      Handle<String> additional_property_value) {
514  // Same as $array[$index] = {
515  //   type: $field_type_string, value: $value,
516  //   $additional_property_name: $additional_property_value
517  // }
518  Handle<JSObject> element =
519      InnerAddElement(isolate, array, index, field_type_string, value);
520  JSObject::AddProperty(isolate, element, additional_property_name,
521                        additional_property_value, NONE);
522}
523
524namespace {
525
526// Build the shortened locale; eg, convert xx_Yyyy_ZZ  to xx_ZZ.
527//
528// If locale has a script tag then return true and the locale without the
529// script else return false and an empty string.
530bool RemoveLocaleScriptTag(const std::string& icu_locale,
531                           std::string* locale_less_script) {
532  icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
533  const char* icu_script = new_locale.getScript();
534  if (icu_script == nullptr || strlen(icu_script) == 0) {
535    *locale_less_script = std::string();
536    return false;
537  }
538
539  const char* icu_language = new_locale.getLanguage();
540  const char* icu_country = new_locale.getCountry();
541  icu::Locale short_locale = icu::Locale(icu_language, icu_country);
542  *locale_less_script = short_locale.getName();
543  return true;
544}
545
546bool ValidateResource(const icu::Locale locale, const char* path,
547                      const char* key) {
548  bool result = false;
549  UErrorCode status = U_ZERO_ERROR;
550  UResourceBundle* bundle = ures_open(path, locale.getName(), &status);
551  if (bundle != nullptr && status == U_ZERO_ERROR) {
552    if (key == nullptr) {
553      result = true;
554    } else {
555      UResourceBundle* key_bundle =
556          ures_getByKey(bundle, key, nullptr, &status);
557      result = key_bundle != nullptr && (status == U_ZERO_ERROR);
558      ures_close(key_bundle);
559    }
560  }
561  ures_close(bundle);
562  if (!result) {
563    if ((locale.getCountry()[0] != '\0') && (locale.getScript()[0] != '\0')) {
564      // Fallback to try without country.
565      std::string without_country(locale.getLanguage());
566      without_country = without_country.append("-").append(locale.getScript());
567      return ValidateResource(without_country.c_str(), path, key);
568    } else if ((locale.getCountry()[0] != '\0') ||
569               (locale.getScript()[0] != '\0')) {
570      // Fallback to try with only language.
571      std::string language(locale.getLanguage());
572      return ValidateResource(language.c_str(), path, key);
573    }
574  }
575  return result;
576}
577
578}  // namespace
579
580std::set<std::string> Intl::BuildLocaleSet(
581    const std::vector<std::string>& icu_available_locales, const char* path,
582    const char* validate_key) {
583  std::set<std::string> locales;
584  for (const std::string& locale : icu_available_locales) {
585    if (path != nullptr || validate_key != nullptr) {
586      if (!ValidateResource(icu::Locale(locale.c_str()), path, validate_key)) {
587        // FIXME(chromium:1215606) Find a beter fix for nb->no fallback
588        if (locale != "nb") {
589          continue;
590        }
591        // Try no for nb
592        if (!ValidateResource(icu::Locale("no"), path, validate_key)) {
593          continue;
594        }
595      }
596    }
597    locales.insert(locale);
598    std::string shortened_locale;
599    if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
600      std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
601      locales.insert(shortened_locale);
602    }
603  }
604  return locales;
605}
606
607Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {
608  UErrorCode status = U_ZERO_ERROR;
609  std::string res = locale.toLanguageTag<std::string>(status);
610  if (U_FAILURE(status)) {
611    return Nothing<std::string>();
612  }
613  DCHECK(U_SUCCESS(status));
614  return Just(res);
615}
616
617// See ecma402/#legacy-constructor.
618MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
619                                               Handle<JSReceiver> receiver,
620                                               Handle<JSFunction> constructor,
621                                               bool has_initialized_slot) {
622  Handle<Object> obj_ordinary_has_instance;
623  ASSIGN_RETURN_ON_EXCEPTION(
624      isolate, obj_ordinary_has_instance,
625      Object::OrdinaryHasInstance(isolate, constructor, receiver), Object);
626  bool ordinary_has_instance = obj_ordinary_has_instance->BooleanValue(isolate);
627
628  // 2. If receiver does not have an [[Initialized...]] internal slot
629  //    and ? OrdinaryHasInstance(constructor, receiver) is true, then
630  if (!has_initialized_slot && ordinary_has_instance) {
631    // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
632    Handle<Object> new_receiver;
633    ASSIGN_RETURN_ON_EXCEPTION(
634        isolate, new_receiver,
635        JSReceiver::GetProperty(isolate, receiver,
636                                isolate->factory()->intl_fallback_symbol()),
637        Object);
638    return new_receiver;
639  }
640
641  return receiver;
642}
643
644namespace {
645
646bool IsTwoLetterLanguage(const std::string& locale) {
647  // Two letters, both in range 'a'-'z'...
648  return locale.length() == 2 && IsAsciiLower(locale[0]) &&
649         IsAsciiLower(locale[1]);
650}
651
// Returns true iff |locale| is exactly one of the deprecated or legacy
// language tags listed below. The comparison is case-sensitive.
bool IsDeprecatedOrLegacyLanguage(const std::string& locale) {
  static constexpr const char* kDeprecatedOrLegacyTags[] = {
      // Deprecated language tags:
      "in", "iw", "ji", "jw", "mo",
      // Legacy language tags:
      "sh", "tl", "no",
  };
  for (const char* tag : kDeprecatedOrLegacyTags) {
    if (locale == tag) return true;
  }
  return false;
}
659
// Structural validity check for a language tag; delegates entirely to
// JSLocale::StartsWithUnicodeLanguageId and returns its result.
bool IsStructurallyValidLanguageTag(const std::string& tag) {
  return JSLocale::StartsWithUnicodeLanguageId(tag);
}
663
664// Canonicalize the locale.
665// https://tc39.github.io/ecma402/#sec-canonicalizelanguagetag,
666// including type check and structural validity check.
667Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
668                                           const std::string& locale_in) {
669  std::string locale = locale_in;
670
671  if (locale.length() == 0 ||
672      !String::IsAscii(locale.data(), static_cast<int>(locale.length()))) {
673    THROW_NEW_ERROR_RETURN_VALUE(
674        isolate,
675        NewRangeError(
676            MessageTemplate::kInvalidLanguageTag,
677            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
678        Nothing<std::string>());
679  }
680
681  // Optimize for the most common case: a 2-letter language code in the
682  // canonical form/lowercase that is not one of the deprecated codes
683  // (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language
684  // codes. Instead, let them be handled by ICU in the slow path. However,
685  // fast-track 'fil' (3-letter canonical code).
686  if ((IsTwoLetterLanguage(locale) && !IsDeprecatedOrLegacyLanguage(locale)) ||
687      locale == "fil") {
688    return Just(locale);
689  }
690
691  // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
692  // the input before any more check.
693  std::transform(locale.begin(), locale.end(), locale.begin(), ToAsciiLower);
694
695  // // ECMA 402 6.2.3
696  // TODO(jshin): uloc_{for,to}TanguageTag can fail even for a structually valid
697  // language tag if it's too long (much longer than 100 chars). Even if we
698  // allocate a longer buffer, ICU will still fail if it's too long. Either
699  // propose to Ecma 402 to put a limit on the locale length or change ICU to
700  // handle long locale names better. See
701  // https://unicode-org.atlassian.net/browse/ICU-13417
702  UErrorCode error = U_ZERO_ERROR;
703  // uloc_forLanguageTag checks the structrual validity. If the input BCP47
704  // language tag is parsed all the way to the end, it indicates that the input
705  // is structurally valid. Due to a couple of bugs, we can't use it
706  // without Chromium patches or ICU 62 or earlier.
707  icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
708
709  if (U_FAILURE(error) || icu_locale.isBogus()) {
710    THROW_NEW_ERROR_RETURN_VALUE(
711        isolate,
712        NewRangeError(
713            MessageTemplate::kInvalidLanguageTag,
714            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
715        Nothing<std::string>());
716  }
717
718  // Use LocaleBuilder to validate locale.
719  icu_locale = icu::LocaleBuilder().setLocale(icu_locale).build(error);
720  icu_locale.canonicalize(error);
721  if (U_FAILURE(error) || icu_locale.isBogus()) {
722    THROW_NEW_ERROR_RETURN_VALUE(
723        isolate,
724        NewRangeError(
725            MessageTemplate::kInvalidLanguageTag,
726            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
727        Nothing<std::string>());
728  }
729  Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale);
730  if (maybe_to_language_tag.IsNothing()) {
731    THROW_NEW_ERROR_RETURN_VALUE(
732        isolate,
733        NewRangeError(
734            MessageTemplate::kInvalidLanguageTag,
735            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
736        Nothing<std::string>());
737  }
738
739  return maybe_to_language_tag;
740}
741
742Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
743                                           Handle<Object> locale_in) {
744  Handle<String> locale_str;
745  // This does part of the validity checking spec'ed in CanonicalizeLocaleList:
746  // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
747  // exception.
748  // 7c iii. Let tag be ? ToString(kValue).
749  // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
750  // RangeError exception.
751
752  if (locale_in->IsString()) {
753    locale_str = Handle<String>::cast(locale_in);
754  } else if (locale_in->IsJSReceiver()) {
755    ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str,
756                                     Object::ToString(isolate, locale_in),
757                                     Nothing<std::string>());
758  } else {
759    THROW_NEW_ERROR_RETURN_VALUE(isolate,
760                                 NewTypeError(MessageTemplate::kLanguageID),
761                                 Nothing<std::string>());
762  }
763  std::string locale(locale_str->ToCString().get());
764
765  if (!IsStructurallyValidLanguageTag(locale)) {
766    THROW_NEW_ERROR_RETURN_VALUE(
767        isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
768        Nothing<std::string>());
769  }
770  return CanonicalizeLanguageTag(isolate, locale);
771}
772
773}  // anonymous namespace
774
775Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
776    Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
777  // 1. If locales is undefined, then
778  if (locales->IsUndefined(isolate)) {
779    // 1a. Return a new empty List.
780    return Just(std::vector<std::string>());
781  }
782  // 2. Let seen be a new empty List.
783  std::vector<std::string> seen;
784  // 3. If Type(locales) is String or locales has an [[InitializedLocale]]
785  // internal slot,  then
786  if (locales->IsJSLocale()) {
787    // Since this value came from JSLocale, which is already went though the
788    // CanonializeLanguageTag process once, therefore there are no need to
789    // call CanonializeLanguageTag again.
790    seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales)));
791    return Just(seen);
792  }
793  if (locales->IsString()) {
794    // 3a. Let O be CreateArrayFromList(« locales »).
795    // Instead of creating a one-element array and then iterating over it,
796    // we inline the body of the iteration:
797    std::string canonicalized_tag;
798    if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
799      return Nothing<std::vector<std::string>>();
800    }
801    seen.push_back(canonicalized_tag);
802    return Just(seen);
803  }
804  // 4. Else,
805  // 4a. Let O be ? ToObject(locales).
806  Handle<JSReceiver> o;
807  ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
808                                   Object::ToObject(isolate, locales),
809                                   Nothing<std::vector<std::string>>());
810  // 5. Let len be ? ToLength(? Get(O, "length")).
811  Handle<Object> length_obj;
812  ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
813                                   Object::GetLengthFromArrayLike(isolate, o),
814                                   Nothing<std::vector<std::string>>());
815  // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
816  // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
817  // don't happen in practice (and would be very slow if they do), we'll keep
818  // the code simple for now by using a saturating to-uint32 conversion.
819  double raw_length = length_obj->Number();
820  uint32_t len =
821      raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
822  // 6. Let k be 0.
823  // 7. Repeat, while k < len
824  for (uint32_t k = 0; k < len; k++) {
825    // 7a. Let Pk be ToString(k).
826    // 7b. Let kPresent be ? HasProperty(O, Pk).
827    LookupIterator it(isolate, o, k);
828    Maybe<bool> maybe_found = JSReceiver::HasProperty(&it);
829    MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>());
830    // 7c. If kPresent is true, then
831    if (!maybe_found.FromJust()) continue;
832    // 7c i. Let kValue be ? Get(O, Pk).
833    Handle<Object> k_value;
834    ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
835                                     Nothing<std::vector<std::string>>());
836    // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
837    // exception.
838    // 7c iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]]
839    // internal slot, then
840    std::string canonicalized_tag;
841    if (k_value->IsJSLocale()) {
842      // 7c iii. 1. Let tag be kValue.[[Locale]].
843      canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value));
844      // 7c iv. Else,
845    } else {
846      // 7c iv 1. Let tag be ? ToString(kValue).
847      // 7c v. If IsStructurallyValidLanguageTag(tag) is false, throw a
848      // RangeError exception.
849      // 7c vi. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
850      if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
851        return Nothing<std::vector<std::string>>();
852      }
853    }
854    // 7c vi. If canonicalizedTag is not an element of seen, append
855    // canonicalizedTag as the last element of seen.
856    if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
857      seen.push_back(canonicalized_tag);
858    }
859    // 7d. Increase k by 1. (See loop header.)
860    // Optimization: some callers only need one result.
861    if (only_return_one_result) return Just(seen);
862  }
863  // 8. Return seen.
864  return Just(seen);
865}
866
867// ecma402 #sup-string.prototype.tolocalelowercase
868// ecma402 #sup-string.prototype.tolocaleuppercase
869MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
870                                                  Handle<String> s,
871                                                  bool to_upper,
872                                                  Handle<Object> locales) {
873  std::vector<std::string> requested_locales;
874  if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
875    return MaybeHandle<String>();
876  }
877  std::string requested_locale = requested_locales.size() == 0
878                                     ? isolate->DefaultLocale()
879                                     : requested_locales[0];
880  size_t dash = requested_locale.find('-');
881  if (dash != std::string::npos) {
882    requested_locale = requested_locale.substr(0, dash);
883  }
884
885  // Primary language tag can be up to 8 characters long in theory.
886  // https://tools.ietf.org/html/bcp47#section-2.2.1
887  DCHECK_LE(requested_locale.length(), 8);
888  s = String::Flatten(isolate, s);
889
890  // All the languages requiring special-handling have two-letter codes.
891  // Note that we have to check for '!= 2' here because private-use language
892  // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
893  // only 'x' or 'i' when they get here.
894  if (V8_UNLIKELY(requested_locale.length() != 2)) {
895    if (to_upper) {
896      return ConvertToUpper(isolate, s);
897    }
898    return ConvertToLower(isolate, s);
899  }
900  // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
901  // in the root locale needs to be adjusted for az, lt and tr because even case
902  // mapping of ASCII range characters are different in those locales.
903  // Greek (el) does not require any adjustment.
904  if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
905                  (requested_locale == "lt") || (requested_locale == "az"))) {
906    return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str());
907  } else {
908    if (to_upper) {
909      return ConvertToUpper(isolate, s);
910    }
911    return ConvertToLower(isolate, s);
912  }
913}
914
915// static
916template <class IsolateT>
917Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
918    IsolateT* isolate, Handle<Object> locales, Handle<Object> options) {
919  if (!options->IsUndefined(isolate)) {
920    return CompareStringsOptions::kNone;
921  }
922
923  // Lists all of the available locales that are statically known to fulfill
924  // fast path conditions. See the StringLocaleCompareFastPath test as a
925  // starting point to update this list.
926  //
927  // Locale entries are roughly sorted s.t. common locales come first.
928  //
929  // The actual conditions are verified in debug builds in
930  // CollatorAllowsFastComparison.
931  static const char* const kFastLocales[] = {
932      "en-US", "en", "fr", "es",    "de",    "pt",    "it", "ca",
933      "de-AT", "fi", "id", "id-ID", "ms",    "nl",    "pl", "ro",
934      "sl",    "sv", "sw", "vi",    "en-DE", "en-GB",
935  };
936
937  if (locales->IsUndefined(isolate)) {
938    const std::string& default_locale = isolate->DefaultLocale();
939    for (const char* fast_locale : kFastLocales) {
940      if (strcmp(fast_locale, default_locale.c_str()) == 0) {
941        return CompareStringsOptions::kTryFastPath;
942      }
943    }
944
945    return CompareStringsOptions::kNone;
946  }
947
948  if (!locales->IsString()) return CompareStringsOptions::kNone;
949
950  Handle<String> locales_string = Handle<String>::cast(locales);
951  for (const char* fast_locale : kFastLocales) {
952    if (locales_string->IsEqualTo(base::CStrVector(fast_locale), isolate)) {
953      return CompareStringsOptions::kTryFastPath;
954    }
955  }
956
957  return CompareStringsOptions::kNone;
958}
959
960// Instantiations.
961template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
962    Isolate*, Handle<Object>, Handle<Object>);
963template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
964    LocalIsolate*, Handle<Object>, Handle<Object>);
965
966base::Optional<int> Intl::StringLocaleCompare(
967    Isolate* isolate, Handle<String> string1, Handle<String> string2,
968    Handle<Object> locales, Handle<Object> options, const char* method_name) {
969  // We only cache the instance when locales is a string/undefined and
970  // options is undefined, as that is the only case when the specified
971  // side-effects of examining those arguments are unobservable.
972  const bool can_cache =
973      (locales->IsString() || locales->IsUndefined(isolate)) &&
974      options->IsUndefined(isolate);
975  // We may be able to take the fast path, depending on the `locales` and
976  // `options` arguments.
977  const CompareStringsOptions compare_strings_options =
978      CompareStringsOptionsFor(isolate, locales, options);
979  if (can_cache) {
980    // Both locales and options are undefined, check the cache.
981    icu::Collator* cached_icu_collator =
982        static_cast<icu::Collator*>(isolate->get_cached_icu_object(
983            Isolate::ICUObjectCacheType::kDefaultCollator, locales));
984    // We may use the cached icu::Collator for a fast path.
985    if (cached_icu_collator != nullptr) {
986      return Intl::CompareStrings(isolate, *cached_icu_collator, string1,
987                                  string2, compare_strings_options);
988    }
989  }
990
991  Handle<JSFunction> constructor = Handle<JSFunction>(
992      JSFunction::cast(
993          isolate->context().native_context().intl_collator_function()),
994      isolate);
995
996  Handle<JSCollator> collator;
997  MaybeHandle<JSCollator> maybe_collator =
998      New<JSCollator>(isolate, constructor, locales, options, method_name);
999  if (!maybe_collator.ToHandle(&collator)) return {};
1000  if (can_cache) {
1001    isolate->set_icu_object_in_cache(
1002        Isolate::ICUObjectCacheType::kDefaultCollator, locales,
1003        std::static_pointer_cast<icu::UMemory>(collator->icu_collator().get()));
1004  }
1005  icu::Collator* icu_collator = collator->icu_collator().raw();
1006  return Intl::CompareStrings(isolate, *icu_collator, string1, string2,
1007                              compare_strings_options);
1008}
1009
1010namespace {
1011
1012// Weights for the Unicode Collation Algorithm for charcodes [0x00,0x7F].
1013// https://unicode.org/reports/tr10/.
1014//
1015// Generated from:
1016//
1017// $ wget http://www.unicode.org/Public/UCA/latest/allkeys.txt
1018// $ cat ~/allkeys.txt | grep '^00[0-7].  ;' | sort | sed 's/[*.]/ /g' |\
1019//   sed 's/.*\[ \(.*\)\].*/\1/' | python ~/gen_weights.py
1020//
1021// Where gen_weights.py does an ordinal rank s.t. weights fit in a uint8_t:
1022//
1023//   import sys
1024//
1025//   def to_ordinal(ws):
1026//       weight_map = {}
1027//       weights_uniq_sorted = sorted(set(ws))
1028//       for i in range(0, len(weights_uniq_sorted)):
1029//           weight_map[weights_uniq_sorted[i]] = i
1030//       return [weight_map[x] for x in ws]
1031//
1032//   def print_weight_list(array_name, ws):
1033//       print("constexpr uint8_t %s[256] = {" % array_name, end = "")
1034//       i = 0
1035//       for w in ws:
1036//           if (i % 16) == 0:
1037//               print("\n  ", end = "")
1038//           print("%3d," % w, end = "")
1039//           i += 1
1040//       print("\n};\n")
1041//
1042//   if __name__ == "__main__":
1043//       l1s = []
1044//       l3s = []
1045//       for line in sys.stdin:
1046//           weights = line.split()
1047//           l1s.append(int(weights[0], 16))
1048//           l3s.append(int(weights[2], 16))
1049//       print_weight_list("kCollationWeightsL1", to_ordinal(l1s))
1050//       print_weight_list("kCollationWeightsL3", to_ordinal(l3s))
1051
// clang-format off
// L1 weights, indexed by character code. Only the first 128 entries are
// listed explicitly; the array is declared with 256 elements, so the
// remaining entries are zero-initialized. A weight of 0 marks a character
// that CanFastCompare rejects.
constexpr uint8_t kCollationWeightsL1[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    6, 12, 16, 28, 38, 29, 27, 15, 17, 18, 24, 32,  9,  8, 14, 25,
   39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10, 33, 34, 35, 13,
   23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31,  7,
   30, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 21, 36, 22, 37,  0,
};
// L3 weights, indexed like kCollationWeightsL1 (128 explicit entries, the
// rest zero-initialized). In this table the uppercase ASCII letters
// (0x41-0x5A) carry weight 2; all other explicit entries are 0 or 1.
constexpr uint8_t kCollationWeightsL3[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,
};
// Number of entries in each weight table; both tables must have the same
// size.
constexpr int kCollationWeightsLength = arraysize(kCollationWeightsL1);
STATIC_ASSERT(kCollationWeightsLength == arraysize(kCollationWeightsL3));
// clang-format on
1076
1077// Normalize a comparison delta (usually `lhs - rhs`) to UCollationResult
1078// values.
1079constexpr UCollationResult ToUCollationResult(int delta) {
1080  return delta < 0 ? UCollationResult::UCOL_LESS
1081                   : (delta > 0 ? UCollationResult::UCOL_GREATER
1082                                : UCollationResult::UCOL_EQUAL);
1083}
1084
1085struct FastCompareStringsData {
1086  UCollationResult l1_result = UCollationResult::UCOL_EQUAL;
1087  UCollationResult l3_result = UCollationResult::UCOL_EQUAL;
1088  int processed_until = 0;
1089  int first_diff_at = 0;  // The first relevant diff (L1 if exists, else L3).
1090  bool has_diff = false;
1091
1092  base::Optional<UCollationResult> FastCompareFailed(
1093      int* processed_until_out) const {
1094    if (has_diff) {
1095      // Found some difference, continue there to ensure the generic algorithm
1096      // picks it up.
1097      *processed_until_out = first_diff_at;
1098    } else {
1099      // No difference found, reprocess the last processed character since it
1100      // may be followed by a unicode combining character (which alters it's
1101      // meaning).
1102      *processed_until_out = std::max(processed_until - 1, 0);
1103    }
1104    return {};
1105  }
1106};
1107
1108template <class CharT>
1109constexpr bool CanFastCompare(CharT c) {
1110  return c < kCollationWeightsLength && kCollationWeightsL1[c] != 0;
1111}
1112
1113template <class Char1T, class Char2T>
1114bool FastCompareFlatString(const Char1T* lhs, const Char2T* rhs, int length,
1115                           FastCompareStringsData* d) {
1116  for (int i = 0; i < length; i++) {
1117    const Char1T l = lhs[i];
1118    const Char2T r = rhs[i];
1119    if (!CanFastCompare(l) || !CanFastCompare(r)) {
1120      d->processed_until = i;
1121      return false;
1122    }
1123    UCollationResult l1_result =
1124        ToUCollationResult(kCollationWeightsL1[l] - kCollationWeightsL1[r]);
1125    if (l1_result != UCollationResult::UCOL_EQUAL) {
1126      d->has_diff = true;
1127      d->first_diff_at = i;
1128      d->processed_until = i;
1129      d->l1_result = l1_result;
1130      return true;
1131    }
1132    if (l != r && d->l3_result == UCollationResult::UCOL_EQUAL) {
1133      // Collapse the two-pass algorithm into one: if we find a difference in
1134      // L1 weights, that is our result. If not, use the first L3 weight
1135      // difference.
1136      UCollationResult l3_result =
1137          ToUCollationResult(kCollationWeightsL3[l] - kCollationWeightsL3[r]);
1138      d->l3_result = l3_result;
1139      if (!d->has_diff) {
1140        d->has_diff = true;
1141        d->first_diff_at = i;
1142      }
1143    }
1144  }
1145  d->processed_until = length;
1146  return true;
1147}
1148
1149bool FastCompareStringFlatContent(const String::FlatContent& lhs,
1150                                  const String::FlatContent& rhs, int length,
1151                                  FastCompareStringsData* d) {
1152  if (lhs.IsOneByte()) {
1153    base::Vector<const uint8_t> l = lhs.ToOneByteVector();
1154    if (rhs.IsOneByte()) {
1155      base::Vector<const uint8_t> r = rhs.ToOneByteVector();
1156      return FastCompareFlatString(l.data(), r.data(), length, d);
1157    } else {
1158      base::Vector<const uint16_t> r = rhs.ToUC16Vector();
1159      return FastCompareFlatString(l.data(), r.data(), length, d);
1160    }
1161  } else {
1162    base::Vector<const uint16_t> l = lhs.ToUC16Vector();
1163    if (rhs.IsOneByte()) {
1164      base::Vector<const uint8_t> r = rhs.ToOneByteVector();
1165      return FastCompareFlatString(l.data(), r.data(), length, d);
1166    } else {
1167      base::Vector<const uint16_t> r = rhs.ToUC16Vector();
1168      return FastCompareFlatString(l.data(), r.data(), length, d);
1169    }
1170  }
1171  UNREACHABLE();
1172}
1173
1174bool CharIsAsciiOrOutOfBounds(const String::FlatContent& string,
1175                              int string_length, int index) {
1176  DCHECK_EQ(string.length(), string_length);
1177  return index >= string_length || isascii(string.Get(index));
1178}
1179
1180bool CharCanFastCompareOrOutOfBounds(const String::FlatContent& string,
1181                                     int string_length, int index) {
1182  DCHECK_EQ(string.length(), string_length);
1183  return index >= string_length || CanFastCompare(string.Get(index));
1184}
1185
1186#ifdef DEBUG
1187bool USetContainsAllAsciiItem(USet* set) {
1188  static constexpr int kBufferSize = 64;
1189  UChar buffer[kBufferSize];
1190
1191  const int length = uset_getItemCount(set);
1192  for (int i = 0; i < length; i++) {
1193    UChar32 start, end;
1194    UErrorCode status = U_ZERO_ERROR;
1195    const int item_length =
1196        uset_getItem(set, i, &start, &end, buffer, kBufferSize, &status);
1197    CHECK(U_SUCCESS(status));
1198    DCHECK_GE(item_length, 0);
1199
1200    if (item_length == 0) {
1201      // Empty string or a range.
1202      if (isascii(start)) return true;
1203    } else {
1204      // A non-empty string.
1205      bool all_ascii = true;
1206      for (int j = 0; j < item_length; j++) {
1207        if (!isascii(buffer[j])) {
1208          all_ascii = false;
1209          break;
1210        }
1211      }
1212
1213      if (all_ascii) return true;
1214    }
1215  }
1216
1217  return false;
1218}
1219
1220bool CollatorAllowsFastComparison(const icu::Collator& icu_collator) {
1221  UErrorCode status = U_ZERO_ERROR;
1222
1223  icu::Locale icu_locale(icu_collator.getLocale(ULOC_VALID_LOCALE, status));
1224  DCHECK(U_SUCCESS(status));
1225
1226  static constexpr int kBufferSize = 64;
1227  char buffer[kBufferSize];
1228  const int collation_keyword_length =
1229      icu_locale.getKeywordValue("collation", buffer, kBufferSize, status);
1230  DCHECK(U_SUCCESS(status));
1231  if (collation_keyword_length != 0) return false;
1232
1233  // These attributes must be set to the expected value for fast comparisons.
1234  static constexpr struct {
1235    UColAttribute attribute;
1236    UColAttributeValue legal_value;
1237  } kAttributeChecks[] = {
1238      {UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE},
1239      {UCOL_CASE_FIRST, UCOL_OFF},
1240      {UCOL_CASE_LEVEL, UCOL_OFF},
1241      {UCOL_FRENCH_COLLATION, UCOL_OFF},
1242      {UCOL_NUMERIC_COLLATION, UCOL_OFF},
1243      {UCOL_STRENGTH, UCOL_TERTIARY},
1244  };
1245
1246  for (const auto& check : kAttributeChecks) {
1247    if (icu_collator.getAttribute(check.attribute, status) !=
1248        check.legal_value) {
1249      return false;
1250    }
1251    DCHECK(U_SUCCESS(status));
1252  }
1253
1254  // No reordering codes are allowed.
1255  int num_reorder_codes =
1256      ucol_getReorderCodes(icu_collator.toUCollator(), nullptr, 0, &status);
1257  if (num_reorder_codes != 0) return false;
1258  DCHECK(U_SUCCESS(status));  // Must check *after* num_reorder_codes != 0.
1259
1260  // No tailored rules are allowed.
1261  int32_t rules_length = 0;
1262  ucol_getRules(icu_collator.toUCollator(), &rules_length);
1263  if (rules_length != 0) return false;
1264
1265  USet* tailored_set = ucol_getTailoredSet(icu_collator.toUCollator(), &status);
1266  DCHECK(U_SUCCESS(status));
1267  if (USetContainsAllAsciiItem(tailored_set)) return false;
1268  uset_close(tailored_set);
1269
1270  // No ASCII contractions or expansions are allowed.
1271  USet* contractions = uset_openEmpty();
1272  USet* expansions = uset_openEmpty();
1273  ucol_getContractionsAndExpansions(icu_collator.toUCollator(), contractions,
1274                                    expansions, true, &status);
1275  if (USetContainsAllAsciiItem(contractions)) return false;
1276  if (USetContainsAllAsciiItem(expansions)) return false;
1277  DCHECK(U_SUCCESS(status));
1278  uset_close(contractions);
1279  uset_close(expansions);
1280
1281  return true;
1282}
1283#endif  // DEBUG
1284
// Fast comparison is implemented for charcodes for which the L1 collation
// weight (see kCollationWeightsL1 above) is not 0.
1287//
1288// Note it's possible to partially process strings as long as their leading
1289// characters all satisfy the above criteria. In that case, and if the L3
1290// result is EQUAL, we set `processed_until_out` to the first non-processed
1291// index - future processing can begin at that offset.
1292//
1293// This fast path looks somewhat complex; mostly because it combines multiple
1294// passes into one. The pseudo-code for simplified multi-pass algorithm is:
1295//
1296// {
1297//   // We can only fast-compare a certain subset of the ASCII range.
1298//   // Additionally, unicode characters can change the meaning of preceding
1299//   // characters, for example: "o\u0308" is treated like "ö".
1300//   //
1301//   // Note, in the actual single-pass algorithm below, we tolerate non-ASCII
1302//   // contents outside the relevant range.
1303//   for (int i = 0; i < string1.length; i++) {
1304//     if (!CanFastCompare(string1[i])) return {};
1305//   }
1306//   for (int i = 0; i < string2.length; i++) {
1307//     if (!CanFastCompare(string2[i])) return {};
1308//   }
1309//
1310//   // Apply L1 weights.
1311//   for (int i = 0; i < common_length; i++) {
1312//     Char1T c1 = string1[i];
1313//     Char2T c2 = string2[i];
1314//     if (L1Weight[c1] != L1Weight[c2]) {
1315//       return L1Weight[c1] - L1Weight[c2];
1316//     }
1317//   }
1318//
1319//   // Strings are L1-equal up to the common length; if lengths differ, the
1320//   // longer string is treated as 'greater'.
//   if (string1.length != string2.length) {
//     return string1.length - string2.length;
//   }
1322//
1323//   // Apply L3 weights.
1324//   for (int i = 0; i < common_length; i++) {
1325//     Char1T c1 = string1[i];
1326//     Char2T c2 = string2[i];
1327//     if (L3Weight[c1] != L3Weight[c2]) {
1328//       return L3Weight[c1] - L3Weight[c2];
1329//     }
1330//   }
1331//
1332//   return UCOL_EQUAL;
1333// }
1334base::Optional<UCollationResult> TryFastCompareStrings(
1335    Isolate* isolate, const icu::Collator& icu_collator, Handle<String> string1,
1336    Handle<String> string2, int* processed_until_out) {
1337  // TODO(jgruber): We could avoid the flattening (done by the caller) as well
1338  // by implementing comparison through string iteration. This has visible
1339  // performance benefits (e.g. 7% on CDJS) but complicates the code. Consider
1340  // doing this in the future.
1341  DCHECK(string1->IsFlat());
1342  DCHECK(string2->IsFlat());
1343
1344  *processed_until_out = 0;
1345
1346#ifdef DEBUG
1347  // Checked by the caller, see CompareStringsOptionsFor.
1348  SLOW_DCHECK(CollatorAllowsFastComparison(icu_collator));
1349  USE(CollatorAllowsFastComparison);
1350#endif  // DEBUG
1351
1352  DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*string1));
1353  DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*string2));
1354
1355  const int length1 = string1->length();
1356  const int length2 = string2->length();
1357  int common_length = std::min(length1, length2);
1358
1359  FastCompareStringsData d;
1360  DisallowGarbageCollection no_gc;
1361  const String::FlatContent& flat1 = string1->GetFlatContent(no_gc);
1362  const String::FlatContent& flat2 = string2->GetFlatContent(no_gc);
1363  if (!FastCompareStringFlatContent(flat1, flat2, common_length, &d)) {
1364    DCHECK_EQ(d.l1_result, UCollationResult::UCOL_EQUAL);
1365    return d.FastCompareFailed(processed_until_out);
1366  }
1367
1368  // The result is only valid if the last processed character is not followed
1369  // by a unicode combining character (we are overly strict and restrict to
1370  // ASCII).
1371  if (!CharIsAsciiOrOutOfBounds(flat1, length1, d.processed_until + 1) ||
1372      !CharIsAsciiOrOutOfBounds(flat2, length2, d.processed_until + 1)) {
1373    return d.FastCompareFailed(processed_until_out);
1374  }
1375
1376  if (d.l1_result != UCollationResult::UCOL_EQUAL) {
1377    return d.l1_result;
1378  }
1379
1380  // Strings are L1-equal up to their common length, length differences win.
1381  UCollationResult length_result = ToUCollationResult(length1 - length2);
1382  if (length_result != UCollationResult::UCOL_EQUAL) {
1383    // Strings of different lengths may still compare as equal if the longer
1384    // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}".
1385    if (!CharCanFastCompareOrOutOfBounds(flat1, length1, common_length) ||
1386        !CharCanFastCompareOrOutOfBounds(flat2, length2, common_length)) {
1387      return d.FastCompareFailed(processed_until_out);
1388    }
1389    return length_result;
1390  }
1391
1392  // L1-equal and same length, the L3 result wins.
1393  return d.l3_result;
1394}
1395
1396}  // namespace
1397
1398// static
1399const uint8_t* Intl::AsciiCollationWeightsL1() {
1400  return &kCollationWeightsL1[0];
1401}
1402
1403// static
1404const uint8_t* Intl::AsciiCollationWeightsL3() {
1405  return &kCollationWeightsL3[0];
1406}
1407
// static
// Number of entries in the tables returned by AsciiCollationWeightsL1() and
// AsciiCollationWeightsL3().
const int Intl::kAsciiCollationWeightsLength = kCollationWeightsLength;
1410
1411// ecma402/#sec-collator-comparestrings
1412int Intl::CompareStrings(Isolate* isolate, const icu::Collator& icu_collator,
1413                         Handle<String> string1, Handle<String> string2,
1414                         CompareStringsOptions compare_strings_options) {
1415  // Early return for identical strings.
1416  if (string1.is_identical_to(string2)) {
1417    return UCollationResult::UCOL_EQUAL;
1418  }
1419
1420  // Early return for empty strings.
1421  if (string1->length() == 0 || string2->length() == 0) {
1422    return ToUCollationResult(string1->length() - string2->length());
1423  }
1424
1425  string1 = String::Flatten(isolate, string1);
1426  string2 = String::Flatten(isolate, string2);
1427
1428  int processed_until = 0;
1429  if (compare_strings_options == CompareStringsOptions::kTryFastPath) {
1430    base::Optional<int> maybe_result = TryFastCompareStrings(
1431        isolate, icu_collator, string1, string2, &processed_until);
1432    if (maybe_result.has_value()) return maybe_result.value();
1433  }
1434
1435  UCollationResult result;
1436  UErrorCode status = U_ZERO_ERROR;
1437  icu::StringPiece string_piece1 =
1438      ToICUStringPiece(isolate, string1, processed_until);
1439  if (!string_piece1.empty()) {
1440    icu::StringPiece string_piece2 =
1441        ToICUStringPiece(isolate, string2, processed_until);
1442    if (!string_piece2.empty()) {
1443      result = icu_collator.compareUTF8(string_piece1, string_piece2, status);
1444      DCHECK(U_SUCCESS(status));
1445      return result;
1446    }
1447  }
1448
1449  icu::UnicodeString string_val1 =
1450      Intl::ToICUUnicodeString(isolate, string1, processed_until);
1451  icu::UnicodeString string_val2 =
1452      Intl::ToICUUnicodeString(isolate, string2, processed_until);
1453  result = icu_collator.compare(string_val1, string_val2, status);
1454  DCHECK(U_SUCCESS(status));
1455  return result;
1456}
1457
1458// ecma402/#sup-properties-of-the-number-prototype-object
1459MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
1460                                               Handle<Object> num,
1461                                               Handle<Object> locales,
1462                                               Handle<Object> options,
1463                                               const char* method_name) {
1464  Handle<Object> numeric_obj;
1465  ASSIGN_RETURN_ON_EXCEPTION(isolate, numeric_obj,
1466                             Object::ToNumeric(isolate, num), String);
1467
1468  // We only cache the instance when locales is a string/undefined and
1469  // options is undefined, as that is the only case when the specified
1470  // side-effects of examining those arguments are unobservable.
1471  bool can_cache = (locales->IsString() || locales->IsUndefined(isolate)) &&
1472                   options->IsUndefined(isolate);
1473  if (can_cache) {
1474    icu::number::LocalizedNumberFormatter* cached_number_format =
1475        static_cast<icu::number::LocalizedNumberFormatter*>(
1476            isolate->get_cached_icu_object(
1477                Isolate::ICUObjectCacheType::kDefaultNumberFormat, locales));
1478    // We may use the cached icu::NumberFormat for a fast path.
1479    if (cached_number_format != nullptr) {
1480      return JSNumberFormat::FormatNumeric(isolate, *cached_number_format,
1481                                           numeric_obj);
1482    }
1483  }
1484
1485  Handle<JSFunction> constructor = Handle<JSFunction>(
1486      JSFunction::cast(
1487          isolate->context().native_context().intl_number_format_function()),
1488      isolate);
1489  Handle<JSNumberFormat> number_format;
1490  // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
1491  ASSIGN_RETURN_ON_EXCEPTION(
1492      isolate, number_format,
1493      New<JSNumberFormat>(isolate, constructor, locales, options, method_name),
1494      String);
1495
1496  if (can_cache) {
1497    isolate->set_icu_object_in_cache(
1498        Isolate::ICUObjectCacheType::kDefaultNumberFormat, locales,
1499        std::static_pointer_cast<icu::UMemory>(
1500            number_format->icu_number_formatter().get()));
1501  }
1502
1503  // Return FormatNumber(numberFormat, x).
1504  icu::number::LocalizedNumberFormatter* icu_number_format =
1505      number_format->icu_number_formatter().raw();
1506  return JSNumberFormat::FormatNumeric(isolate, *icu_number_format,
1507                                       numeric_obj);
1508}
1509
1510Maybe<Intl::NumberFormatDigitOptions> Intl::SetNumberFormatDigitOptions(
1511    Isolate* isolate, Handle<JSReceiver> options, int mnfd_default,
1512    int mxfd_default, bool notation_is_compact) {
1513  Factory* factory = isolate->factory();
1514  Intl::NumberFormatDigitOptions digit_options;
1515
1516  // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits,", 1, 21,
1517  // 1).
1518  int mnid = 1;
1519  if (!GetNumberOption(isolate, options, factory->minimumIntegerDigits_string(),
1520                       1, 21, 1)
1521           .To(&mnid)) {
1522    return Nothing<NumberFormatDigitOptions>();
1523  }
1524
1525  // 6. Let mnfd be ? Get(options, "minimumFractionDigits").
1526  Handle<Object> mnfd_obj;
1527  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1528      isolate, mnfd_obj,
1529      JSReceiver::GetProperty(isolate, options,
1530                              factory->minimumFractionDigits_string()),
1531      Nothing<NumberFormatDigitOptions>());
1532
1533  // 7. Let mxfd be ? Get(options, "maximumFractionDigits").
1534  Handle<Object> mxfd_obj;
1535  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1536      isolate, mxfd_obj,
1537      JSReceiver::GetProperty(isolate, options,
1538                              factory->maximumFractionDigits_string()),
1539      Nothing<NumberFormatDigitOptions>());
1540
1541  // 8.  Let mnsd be ? Get(options, "minimumSignificantDigits").
1542  Handle<Object> mnsd_obj;
1543  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1544      isolate, mnsd_obj,
1545      JSReceiver::GetProperty(isolate, options,
1546                              factory->minimumSignificantDigits_string()),
1547      Nothing<NumberFormatDigitOptions>());
1548
1549  // 9. Let mxsd be ? Get(options, "maximumSignificantDigits").
1550  Handle<Object> mxsd_obj;
1551  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1552      isolate, mxsd_obj,
1553      JSReceiver::GetProperty(isolate, options,
1554                              factory->maximumSignificantDigits_string()),
1555      Nothing<NumberFormatDigitOptions>());
1556
1557  digit_options.rounding_priority = RoundingPriority::kAuto;
1558  digit_options.minimum_significant_digits = 0;
1559  digit_options.maximum_significant_digits = 0;
1560
1561  // 10. Set intlObj.[[MinimumIntegerDigits]] to mnid.
1562  digit_options.minimum_integer_digits = mnid;
1563
1564  if (FLAG_harmony_intl_number_format_v3) {
1565    // 11. Let roundingPriority be ? GetOption(options, "roundingPriority",
1566    // "string", « "auto", "morePrecision", "lessPrecision" », "auto").
1567
1568    Maybe<RoundingPriority> maybe_rounding_priority =
1569        GetStringOption<RoundingPriority>(
1570            isolate, options, "roundingPriority", "SetNumberFormatDigitOptions",
1571            {"auto", "morePrecision", "lessPrecision"},
1572            {RoundingPriority::kAuto, RoundingPriority::kMorePrecision,
1573             RoundingPriority::kLessPrecision},
1574            RoundingPriority::kAuto);
1575    MAYBE_RETURN(maybe_rounding_priority, Nothing<NumberFormatDigitOptions>());
1576    digit_options.rounding_priority = maybe_rounding_priority.FromJust();
1577  }
1578
1579  // 12. If mnsd is not undefined or mxsd is not undefined, then
1580  // a. Set hasSd to true.
1581  // 13. Else,
1582  // a. Set hasSd to false.
1583  bool has_sd =
1584      (!mnsd_obj->IsUndefined(isolate)) || (!mxsd_obj->IsUndefined(isolate));
1585
1586  // 14. If mnfd is not undefined or mxfd is not undefined, then
1587  // a. Set hasFd to true.
1588  // 15. Else,
1589  // a. Set hasFd to false.
1590  bool has_fd =
1591      (!mnfd_obj->IsUndefined(isolate)) || (!mxfd_obj->IsUndefined(isolate));
1592
1593  // 17. If hasSd or roundingPriority is not "auto", set needSd to true; else,
1594  // set needSd to false.
1595  bool need_sd =
1596      has_sd || (RoundingPriority::kAuto != digit_options.rounding_priority);
1597
1598  // 18. If ( not hasSd and (hasFd or notation is not "compact") ) or
1599  // roundingPriority is not "auto", then a. Set needFd to true.
1600  // 19. Else,
1601  // a. Set needFd to false.
1602  bool need_fd = ((!has_sd) && (has_fd || !notation_is_compact)) ||
1603                 (RoundingPriority::kAuto != digit_options.rounding_priority);
1604
1605  // 20. If needSd, then
1606  if (need_sd) {
1607    // 20.b If hasSd, then
1608    if (has_sd) {
1609      // 20.b.i Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
1610      int mnsd;
1611      if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1,
1612                               factory->minimumSignificantDigits_string())
1613               .To(&mnsd)) {
1614        return Nothing<NumberFormatDigitOptions>();
1615      }
1616      // 20.b.ii Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
1617      int mxsd;
1618      if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21,
1619                               factory->maximumSignificantDigits_string())
1620               .To(&mxsd)) {
1621        return Nothing<NumberFormatDigitOptions>();
1622      }
1623      // 20.b.iii Set intlObj.[[MinimumSignificantDigits]] to mnsd.
1624      digit_options.minimum_significant_digits = mnsd;
1625      // 20.b.iv Set intlObj.[[MaximumSignificantDigits]] to mxsd.
1626      digit_options.maximum_significant_digits = mxsd;
1627    } else {
1628      // 20.c Else
1629      // 20.c.i Set intlObj.[[MinimumSignificantDigits]] to 1.
1630      digit_options.minimum_significant_digits = 1;
1631      // 20.c.ii Set intlObj.[[MaximumSignificantDigits]] to 21.
1632      digit_options.maximum_significant_digits = 21;
1633    }
1634  }
1635
1636  // 21. If needFd, then
1637  if (need_fd) {
1638    // 21.a If hasFd, then
1639    if (has_fd) {
1640      Handle<String> mnfd_str = factory->minimumFractionDigits_string();
1641      Handle<String> mxfd_str = factory->maximumFractionDigits_string();
1642      // 21.a.i Let mnfd be ? DefaultNumberOption(mnfd, 0, 20, undefined).
1643      int mnfd;
1644      if (!DefaultNumberOption(isolate, mnfd_obj, 0, 20, -1, mnfd_str)
1645               .To(&mnfd)) {
1646        return Nothing<NumberFormatDigitOptions>();
1647      }
1648      // 21.a.ii Let mxfd be ? DefaultNumberOption(mxfd, 0, 20, undefined).
1649      int mxfd;
1650      if (!DefaultNumberOption(isolate, mxfd_obj, 0, 20, -1, mxfd_str)
1651               .To(&mxfd)) {
1652        return Nothing<NumberFormatDigitOptions>();
1653      }
1654      // 21.a.iii If mnfd is undefined, set mnfd to min(mnfdDefault, mxfd).
1655      if (mnfd_obj->IsUndefined(isolate)) {
1656        mnfd = std::min(mnfd_default, mxfd);
1657      } else if (mxfd_obj->IsUndefined(isolate)) {
1658        // 21.a.iv Else if mxfd is undefined, set mxfd to max(mxfdDefault,
1659        // mnfd).
1660        mxfd = std::max(mxfd_default, mnfd);
1661      } else if (mnfd > mxfd) {
1662        // 21.a.v Else if mnfd is greater than mxfd, throw a RangeError
1663        // exception.
1664        THROW_NEW_ERROR_RETURN_VALUE(
1665            isolate,
1666            NewRangeError(MessageTemplate::kPropertyValueOutOfRange, mxfd_str),
1667            Nothing<NumberFormatDigitOptions>());
1668      }
1669      // 21.a.vi Set intlObj.[[MinimumFractionDigits]] to mnfd.
1670      digit_options.minimum_fraction_digits = mnfd;
1671      // 21.a.vii Set intlObj.[[MaximumFractionDigits]] to mxfd.
1672      digit_options.maximum_fraction_digits = mxfd;
1673    } else {  // 17.b Else
1674      // 21.b.i Set intlObj.[[MinimumFractionDigits]] to mnfdDefault.
1675      digit_options.minimum_fraction_digits = mnfd_default;
1676      // 21.b.ii Set intlObj.[[MaximumFractionDigits]] to mxfdDefault.
1677      digit_options.maximum_fraction_digits = mxfd_default;
1678    }
1679  }
1680
1681  // 22. If needSd or needFd, then
1682  if (need_sd || need_fd) {
1683    // a. If roundingPriority is "morePrecision", then
1684    if (digit_options.rounding_priority == RoundingPriority::kMorePrecision) {
1685      // i. Set intlObj.[[RoundingType]] to morePrecision.
1686      digit_options.rounding_type = RoundingType::kMorePrecision;
1687      // b. Else if roundingPriority is "lessPrecision", then
1688    } else if (digit_options.rounding_priority ==
1689               RoundingPriority::kLessPrecision) {
1690      // i. Set intlObj.[[RoundingType]] to lessPrecision.
1691      digit_options.rounding_type = RoundingType::kLessPrecision;
1692      // c. Else if hasSd, then
1693    } else if (has_sd) {
1694      // i. Set intlObj.[[RoundingType]] to significantDigits.
1695      digit_options.rounding_type = RoundingType::kSignificantDigits;
1696      // d. Else,
1697    } else {
1698      // i.Set intlObj.[[RoundingType]] to fractionDigits.
1699      digit_options.rounding_type = RoundingType::kFractionDigits;
1700    }
1701    // 23. Else
1702  } else {
1703    // a. Set intlObj.[[RoundingType]] to morePrecision.
1704    digit_options.rounding_type = RoundingType::kMorePrecision;
1705    // b. Set intlObj.[[MinimumFractionDigits]] to 0.
1706    digit_options.minimum_fraction_digits = 0;
1707    // c. Set intlObj.[[MaximumFractionDigits]] to 0.
1708    digit_options.maximum_fraction_digits = 0;
1709    // d. Set intlObj.[[MinimumSignificantDigits]] to 1.
1710    digit_options.minimum_significant_digits = 1;
1711    // e. Set intlObj.[[MaximumSignificantDigits]] to 2.
1712    digit_options.maximum_significant_digits = 2;
1713  }
1714  return Just(digit_options);
1715}
1716
1717namespace {
1718
// ecma402/#sec-bestavailablelocale
//
// Searches |available_locales| for |locale| or the longest truncation of it.
// Each round drops the last "-subtag"; when the subtag before it is a single
// character (a singleton such as "-x" or "-u"), that one is dropped too.
// Returns the first candidate found in the set, or an empty string (standing
// in for the spec's "undefined") once no '-' is left to cut at.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                const std::string& locale) {
  for (std::string candidate = locale;;) {
    // Step 2.a: an exact hit ends the search.
    if (available_locales.count(candidate) != 0) return candidate;

    // Step 2.b: nothing left to strip means no match.
    const size_t last_dash = candidate.rfind('-');
    if (last_dash == std::string::npos) return std::string();

    // Steps 2.c and 2.d: cut at the last '-', or two characters earlier when
    // a '-' sits there, so a dangling singleton is removed as well.
    const bool preceded_by_singleton =
        last_dash >= 2 && candidate[last_dash - 2] == '-';
    candidate.resize(preceded_by_singleton ? last_dash - 2 : last_dash);
  }
}
1752
// A BCP 47 tag split into the part without its Unicode extension sequence and
// the extension sequence itself.
struct ParsedLocale {
  std::string no_extensions_locale;
  std::string extension;
};

// Splits |locale| into the tag without its Unicode ("-u-") extension sequence
// and the removed sequence.
//
// For example, 'en-US-u-co-emoji' yields no_extensions_locale 'en-US' and
// extension '-u-co-emoji'; the extension keeps its leading '-'. When there is
// nothing to remove, no_extensions_locale is |locale| unchanged and extension
// is empty.
ParsedLocale ParseBCP47Locale(const std::string& locale) {
  const size_t length = locale.length();
  ParsedLocale result;

  // A tag whose second character is '-' is treated as privateuse or
  // grandfathered and is returned whole. The DCHECK guards against ICU
  // handing back something else from canonicalization.
  const bool single_letter_lead = (length > 1) && (locale[1] == '-');
  if (single_letter_lead) {
    DCHECK(locale[0] == 'x' || locale[0] == 'i');
  }

  // Locate the Unicode extension. A "-u-" that comes after "-x-" is part of
  // the privateuse subtags and does not count.
  size_t extension_begin = std::string::npos;
  if (!single_letter_lead) {
    extension_begin = locale.find("-u-");
    if (extension_begin != std::string::npos) {
      const size_t private_begin = locale.find("-x-");
      if (private_begin != std::string::npos &&
          private_begin < extension_begin) {
        extension_begin = std::string::npos;
      }
    }
  }

  if (extension_begin == std::string::npos) {
    result.no_extensions_locale = locale;
    return result;
  }
  DCHECK_GT(length, 2);

  // The extension runs until the next singleton, i.e. a '-' followed by one
  // character and another '-', or to the end of the tag if there is none.
  size_t extension_end = length;
  size_t pos = extension_begin + 1;
  while (pos < length - 2) {
    if (locale[pos] == '-') {
      if (locale[pos + 2] == '-') {
        extension_end = pos;
        break;
      }
      // Not a singleton: step over the two characters after this '-'.
      pos += 2;
    }
    ++pos;
  }

  result.extension =
      locale.substr(extension_begin, extension_end - extension_begin);
  result.no_extensions_locale = locale.substr(0, extension_begin);
  result.no_extensions_locale.append(locale, extension_end, std::string::npos);
  return result;
}
1817
1818// ecma402/#sec-lookupsupportedlocales
1819std::vector<std::string> LookupSupportedLocales(
1820    const std::set<std::string>& available_locales,
1821    const std::vector<std::string>& requested_locales) {
1822  // 1. Let subset be a new empty List.
1823  std::vector<std::string> subset;
1824
1825  // 2. For each element locale of requestedLocales in List order, do
1826  for (const std::string& locale : requested_locales) {
1827    // 2. a. Let noExtensionsLocale be the String value that is locale
1828    //       with all Unicode locale extension sequences removed.
1829    std::string no_extension_locale =
1830        ParseBCP47Locale(locale).no_extensions_locale;
1831
1832    // 2. b. Let availableLocale be
1833    //       BestAvailableLocale(availableLocales, noExtensionsLocale).
1834    std::string available_locale =
1835        BestAvailableLocale(available_locales, no_extension_locale);
1836
1837    // 2. c. If availableLocale is not undefined, append locale to the
1838    //       end of subset.
1839    if (!available_locale.empty()) {
1840      subset.push_back(locale);
1841    }
1842  }
1843
1844  // 3. Return subset.
1845  return subset;
1846}
1847
1848icu::LocaleMatcher BuildLocaleMatcher(
1849    Isolate* isolate, const std::set<std::string>& available_locales,
1850    UErrorCode* status) {
1851  icu::Locale default_locale =
1852      icu::Locale::forLanguageTag(isolate->DefaultLocale(), *status);
1853  icu::LocaleMatcher::Builder builder;
1854  if (U_FAILURE(*status)) {
1855    return builder.build(*status);
1856  }
1857  builder.setDefaultLocale(&default_locale);
1858  for (auto it = available_locales.begin(); it != available_locales.end();
1859       ++it) {
1860    *status = U_ZERO_ERROR;
1861    icu::Locale l = icu::Locale::forLanguageTag(it->c_str(), *status);
1862    // skip invalid locale such as no-NO-NY
1863    if (U_SUCCESS(*status)) {
1864      builder.addSupportedLocale(l);
1865    }
1866  }
1867  return builder.build(*status);
1868}
1869
1870class Iterator : public icu::Locale::Iterator {
1871 public:
1872  Iterator(std::vector<std::string>::const_iterator begin,
1873           std::vector<std::string>::const_iterator end)
1874      : iter_(begin), end_(end) {}
1875  ~Iterator() override = default;
1876
1877  UBool hasNext() const override { return iter_ != end_; }
1878
1879  const icu::Locale& next() override {
1880    UErrorCode status = U_ZERO_ERROR;
1881    locale_ = icu::Locale::forLanguageTag(iter_->c_str(), status);
1882    DCHECK(U_SUCCESS(status));
1883    ++iter_;
1884    return locale_;
1885  }
1886
1887 private:
1888  std::vector<std::string>::const_iterator iter_;
1889  std::vector<std::string>::const_iterator end_;
1890  icu::Locale locale_;
1891};
1892
1893// ecma402/#sec-bestfitmatcher
1894// The BestFitMatcher abstract operation compares requestedLocales, which must
1895// be a List as returned by CanonicalizeLocaleList, against the locales in
1896// availableLocales and determines the best available language to meet the
1897// request. The algorithm is implementation dependent, but should produce
1898// results that a typical user of the requested locales would perceive
1899// as at least as good as those produced by the LookupMatcher abstract
1900// operation. Options specified through Unicode locale extension sequences must
1901// be ignored by the algorithm. Information about such subsequences is returned
1902// separately. The abstract operation returns a record with a [[locale]] field,
1903// whose value is the language tag of the selected locale, which must be an
1904// element of availableLocales. If the language tag of the request locale that
1905// led to the selected locale contained a Unicode locale extension sequence,
1906// then the returned record also contains an [[extension]] field whose value is
1907// the first Unicode locale extension sequence within the request locale
1908// language tag.
1909std::string BestFitMatcher(Isolate* isolate,
1910                           const std::set<std::string>& available_locales,
1911                           const std::vector<std::string>& requested_locales) {
1912  UErrorCode status = U_ZERO_ERROR;
1913  Iterator iter(requested_locales.cbegin(), requested_locales.cend());
1914  std::string bestfit = BuildLocaleMatcher(isolate, available_locales, &status)
1915                            .getBestMatchResult(iter, status)
1916                            .makeResolvedLocale(status)
1917                            .toLanguageTag<std::string>(status);
1918  DCHECK(U_SUCCESS(status));
1919  return bestfit;
1920}
1921
1922// ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
1923// https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
1924std::vector<std::string> BestFitSupportedLocales(
1925    Isolate* isolate, const std::set<std::string>& available_locales,
1926    const std::vector<std::string>& requested_locales) {
1927  UErrorCode status = U_ZERO_ERROR;
1928  icu::LocaleMatcher matcher =
1929      BuildLocaleMatcher(isolate, available_locales, &status);
1930  std::vector<std::string> result;
1931  if (U_SUCCESS(status)) {
1932    for (auto it = requested_locales.cbegin(); it != requested_locales.cend();
1933         it++) {
1934      status = U_ZERO_ERROR;
1935      icu::Locale desired = icu::Locale::forLanguageTag(it->c_str(), status);
1936      icu::LocaleMatcher::Result matched =
1937          matcher.getBestMatchResult(desired, status);
1938      if (U_FAILURE(status)) continue;
1939      if (matched.getSupportedIndex() < 0) continue;
1940
1941      // The BestFitSupportedLocales abstract operation returns the *SUBSET* of
1942      // the provided BCP 47 language priority list requestedLocales for which
1943      // availableLocales has a matching locale when using the Best Fit Matcher
1944      // algorithm. Locales appear in the same order in the returned list as in
1945      // requestedLocales. The steps taken are implementation dependent.
1946      std::string bestfit = desired.toLanguageTag<std::string>(status);
1947      if (U_FAILURE(status)) continue;
1948      result.push_back(bestfit);
1949    }
1950  }
1951  return result;
1952}
1953
1954// ecma262 #sec-createarrayfromlist
1955MaybeHandle<JSArray> CreateArrayFromList(Isolate* isolate,
1956                                         std::vector<std::string> elements,
1957                                         PropertyAttributes attr) {
1958  Factory* factory = isolate->factory();
1959  // Let array be ! ArrayCreate(0).
1960  Handle<JSArray> array = factory->NewJSArray(0);
1961
1962  uint32_t length = static_cast<uint32_t>(elements.size());
1963  // 3. Let n be 0.
1964  // 4. For each element e of elements, do
1965  for (uint32_t i = 0; i < length; i++) {
1966    // a. Let status be CreateDataProperty(array, ! ToString(n), e).
1967    const std::string& part = elements[i];
1968    Handle<String> value =
1969        factory->NewStringFromUtf8(base::CStrVector(part.c_str()))
1970            .ToHandleChecked();
1971    MAYBE_RETURN(JSObject::AddDataElement(array, i, value, attr),
1972                 MaybeHandle<JSArray>());
1973  }
1974  // 5. Return array.
1975  return MaybeHandle<JSArray>(array);
1976}
1977
1978// ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
1979// https://tc39.github.io/ecma402/#sec-supportedlocales
1980MaybeHandle<JSObject> SupportedLocales(
1981    Isolate* isolate, const char* method_name,
1982    const std::set<std::string>& available_locales,
1983    const std::vector<std::string>& requested_locales, Handle<Object> options) {
1984  std::vector<std::string> supported_locales;
1985
1986  // 1. Set options to ? CoerceOptionsToObject(options).
1987  Handle<JSReceiver> options_obj;
1988  ASSIGN_RETURN_ON_EXCEPTION(
1989      isolate, options_obj,
1990      CoerceOptionsToObject(isolate, options, method_name), JSObject);
1991
1992  // 2. Let matcher be ? GetOption(options, "localeMatcher", "string",
1993  //       « "lookup", "best fit" », "best fit").
1994  Maybe<Intl::MatcherOption> maybe_locale_matcher =
1995      Intl::GetLocaleMatcher(isolate, options_obj, method_name);
1996  MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>());
1997  Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
1998
1999  // 3. If matcher is "best fit", then
2000  //    a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
2001  //       requestedLocales).
2002  if (matcher == Intl::MatcherOption::kBestFit &&
2003      FLAG_harmony_intl_best_fit_matcher) {
2004    supported_locales =
2005        BestFitSupportedLocales(isolate, available_locales, requested_locales);
2006  } else {
2007    // 4. Else,
2008    //    a. Let supportedLocales be LookupSupportedLocales(availableLocales,
2009    //       requestedLocales).
2010    supported_locales =
2011        LookupSupportedLocales(available_locales, requested_locales);
2012  }
2013
2014  // 5. Return CreateArrayFromList(supportedLocales).
2015  return CreateArrayFromList(isolate, supported_locales,
2016                             PropertyAttributes::NONE);
2017}
2018
2019}  // namespace
2020
2021// ecma-402 #sec-intl.getcanonicallocales
2022MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
2023                                               Handle<Object> locales) {
2024  // 1. Let ll be ? CanonicalizeLocaleList(locales).
2025  Maybe<std::vector<std::string>> maybe_ll =
2026      CanonicalizeLocaleList(isolate, locales, false);
2027  MAYBE_RETURN(maybe_ll, MaybeHandle<JSArray>());
2028
2029  // 2. Return CreateArrayFromList(ll).
2030  return CreateArrayFromList(isolate, maybe_ll.FromJust(),
2031                             PropertyAttributes::NONE);
2032}
2033
2034namespace {
2035
2036MaybeHandle<JSArray> AvailableCollations(Isolate* isolate) {
2037  UErrorCode status = U_ZERO_ERROR;
2038  std::unique_ptr<icu::StringEnumeration> enumeration(
2039      icu::Collator::getKeywordValues("collation", status));
2040  if (U_FAILURE(status)) {
2041    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
2042                    JSArray);
2043  }
2044  return Intl::ToJSArray(isolate, "co", enumeration.get(),
2045                         Intl::RemoveCollation, true);
2046}
2047
2048MaybeHandle<JSArray> VectorToJSArray(Isolate* isolate,
2049                                     const std::vector<std::string>& array) {
2050  Factory* factory = isolate->factory();
2051  Handle<FixedArray> fixed_array =
2052      factory->NewFixedArray(static_cast<int32_t>(array.size()));
2053  int32_t index = 0;
2054  for (std::string item : array) {
2055    Handle<String> str = factory->NewStringFromAsciiChecked(item.c_str());
2056    fixed_array->set(index++, *str);
2057  }
2058  return factory->NewJSArrayWithElements(fixed_array);
2059}
2060
2061namespace {
2062
2063class ResourceAvailableCurrencies {
2064 public:
2065  ResourceAvailableCurrencies() {
2066    UErrorCode status = U_ZERO_ERROR;
2067    UEnumeration* uenum =
2068        ucurr_openISOCurrencies(UCURR_COMMON | UCURR_NON_DEPRECATED, &status);
2069    DCHECK(U_SUCCESS(status));
2070    const char* next = nullptr;
2071    while (U_SUCCESS(status) &&
2072           (next = uenum_next(uenum, nullptr, &status)) != nullptr) {
2073      // Work around the issue that we do not support VEF currency code
2074      // in DisplayNames by not reporting it.
2075      if (strcmp(next, "VEF") == 0) continue;
2076      AddIfAvailable(next);
2077    }
2078    // Work around the issue that we do support the following currency codes
2079    // in DisplayNames but the ICU API is not reporting it.
2080    AddIfAvailable("SVC");
2081    AddIfAvailable("XDR");
2082    AddIfAvailable("XSU");
2083    AddIfAvailable("ZWL");
2084    std::sort(list_.begin(), list_.end());
2085    uenum_close(uenum);
2086  }
2087
2088  const std::vector<std::string>& Get() const { return list_; }
2089
2090  void AddIfAvailable(const char* currency) {
2091    icu::UnicodeString code(currency, -1, US_INV);
2092    UErrorCode status = U_ZERO_ERROR;
2093    int32_t len = 0;
2094    const UChar* result =
2095        ucurr_getName(code.getTerminatedBuffer(), "en", UCURR_LONG_NAME,
2096                      nullptr, &len, &status);
2097    if (U_SUCCESS(status) &&
2098        u_strcmp(result, code.getTerminatedBuffer()) != 0) {
2099      list_.push_back(currency);
2100    }
2101  }
2102
2103 private:
2104  std::vector<std::string> list_;
2105};
2106
2107const std::vector<std::string>& GetAvailableCurrencies() {
2108  static base::LazyInstance<ResourceAvailableCurrencies>::type
2109      available_currencies = LAZY_INSTANCE_INITIALIZER;
2110  return available_currencies.Pointer()->Get();
2111}
2112}  // namespace
2113
2114MaybeHandle<JSArray> AvailableCurrencies(Isolate* isolate) {
2115  return VectorToJSArray(isolate, GetAvailableCurrencies());
2116}
2117
2118MaybeHandle<JSArray> AvailableNumberingSystems(Isolate* isolate) {
2119  UErrorCode status = U_ZERO_ERROR;
2120  std::unique_ptr<icu::StringEnumeration> enumeration(
2121      icu::NumberingSystem::getAvailableNames(status));
2122  if (U_FAILURE(status)) {
2123    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
2124                    JSArray);
2125  }
2126  // Need to filter out isAlgorithmic
2127  return Intl::ToJSArray(
2128      isolate, "nu", enumeration.get(),
2129      [](const char* value) {
2130        UErrorCode status = U_ZERO_ERROR;
2131        std::unique_ptr<icu::NumberingSystem> numbering_system(
2132            icu::NumberingSystem::createInstanceByName(value, status));
2133        // Skip algorithmic one since chrome filter out the resource.
2134        return U_FAILURE(status) || numbering_system->isAlgorithmic();
2135      },
2136      true);
2137}
2138
2139MaybeHandle<JSArray> AvailableTimeZones(Isolate* isolate) {
2140  UErrorCode status = U_ZERO_ERROR;
2141  std::unique_ptr<icu::StringEnumeration> enumeration(
2142      icu::TimeZone::createTimeZoneIDEnumeration(
2143          UCAL_ZONE_TYPE_CANONICAL_LOCATION, nullptr, nullptr, status));
2144  if (U_FAILURE(status)) {
2145    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
2146                    JSArray);
2147  }
2148  return Intl::ToJSArray(isolate, nullptr, enumeration.get(), nullptr, true);
2149}
2150
2151MaybeHandle<JSArray> AvailableUnits(Isolate* isolate) {
2152  Factory* factory = isolate->factory();
2153  std::set<std::string> sanctioned(Intl::SanctionedSimpleUnits());
2154  Handle<FixedArray> fixed_array =
2155      factory->NewFixedArray(static_cast<int32_t>(sanctioned.size()));
2156  int32_t index = 0;
2157  for (std::string item : sanctioned) {
2158    Handle<String> str = factory->NewStringFromAsciiChecked(item.c_str());
2159    fixed_array->set(index++, *str);
2160  }
2161  return factory->NewJSArrayWithElements(fixed_array);
2162}
2163
2164}  // namespace
2165
2166// ecma-402 #sec-intl.supportedvaluesof
2167MaybeHandle<JSArray> Intl::SupportedValuesOf(Isolate* isolate,
2168                                             Handle<Object> key_obj) {
2169  Factory* factory = isolate->factory();
2170  // 1. 1. Let key be ? ToString(key).
2171  Handle<String> key_str;
2172  ASSIGN_RETURN_ON_EXCEPTION(isolate, key_str,
2173                             Object::ToString(isolate, key_obj), JSArray);
2174  // 2. If key is "calendar", then
2175  if (factory->calendar_string()->Equals(*key_str)) {
2176    // a. Let list be ! AvailableCalendars( ).
2177    return Intl::AvailableCalendars(isolate);
2178  }
2179  // 3. Else if key is "collation", then
2180  if (factory->collation_string()->Equals(*key_str)) {
2181    // a. Let list be ! AvailableCollations( ).
2182    return AvailableCollations(isolate);
2183  }
2184  // 4. Else if key is "currency", then
2185  if (factory->currency_string()->Equals(*key_str)) {
2186    // a. Let list be ! AvailableCurrencies( ).
2187    return AvailableCurrencies(isolate);
2188  }
2189  // 5. Else if key is "numberingSystem", then
2190  if (factory->numberingSystem_string()->Equals(*key_str)) {
2191    // a. Let list be ! AvailableNumberingSystems( ).
2192    return AvailableNumberingSystems(isolate);
2193  }
2194  // 6. Else if key is "timeZone", then
2195  if (factory->timeZone_string()->Equals(*key_str)) {
2196    // a. Let list be ! AvailableTimeZones( ).
2197    return AvailableTimeZones(isolate);
2198  }
2199  // 7. Else if key is "unit", then
2200  if (factory->unit_string()->Equals(*key_str)) {
2201    // a. Let list be ! AvailableUnits( ).
2202    return AvailableUnits(isolate);
2203  }
2204  // 8. Else,
2205  // a. Throw a RangeError exception.
2206  // 9. Return ! CreateArrayFromList( list ).
2207
2208  THROW_NEW_ERROR(
2209      isolate,
2210      NewRangeError(MessageTemplate::kInvalid,
2211                    factory->NewStringFromStaticChars("key"), key_str),
2212      JSArray);
2213}
2214
2215// ECMA 402 Intl.*.supportedLocalesOf
2216MaybeHandle<JSObject> Intl::SupportedLocalesOf(
2217    Isolate* isolate, const char* method_name,
2218    const std::set<std::string>& available_locales, Handle<Object> locales,
2219    Handle<Object> options) {
2220  // Let availableLocales be %Collator%.[[AvailableLocales]].
2221
2222  // Let requestedLocales be ? CanonicalizeLocaleList(locales).
2223  Maybe<std::vector<std::string>> requested_locales =
2224      CanonicalizeLocaleList(isolate, locales, false);
2225  MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>());
2226
2227  // Return ? SupportedLocales(availableLocales, requestedLocales, options).
2228  return SupportedLocales(isolate, method_name, available_locales,
2229                          requested_locales.FromJust(), options);
2230}
2231
2232namespace {
2233
2234template <typename T>
2235bool IsValidExtension(const icu::Locale& locale, const char* key,
2236                      const std::string& value) {
2237  const char* legacy_type = uloc_toLegacyType(key, value.c_str());
2238  if (legacy_type == nullptr) {
2239    return false;
2240  }
2241  UErrorCode status = U_ZERO_ERROR;
2242  std::unique_ptr<icu::StringEnumeration> enumeration(
2243      T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
2244                                   false, status));
2245  if (U_FAILURE(status)) {
2246    return false;
2247  }
2248  int32_t length;
2249  for (const char* item = enumeration->next(&length, status);
2250       U_SUCCESS(status) && item != nullptr;
2251       item = enumeration->next(&length, status)) {
2252    if (strcmp(legacy_type, item) == 0) {
2253      return true;
2254    }
2255  }
2256  return false;
2257}
2258
2259}  // namespace
2260
2261bool Intl::IsValidCollation(const icu::Locale& locale,
2262                            const std::string& value) {
2263  std::set<std::string> invalid_values = {"standard", "search"};
2264  if (invalid_values.find(value) != invalid_values.end()) return false;
2265  return IsValidExtension<icu::Collator>(locale, "collation", value);
2266}
2267
2268bool Intl::IsWellFormedCalendar(const std::string& value) {
2269  return JSLocale::Is38AlphaNumList(value);
2270}
2271
2272// ecma402/#sec-iswellformedcurrencycode
2273bool Intl::IsWellFormedCurrency(const std::string& currency) {
2274  return JSLocale::Is3Alpha(currency);
2275}
2276
2277bool Intl::IsValidCalendar(const icu::Locale& locale,
2278                           const std::string& value) {
2279  return IsValidExtension<icu::Calendar>(locale, "calendar", value);
2280}
2281
2282bool Intl::IsValidNumberingSystem(const std::string& value) {
2283  std::set<std::string> invalid_values = {"native", "traditio", "finance"};
2284  if (invalid_values.find(value) != invalid_values.end()) return false;
2285  UErrorCode status = U_ZERO_ERROR;
2286  std::unique_ptr<icu::NumberingSystem> numbering_system(
2287      icu::NumberingSystem::createInstanceByName(value.c_str(), status));
2288  return U_SUCCESS(status) && numbering_system.get() != nullptr &&
2289         !numbering_system->isAlgorithmic();
2290}
2291
2292namespace {
2293
2294bool IsWellFormedNumberingSystem(const std::string& value) {
2295  return JSLocale::Is38AlphaNumList(value);
2296}
2297
2298std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
2299    icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
2300  std::map<std::string, std::string> extensions;
2301
2302  UErrorCode status = U_ZERO_ERROR;
2303  icu::LocaleBuilder builder;
2304  builder.setLocale(*icu_locale).clearExtensions();
2305  std::unique_ptr<icu::StringEnumeration> keywords(
2306      icu_locale->createKeywords(status));
2307  if (U_FAILURE(status)) return extensions;
2308
2309  if (!keywords) return extensions;
2310  char value[ULOC_FULLNAME_CAPACITY];
2311
2312  int32_t length;
2313  status = U_ZERO_ERROR;
2314  for (const char* keyword = keywords->next(&length, status);
2315       keyword != nullptr; keyword = keywords->next(&length, status)) {
2316    // Ignore failures in ICU and skip to the next keyword.
2317    //
2318    // This is fine.™
2319    if (U_FAILURE(status)) {
2320      status = U_ZERO_ERROR;
2321      continue;
2322    }
2323
2324    icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
2325
2326    // Ignore failures in ICU and skip to the next keyword.
2327    //
2328    // This is fine.™
2329    if (U_FAILURE(status)) {
2330      status = U_ZERO_ERROR;
2331      continue;
2332    }
2333
2334    const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
2335
2336    if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
2337      const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
2338      bool is_valid_value = false;
2339      // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
2340      if (strcmp("ca", bcp47_key) == 0) {
2341        is_valid_value = Intl::IsValidCalendar(*icu_locale, bcp47_value);
2342      } else if (strcmp("co", bcp47_key) == 0) {
2343        is_valid_value = Intl::IsValidCollation(*icu_locale, bcp47_value);
2344      } else if (strcmp("hc", bcp47_key) == 0) {
2345        // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
2346        std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
2347        is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2348      } else if (strcmp("lb", bcp47_key) == 0) {
2349        // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
2350        std::set<std::string> valid_values = {"strict", "normal", "loose"};
2351        is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2352      } else if (strcmp("kn", bcp47_key) == 0) {
2353        // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
2354        std::set<std::string> valid_values = {"true", "false"};
2355        is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2356      } else if (strcmp("kf", bcp47_key) == 0) {
2357        // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
2358        std::set<std::string> valid_values = {"upper", "lower", "false"};
2359        is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2360      } else if (strcmp("nu", bcp47_key) == 0) {
2361        is_valid_value = Intl::IsValidNumberingSystem(bcp47_value);
2362      }
2363      if (is_valid_value) {
2364        extensions.insert(
2365            std::pair<std::string, std::string>(bcp47_key, bcp47_value));
2366        builder.setUnicodeLocaleKeyword(bcp47_key, bcp47_value);
2367      }
2368    }
2369  }
2370
2371  status = U_ZERO_ERROR;
2372  *icu_locale = builder.build(status);
2373
2374  return extensions;
2375}
2376
2377// ecma402/#sec-lookupmatcher
2378std::string LookupMatcher(Isolate* isolate,
2379                          const std::set<std::string>& available_locales,
2380                          const std::vector<std::string>& requested_locales) {
2381  // 1. Let result be a new Record.
2382  std::string result;
2383
2384  // 2. For each element locale of requestedLocales in List order, do
2385  for (const std::string& locale : requested_locales) {
2386    // 2. a. Let noExtensionsLocale be the String value that is locale
2387    //       with all Unicode locale extension sequences removed.
2388    ParsedLocale parsed_locale = ParseBCP47Locale(locale);
2389    std::string no_extensions_locale = parsed_locale.no_extensions_locale;
2390
2391    // 2. b. Let availableLocale be
2392    //       BestAvailableLocale(availableLocales, noExtensionsLocale).
2393    std::string available_locale =
2394        BestAvailableLocale(available_locales, no_extensions_locale);
2395
2396    // 2. c. If availableLocale is not undefined, append locale to the
2397    //       end of subset.
2398    if (!available_locale.empty()) {
2399      // Note: The following steps are not performed here because we
2400      // can use ICU to parse the unicode locale extension sequence
2401      // as part of Intl::ResolveLocale.
2402      //
2403      // There's no need to separate the unicode locale extensions
2404      // right here. Instead just return the available locale with the
2405      // extensions.
2406      //
2407      // 2. c. i. Set result.[[locale]] to availableLocale.
2408      // 2. c. ii. If locale and noExtensionsLocale are not the same
2409      // String value, then
2410      // 2. c. ii. 1. Let extension be the String value consisting of
2411      // the first substring of locale that is a Unicode locale
2412      // extension sequence.
2413      // 2. c. ii. 2. Set result.[[extension]] to extension.
2414      // 2. c. iii. Return result.
2415      return available_locale + parsed_locale.extension;
2416    }
2417  }
2418
2419  // 3. Let defLocale be DefaultLocale();
2420  // 4. Set result.[[locale]] to defLocale.
2421  // 5. Return result.
2422  return isolate->DefaultLocale();
2423}
2424
2425}  // namespace
2426
2427// This function doesn't correspond exactly with the spec. Instead
2428// we use ICU to do all the string manipulations that the spec
2429// peforms.
2430//
2431// The spec uses this function to normalize values for various
2432// relevant extension keys (such as disallowing "search" for
2433// collation). Instead of doing this here, we let the callers of
2434// this method perform such normalization.
2435//
2436// ecma402/#sec-resolvelocale
2437Maybe<Intl::ResolvedLocale> Intl::ResolveLocale(
2438    Isolate* isolate, const std::set<std::string>& available_locales,
2439    const std::vector<std::string>& requested_locales, MatcherOption matcher,
2440    const std::set<std::string>& relevant_extension_keys) {
2441  std::string locale;
2442  if (matcher == Intl::MatcherOption::kBestFit &&
2443      FLAG_harmony_intl_best_fit_matcher) {
2444    locale = BestFitMatcher(isolate, available_locales, requested_locales);
2445  } else {
2446    locale = LookupMatcher(isolate, available_locales, requested_locales);
2447  }
2448
2449  Maybe<icu::Locale> maybe_icu_locale = CreateICULocale(locale);
2450  MAYBE_RETURN(maybe_icu_locale, Nothing<Intl::ResolvedLocale>());
2451  icu::Locale icu_locale = maybe_icu_locale.FromJust();
2452  std::map<std::string, std::string> extensions =
2453      LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
2454
2455  std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust();
2456
2457  // TODO(gsathya): Remove privateuse subtags from extensions.
2458
2459  return Just(
2460      Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions});
2461}
2462
2463Handle<Managed<icu::UnicodeString>> Intl::SetTextToBreakIterator(
2464    Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {
2465  text = String::Flatten(isolate, text);
2466  icu::UnicodeString* u_text = static_cast<icu::UnicodeString*>(
2467      Intl::ToICUUnicodeString(isolate, text).clone());
2468
2469  Handle<Managed<icu::UnicodeString>> new_u_text =
2470      Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, u_text);
2471
2472  break_iterator->setText(*u_text);
2473  return new_u_text;
2474}
2475
2476// ecma262 #sec-string.prototype.normalize
2477MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
2478                                    Handle<Object> form_input) {
2479  const char* form_name;
2480  UNormalization2Mode form_mode;
2481  if (form_input->IsUndefined(isolate)) {
2482    // default is FNC
2483    form_name = "nfc";
2484    form_mode = UNORM2_COMPOSE;
2485  } else {
2486    Handle<String> form;
2487    ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
2488                               Object::ToString(isolate, form_input), String);
2489
2490    if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
2491      form_name = "nfc";
2492      form_mode = UNORM2_COMPOSE;
2493    } else if (String::Equals(isolate, form,
2494                              isolate->factory()->NFD_string())) {
2495      form_name = "nfc";
2496      form_mode = UNORM2_DECOMPOSE;
2497    } else if (String::Equals(isolate, form,
2498                              isolate->factory()->NFKC_string())) {
2499      form_name = "nfkc";
2500      form_mode = UNORM2_COMPOSE;
2501    } else if (String::Equals(isolate, form,
2502                              isolate->factory()->NFKD_string())) {
2503      form_name = "nfkc";
2504      form_mode = UNORM2_DECOMPOSE;
2505    } else {
2506      Handle<String> valid_forms =
2507          isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
2508      THROW_NEW_ERROR(
2509          isolate,
2510          NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
2511          String);
2512    }
2513  }
2514
2515  int length = string->length();
2516  string = String::Flatten(isolate, string);
2517  icu::UnicodeString result;
2518  std::unique_ptr<base::uc16[]> sap;
2519  UErrorCode status = U_ZERO_ERROR;
2520  icu::UnicodeString input = ToICUUnicodeString(isolate, string);
2521  // Getting a singleton. Should not free it.
2522  const icu::Normalizer2* normalizer =
2523      icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
2524  DCHECK(U_SUCCESS(status));
2525  DCHECK_NOT_NULL(normalizer);
2526  int32_t normalized_prefix_length =
2527      normalizer->spanQuickCheckYes(input, status);
2528  // Quick return if the input is already normalized.
2529  if (length == normalized_prefix_length) return string;
2530  icu::UnicodeString unnormalized =
2531      input.tempSubString(normalized_prefix_length);
2532  // Read-only alias of the normalized prefix.
2533  result.setTo(false, input.getBuffer(), normalized_prefix_length);
2534  // copy-on-write; normalize the suffix and append to |result|.
2535  normalizer->normalizeSecondAndAppend(result, unnormalized, status);
2536
2537  if (U_FAILURE(status)) {
2538    THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2539  }
2540
2541  return Intl::ToString(isolate, result);
2542}
2543
2544// ICUTimezoneCache calls out to ICU for TimezoneCache
2545// functionality in a straightforward way.
2546class ICUTimezoneCache : public base::TimezoneCache {
2547 public:
2548  ICUTimezoneCache() : timezone_(nullptr) { Clear(TimeZoneDetection::kSkip); }
2549
2550  ~ICUTimezoneCache() override { Clear(TimeZoneDetection::kSkip); }
2551
2552  const char* LocalTimezone(double time_ms) override;
2553
2554  double DaylightSavingsOffset(double time_ms) override;
2555
2556  double LocalTimeOffset(double time_ms, bool is_utc) override;
2557
2558  void Clear(TimeZoneDetection time_zone_detection) override;
2559
2560 private:
2561  icu::TimeZone* GetTimeZone();
2562
2563  bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset,
2564                  int32_t* dst_offset);
2565
2566  icu::TimeZone* timezone_;
2567
2568  std::string timezone_name_;
2569  std::string dst_timezone_name_;
2570};
2571
2572const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
2573  bool is_dst = DaylightSavingsOffset(time_ms) != 0;
2574  std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_;
2575  if (name->empty()) {
2576    icu::UnicodeString result;
2577    GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
2578    result += '\0';
2579
2580    icu::StringByteSink<std::string> byte_sink(name);
2581    result.toUTF8(byte_sink);
2582  }
2583  DCHECK(!name->empty());
2584  return name->c_str();
2585}
2586
2587icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
2588  if (timezone_ == nullptr) {
2589    timezone_ = icu::TimeZone::createDefault();
2590  }
2591  return timezone_;
2592}
2593
2594bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
2595                                  int32_t* raw_offset, int32_t* dst_offset) {
2596  UErrorCode status = U_ZERO_ERROR;
2597  if (is_utc) {
2598    GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
2599  } else {
2600    // Note that casting TimeZone to BasicTimeZone is safe because we know that
2601    // icu::TimeZone used here is a BasicTimeZone.
2602    static_cast<const icu::BasicTimeZone*>(GetTimeZone())
2603        ->getOffsetFromLocal(time_ms, UCAL_TZ_LOCAL_FORMER,
2604                             UCAL_TZ_LOCAL_FORMER, *raw_offset, *dst_offset,
2605                             status);
2606  }
2607
2608  return U_SUCCESS(status);
2609}
2610
2611double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
2612  int32_t raw_offset, dst_offset;
2613  if (!GetOffsets(time_ms, true, &raw_offset, &dst_offset)) return 0;
2614  return dst_offset;
2615}
2616
2617double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {
2618  int32_t raw_offset, dst_offset;
2619  if (!GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset)) return 0;
2620  return raw_offset + dst_offset;
2621}
2622
2623void ICUTimezoneCache::Clear(TimeZoneDetection time_zone_detection) {
2624  delete timezone_;
2625  timezone_ = nullptr;
2626  timezone_name_.clear();
2627  dst_timezone_name_.clear();
2628  if (time_zone_detection == TimeZoneDetection::kRedetect) {
2629    icu::TimeZone::adoptDefault(icu::TimeZone::detectHostTimeZone());
2630  }
2631}
2632
2633base::TimezoneCache* Intl::CreateTimeZoneCache() {
2634  return FLAG_icu_timezone_data ? new ICUTimezoneCache()
2635                                : base::OS::CreateTimezoneCache();
2636}
2637
2638Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
2639                                                  Handle<JSReceiver> options,
2640                                                  const char* method_name) {
2641  return GetStringOption<Intl::MatcherOption>(
2642      isolate, options, "localeMatcher", method_name, {"best fit", "lookup"},
2643      {Intl::MatcherOption::kBestFit, Intl::MatcherOption::kLookup},
2644      Intl::MatcherOption::kBestFit);
2645}
2646
2647Maybe<bool> Intl::GetNumberingSystem(Isolate* isolate,
2648                                     Handle<JSReceiver> options,
2649                                     const char* method_name,
2650                                     std::unique_ptr<char[]>* result) {
2651  const std::vector<const char*> empty_values = {};
2652  Maybe<bool> maybe = GetStringOption(isolate, options, "numberingSystem",
2653                                      empty_values, method_name, result);
2654  MAYBE_RETURN(maybe, Nothing<bool>());
2655  if (maybe.FromJust() && *result != nullptr) {
2656    if (!IsWellFormedNumberingSystem(result->get())) {
2657      THROW_NEW_ERROR_RETURN_VALUE(
2658          isolate,
2659          NewRangeError(
2660              MessageTemplate::kInvalid,
2661              isolate->factory()->numberingSystem_string(),
2662              isolate->factory()->NewStringFromAsciiChecked(result->get())),
2663          Nothing<bool>());
2664    }
2665    return Just(true);
2666  }
2667  return Just(false);
2668}
2669
2670const std::set<std::string>& Intl::GetAvailableLocales() {
2671  static base::LazyInstance<Intl::AvailableLocales<>>::type available_locales =
2672      LAZY_INSTANCE_INITIALIZER;
2673  return available_locales.Pointer()->Get();
2674}
2675
2676namespace {
2677
// Policy type passed to Intl::AvailableLocales for date formatting: it names
// the "calendar" key and supplies no path.
struct CheckCalendar {
  static const char* key() {
    static const char kKey[] = "calendar";
    return kKey;
  }
  static const char* path() { return nullptr; }
};
2682
2683}  // namespace
2684
2685const std::set<std::string>& Intl::GetAvailableLocalesForDateFormat() {
2686  static base::LazyInstance<Intl::AvailableLocales<CheckCalendar>>::type
2687      available_locales = LAZY_INSTANCE_INITIALIZER;
2688  return available_locales.Pointer()->Get();
2689}
2690
2691constexpr uint16_t kInfinityChar = 0x221e;
2692
2693Handle<String> Intl::NumberFieldToType(Isolate* isolate,
2694                                       const NumberFormatSpan& part,
2695                                       const icu::UnicodeString& text,
2696                                       bool is_nan) {
2697  switch (static_cast<UNumberFormatFields>(part.field_id)) {
2698    case UNUM_INTEGER_FIELD:
2699      if (is_nan) return isolate->factory()->nan_string();
2700      if (text.charAt(part.begin_pos) == kInfinityChar ||
2701          // en-US-POSIX output "INF" for Infinity
2702          (part.end_pos - part.begin_pos == 3 &&
2703           text.tempSubString(part.begin_pos, 3) == "INF")) {
2704        return isolate->factory()->infinity_string();
2705      }
2706      return isolate->factory()->integer_string();
2707    case UNUM_FRACTION_FIELD:
2708      return isolate->factory()->fraction_string();
2709    case UNUM_DECIMAL_SEPARATOR_FIELD:
2710      return isolate->factory()->decimal_string();
2711    case UNUM_GROUPING_SEPARATOR_FIELD:
2712      return isolate->factory()->group_string();
2713    case UNUM_CURRENCY_FIELD:
2714      return isolate->factory()->currency_string();
2715    case UNUM_PERCENT_FIELD:
2716      return isolate->factory()->percentSign_string();
2717    case UNUM_SIGN_FIELD:
2718      return (text.charAt(part.begin_pos) == '+')
2719                 ? isolate->factory()->plusSign_string()
2720                 : isolate->factory()->minusSign_string();
2721    case UNUM_EXPONENT_SYMBOL_FIELD:
2722      return isolate->factory()->exponentSeparator_string();
2723
2724    case UNUM_EXPONENT_SIGN_FIELD:
2725      return isolate->factory()->exponentMinusSign_string();
2726
2727    case UNUM_EXPONENT_FIELD:
2728      return isolate->factory()->exponentInteger_string();
2729
2730    case UNUM_PERMILL_FIELD:
2731      // We're not creating any permill formatter, and it's not even clear how
2732      // that would be possible with the ICU API.
2733      UNREACHABLE();
2734
2735    case UNUM_COMPACT_FIELD:
2736      return isolate->factory()->compact_string();
2737    case UNUM_MEASURE_UNIT_FIELD:
2738      return isolate->factory()->unit_string();
2739
2740    default:
2741      UNREACHABLE();
2742  }
2743}
2744
2745// A helper function to convert the FormattedValue for several Intl objects.
2746MaybeHandle<String> Intl::FormattedToString(
2747    Isolate* isolate, const icu::FormattedValue& formatted) {
2748  UErrorCode status = U_ZERO_ERROR;
2749  icu::UnicodeString result = formatted.toString(status);
2750  if (U_FAILURE(status)) {
2751    THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2752  }
2753  return Intl::ToString(isolate, result);
2754}
2755
2756MaybeHandle<JSArray> Intl::ToJSArray(
2757    Isolate* isolate, const char* unicode_key,
2758    icu::StringEnumeration* enumeration,
2759    const std::function<bool(const char*)>& removes, bool sort) {
2760  UErrorCode status = U_ZERO_ERROR;
2761  std::vector<std::string> array;
2762  for (const char* item = enumeration->next(nullptr, status);
2763       U_SUCCESS(status) && item != nullptr;
2764       item = enumeration->next(nullptr, status)) {
2765    if (unicode_key != nullptr) {
2766      item = uloc_toUnicodeLocaleType(unicode_key, item);
2767    }
2768    if (removes == nullptr || !(removes)(item)) {
2769      array.push_back(item);
2770    }
2771  }
2772
2773  if (sort) {
2774    std::sort(array.begin(), array.end());
2775  }
2776  return VectorToJSArray(isolate, array);
2777}
2778
2779bool Intl::RemoveCollation(const char* collation) {
2780  return strcmp("standard", collation) == 0 || strcmp("search", collation) == 0;
2781}
2782
2783// See the list in ecma402 #sec-issanctionedsimpleunitidentifier
2784std::set<std::string> Intl::SanctionedSimpleUnits() {
2785  return std::set<std::string>({"acre",       "bit",        "byte",
2786                                "celsius",    "centimeter", "day",
2787                                "degree",     "fahrenheit", "fluid-ounce",
2788                                "foot",       "gallon",     "gigabit",
2789                                "gigabyte",   "gram",       "hectare",
2790                                "hour",       "inch",       "kilobit",
2791                                "kilobyte",   "kilogram",   "kilometer",
2792                                "liter",      "megabit",    "megabyte",
2793                                "meter",      "mile",       "mile-scandinavian",
2794                                "millimeter", "milliliter", "millisecond",
2795                                "minute",     "month",      "ounce",
2796                                "percent",    "petabyte",   "pound",
2797                                "second",     "stone",      "terabit",
2798                                "terabyte",   "week",       "yard",
2799                                "year"});
2800}
2801
2802// ecma-402/#sec-isvalidtimezonename
2803
2804namespace {
2805bool IsUnicodeStringValidTimeZoneName(const icu::UnicodeString& id) {
2806  UErrorCode status = U_ZERO_ERROR;
2807  icu::UnicodeString canonical;
2808  icu::TimeZone::getCanonicalID(id, canonical, status);
2809  return U_SUCCESS(status) &&
2810         canonical != icu::UnicodeString("Etc/Unknown", -1, US_INV);
2811}
2812}  // namespace
2813
2814MaybeHandle<String> Intl::CanonicalizeTimeZoneName(Isolate* isolate,
2815                                                   Handle<String> identifier) {
2816  UErrorCode status = U_ZERO_ERROR;
2817  std::string time_zone =
2818      JSDateTimeFormat::CanonicalizeTimeZoneID(identifier->ToCString().get());
2819  icu::UnicodeString time_zone_ustring =
2820      icu::UnicodeString(time_zone.c_str(), -1, US_INV);
2821  icu::UnicodeString canonical;
2822  icu::TimeZone::getCanonicalID(time_zone_ustring, canonical, status);
2823  CHECK(U_SUCCESS(status));
2824  if (canonical == UNICODE_STRING_SIMPLE("Etc/UTC") ||
2825      canonical == UNICODE_STRING_SIMPLE("Etc/GMT")) {
2826    return isolate->factory()->UTC_string();
2827  }
2828  return Intl::ToString(isolate, canonical);
2829}
2830
2831bool Intl::IsValidTimeZoneName(Isolate* isolate, Handle<String> id) {
2832  std::string time_zone =
2833      JSDateTimeFormat::CanonicalizeTimeZoneID(id->ToCString().get());
2834  icu::UnicodeString time_zone_ustring =
2835      icu::UnicodeString(time_zone.c_str(), -1, US_INV);
2836  return IsUnicodeStringValidTimeZoneName(time_zone_ustring);
2837}
2838
2839bool Intl::IsValidTimeZoneName(const icu::TimeZone& tz) {
2840  icu::UnicodeString id;
2841  tz.getID(id);
2842  return IsUnicodeStringValidTimeZoneName(id);
2843}
2844
2845// Function to support Temporal
2846std::string Intl::TimeZoneIdFromIndex(int32_t index) {
2847  if (index == 0) return "UTC";
2848  std::unique_ptr<icu::StringEnumeration> enumeration(
2849      icu::TimeZone::createEnumeration());
2850  int32_t curr = 0;
2851  const char* id;
2852
2853  UErrorCode status = U_ZERO_ERROR;
2854  while (U_SUCCESS(status) && curr < index &&
2855         ((id = enumeration->next(nullptr, status)) != nullptr)) {
2856    CHECK(U_SUCCESS(status));
2857    curr++;
2858  }
2859  CHECK(U_SUCCESS(status));
2860  CHECK(id != nullptr);
2861  return id;
2862}
2863
2864Maybe<bool> Intl::GetTimeZoneIndex(Isolate* isolate, Handle<String> identifier,
2865                                   int32_t* index) {
2866  if (identifier->Equals(*isolate->factory()->UTC_string())) {
2867    *index = 0;
2868    return Just(true);
2869  }
2870
2871  std::string identifier_str(identifier->ToCString().get());
2872  std::unique_ptr<icu::TimeZone> tz(
2873      icu::TimeZone::createTimeZone(identifier_str.c_str()));
2874  if (!IsValidTimeZoneName(*tz)) {
2875    return Just(false);
2876  }
2877
2878  std::unique_ptr<icu::StringEnumeration> enumeration(
2879      icu::TimeZone::createEnumeration());
2880  int32_t curr = 0;
2881  const char* id;
2882
2883  UErrorCode status = U_ZERO_ERROR;
2884  while (U_SUCCESS(status) &&
2885         (id = enumeration->next(nullptr, status)) != nullptr) {
2886    if (identifier_str == id) {
2887      *index = curr + 1;
2888      return Just(true);
2889    }
2890    curr++;
2891  }
2892  CHECK(U_SUCCESS(status));
2893  // We should not reach here, the !IsValidTimeZoneName should return earlier
2894  UNREACHABLE();
2895}
2896
2897// #sec-tointlmathematicalvalue
2898MaybeHandle<Object> Intl::ToIntlMathematicalValueAsNumberBigIntOrString(
2899    Isolate* isolate, Handle<Object> input) {
2900  if (input->IsNumber() || input->IsBigInt()) return input;  // Shortcut.
2901  // TODO(ftang) revisit the following after the resolution of
2902  // https://github.com/tc39/proposal-intl-numberformat-v3/pull/82
2903  if (input->IsOddball()) {
2904    return Oddball::ToNumber(isolate, Handle<Oddball>::cast(input));
2905  }
2906  if (input->IsSymbol()) {
2907    THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kSymbolToNumber),
2908                    Object);
2909  }
2910  ASSIGN_RETURN_ON_EXCEPTION(
2911      isolate, input,
2912      JSReceiver::ToPrimitive(isolate, Handle<JSReceiver>::cast(input),
2913                              ToPrimitiveHint::kNumber),
2914      Object);
2915  if (input->IsString()) UNIMPLEMENTED();
2916  return input;
2917}
2918
2919Intl::FormatRangeSourceTracker::FormatRangeSourceTracker() {
2920  start_[0] = start_[1] = limit_[0] = limit_[1] = 0;
2921}
2922
2923void Intl::FormatRangeSourceTracker::Add(int32_t field, int32_t start,
2924                                         int32_t limit) {
2925  DCHECK_LT(field, 2);
2926  start_[field] = start;
2927  limit_[field] = limit;
2928}
2929
2930Intl::FormatRangeSource Intl::FormatRangeSourceTracker::GetSource(
2931    int32_t start, int32_t limit) const {
2932  FormatRangeSource source = FormatRangeSource::kShared;
2933  if (FieldContains(0, start, limit)) {
2934    source = FormatRangeSource::kStartRange;
2935  } else if (FieldContains(1, start, limit)) {
2936    source = FormatRangeSource::kEndRange;
2937  }
2938  return source;
2939}
2940
2941bool Intl::FormatRangeSourceTracker::FieldContains(int32_t field, int32_t start,
2942                                                   int32_t limit) const {
2943  DCHECK_LT(field, 2);
2944  return (start_[field] <= start) && (start <= limit_[field]) &&
2945         (start_[field] <= limit) && (limit <= limit_[field]);
2946}
2947
2948Handle<String> Intl::SourceString(Isolate* isolate, FormatRangeSource source) {
2949  switch (source) {
2950    case FormatRangeSource::kShared:
2951      return ReadOnlyRoots(isolate).shared_string_handle();
2952    case FormatRangeSource::kStartRange:
2953      return ReadOnlyRoots(isolate).startRange_string_handle();
2954    case FormatRangeSource::kEndRange:
2955      return ReadOnlyRoots(isolate).endRange_string_handle();
2956  }
2957}
2958
2959}  // namespace internal
2960}  // namespace v8
2961