// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef V8_INTL_SUPPORT
6#error Internationalization is expected to be enabled.
7#endif  // V8_INTL_SUPPORT
8
9#ifndef V8_OBJECTS_INTL_OBJECTS_H_
10#define V8_OBJECTS_INTL_OBJECTS_H_
11
#include <algorithm>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "src/base/timezone-cache.h"
#include "src/objects/contexts.h"
#include "src/objects/managed.h"
#include "src/objects/objects.h"
#include "unicode/locid.h"
#include "unicode/uversion.h"
23
// NOTE(review): presumably the oldest ICU major version this code is expected
// to be built against -- confirm against the build configuration.
#define V8_MINIMUM_ICU_VERSION 69
25
// Forward declarations of the ICU classes that this header only refers to by
// pointer or reference.
namespace U_ICU_NAMESPACE {
class BreakIterator;
class Collator;
class FormattedValue;
class StringEnumeration;
class TimeZone;
class UnicodeString;
}  // namespace U_ICU_NAMESPACE
34
35namespace v8 {
36namespace internal {
37
// A field id together with the begin and end positions of the text it applies
// to.
// NOTE(review): presumably positions index into the formatted string and form
// a half-open range [begin_pos, end_pos) -- confirm against the users.
struct NumberFormatSpan {
  // Default member initializers make a default-initialized span
  // (`NumberFormatSpan s;`) hold the same zeros as a value-initialized one
  // (`NumberFormatSpan{}`) instead of indeterminate values.
  int32_t field_id = 0;
  int32_t begin_pos = 0;
  int32_t end_pos = 0;

  NumberFormatSpan() = default;
  NumberFormatSpan(int32_t field_id, int32_t begin_pos, int32_t end_pos)
      : field_id(field_id), begin_pos(begin_pos), end_pos(end_pos) {}
};
47
48V8_EXPORT_PRIVATE std::vector<NumberFormatSpan> FlattenRegionsToParts(
49    std::vector<NumberFormatSpan>* regions);
50
// Forward declarations of types that are defined elsewhere.
template <typename T>
class Handle;
class JSCollator;
54
55class Intl {
56 public:
57  enum class BoundFunctionContextSlot {
58    kBoundFunction = Context::MIN_CONTEXT_SLOTS,
59    kLength
60  };
61
62  enum class FormatRangeSource { kShared, kStartRange, kEndRange };
63
64  class FormatRangeSourceTracker {
65   public:
66    FormatRangeSourceTracker();
67    void Add(int32_t field, int32_t start, int32_t limit);
68    FormatRangeSource GetSource(int32_t start, int32_t limit) const;
69
70   private:
71    int32_t start_[2];
72    int32_t limit_[2];
73
74    bool FieldContains(int32_t field, int32_t start, int32_t limit) const;
75  };
76
77  static Handle<String> SourceString(Isolate* isolate,
78                                     FormatRangeSource source);
79
80  // Build a set of ICU locales from a list of Locales. If there is a locale
81  // with a script tag then the locales also include a locale without the
82  // script; eg, pa_Guru_IN (language=Panjabi, script=Gurmukhi, country-India)
83  // would include pa_IN.
84  static std::set<std::string> BuildLocaleSet(
85      const std::vector<std::string>& locales, const char* path,
86      const char* validate_key);
87
88  static Maybe<std::string> ToLanguageTag(const icu::Locale& locale);
89
90  // Get the name of the numbering system from locale.
91  // ICU doesn't expose numbering system in any way, so we have to assume that
92  // for given locale NumberingSystem constructor produces the same digits as
93  // NumberFormat/Calendar would.
94  static std::string GetNumberingSystem(const icu::Locale& icu_locale);
95
96  static V8_WARN_UNUSED_RESULT MaybeHandle<JSObject> SupportedLocalesOf(
97      Isolate* isolate, const char* method_name,
98      const std::set<std::string>& available_locales, Handle<Object> locales_in,
99      Handle<Object> options_in);
100
101  // https://tc39.github.io/ecma402/#sec-canonicalizelocalelist
102  // {only_return_one_result} is an optimization for callers that only
103  // care about the first result.
104  static Maybe<std::vector<std::string>> CanonicalizeLocaleList(
105      Isolate* isolate, Handle<Object> locales,
106      bool only_return_one_result = false);
107
108  // ecma-402 #sec-intl.getcanonicallocales
109  V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> GetCanonicalLocales(
110      Isolate* isolate, Handle<Object> locales);
111
112  // ecma-402 #sec-intl.supportedvaluesof
113  V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> SupportedValuesOf(
114      Isolate* isolate, Handle<Object> key);
115
116  // For locale sensitive functions
117  V8_WARN_UNUSED_RESULT static MaybeHandle<String> StringLocaleConvertCase(
118      Isolate* isolate, Handle<String> s, bool is_upper,
119      Handle<Object> locales);
120
121  V8_WARN_UNUSED_RESULT static MaybeHandle<String> ConvertToUpper(
122      Isolate* isolate, Handle<String> s);
123
124  V8_WARN_UNUSED_RESULT static MaybeHandle<String> ConvertToLower(
125      Isolate* isolate, Handle<String> s);
126
127  V8_WARN_UNUSED_RESULT static base::Optional<int> StringLocaleCompare(
128      Isolate* isolate, Handle<String> s1, Handle<String> s2,
129      Handle<Object> locales, Handle<Object> options, const char* method_name);
130
131  enum class CompareStringsOptions {
132    kNone,
133    kTryFastPath,
134  };
135  template <class IsolateT>
136  V8_EXPORT_PRIVATE static CompareStringsOptions CompareStringsOptionsFor(
137      IsolateT* isolate, Handle<Object> locales, Handle<Object> options);
138  V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static int CompareStrings(
139      Isolate* isolate, const icu::Collator& collator, Handle<String> s1,
140      Handle<String> s2,
141      CompareStringsOptions compare_strings_options =
142          CompareStringsOptions::kNone);
143
144  // ecma402/#sup-properties-of-the-number-prototype-object
145  V8_WARN_UNUSED_RESULT static MaybeHandle<String> NumberToLocaleString(
146      Isolate* isolate, Handle<Object> num, Handle<Object> locales,
147      Handle<Object> options, const char* method_name);
148
149  // [[RoundingPriority]] is one of the String values "auto", "morePrecision",
150  // or "lessPrecision", specifying the rounding priority for the number.
151  enum class RoundingPriority {
152    kAuto,
153    kMorePrecision,
154    kLessPrecision,
155  };
156
157  enum class RoundingType {
158    kFractionDigits,
159    kSignificantDigits,
160    kMorePrecision,
161    kLessPrecision,
162  };
163
164  // ecma402/#sec-setnfdigitoptions
165  struct NumberFormatDigitOptions {
166    int minimum_integer_digits;
167    int minimum_fraction_digits;
168    int maximum_fraction_digits;
169    int minimum_significant_digits;
170    int maximum_significant_digits;
171    RoundingPriority rounding_priority;
172    RoundingType rounding_type;
173  };
174  V8_WARN_UNUSED_RESULT static Maybe<NumberFormatDigitOptions>
175  SetNumberFormatDigitOptions(Isolate* isolate, Handle<JSReceiver> options,
176                              int mnfd_default, int mxfd_default,
177                              bool notation_is_compact);
178
179  // Helper function to convert a UnicodeString to a Handle<String>
180  V8_WARN_UNUSED_RESULT static MaybeHandle<String> ToString(
181      Isolate* isolate, const icu::UnicodeString& string);
182
183  // Helper function to convert a substring of UnicodeString to a Handle<String>
184  V8_WARN_UNUSED_RESULT static MaybeHandle<String> ToString(
185      Isolate* isolate, const icu::UnicodeString& string, int32_t begin,
186      int32_t end);
187
188  // Helper function to convert a FormattedValue to String
189  V8_WARN_UNUSED_RESULT static MaybeHandle<String> FormattedToString(
190      Isolate* isolate, const icu::FormattedValue& formatted);
191
192  // Helper function to convert number field id to type string.
193  static Handle<String> NumberFieldToType(Isolate* isolate,
194                                          const NumberFormatSpan& part,
195                                          const icu::UnicodeString& text,
196                                          bool is_nan);
197
198  // A helper function to implement formatToParts which add element to array as
199  // $array[$index] = { type: $field_type_string, value: $value }
200  static void AddElement(Isolate* isolate, Handle<JSArray> array, int index,
201                         Handle<String> field_type_string,
202                         Handle<String> value);
203
204  // A helper function to implement formatToParts which add element to array as
205  // $array[$index] = {
206  //   type: $field_type_string, value: $value,
207  //   $additional_property_name: $additional_property_value
208  // }
209  static void AddElement(Isolate* isolate, Handle<JSArray> array, int index,
210                         Handle<String> field_type_string, Handle<String> value,
211                         Handle<String> additional_property_name,
212                         Handle<String> additional_property_value);
213
214  // In ECMA 402 v1, Intl constructors supported a mode of operation
215  // where calling them with an existing object as a receiver would
216  // transform the receiver into the relevant Intl instance with all
217  // internal slots. In ECMA 402 v2, this capability was removed, to
218  // avoid adding internal slots on existing objects. In ECMA 402 v3,
219  // the capability was re-added as "normative optional" in a mode
220  // which chains the underlying Intl instance on any object, when the
221  // constructor is called
222  //
223  // See ecma402/#legacy-constructor.
224  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> LegacyUnwrapReceiver(
225      Isolate* isolate, Handle<JSReceiver> receiver,
226      Handle<JSFunction> constructor, bool has_initialized_slot);
227
228  // enum for "localeMatcher" option: shared by many Intl objects.
229  enum class MatcherOption { kBestFit, kLookup };
230
231  // Shared function to read the "localeMatcher" option.
232  V8_WARN_UNUSED_RESULT static Maybe<MatcherOption> GetLocaleMatcher(
233      Isolate* isolate, Handle<JSReceiver> options, const char* method_name);
234
235  // Shared function to read the "numberingSystem" option.
236  V8_WARN_UNUSED_RESULT static Maybe<bool> GetNumberingSystem(
237      Isolate* isolate, Handle<JSReceiver> options, const char* method_name,
238      std::unique_ptr<char[]>* result);
239
240  // Check the calendar is valid or not for that locale.
241  static bool IsValidCalendar(const icu::Locale& locale,
242                              const std::string& value);
243
244  // Check the collation is valid or not for that locale.
245  static bool IsValidCollation(const icu::Locale& locale,
246                               const std::string& value);
247
248  // Check the numberingSystem is valid.
249  static bool IsValidNumberingSystem(const std::string& value);
250
251  // Check the calendar is well formed.
252  static bool IsWellFormedCalendar(const std::string& value);
253
254  // Check the currency is well formed.
255  static bool IsWellFormedCurrency(const std::string& value);
256
257  struct ResolvedLocale {
258    std::string locale;
259    icu::Locale icu_locale;
260    std::map<std::string, std::string> extensions;
261  };
262
263  static Maybe<ResolvedLocale> ResolveLocale(
264      Isolate* isolate, const std::set<std::string>& available_locales,
265      const std::vector<std::string>& requested_locales, MatcherOption options,
266      const std::set<std::string>& relevant_extension_keys);
267
268  // A helper template to implement the GetAvailableLocales
269  // Usage in src/objects/js-XXX.cc
270  // const std::set<std::string>& JSXxx::GetAvailableLocales() {
271  //   static base::LazyInstance<Intl::AvailableLocales<icu::YYY>>::type
272  //       available_locales = LAZY_INSTANCE_INITIALIZER;
273  //   return available_locales.Pointer()->Get();
274  // }
275
276  struct SkipResourceCheck {
277    static const char* key() { return nullptr; }
278    static const char* path() { return nullptr; }
279  };
280
281  template <typename C = SkipResourceCheck>
282  class AvailableLocales {
283   public:
284    AvailableLocales() {
285      UErrorCode status = U_ZERO_ERROR;
286      UEnumeration* uenum =
287          uloc_openAvailableByType(ULOC_AVAILABLE_WITH_LEGACY_ALIASES, &status);
288      DCHECK(U_SUCCESS(status));
289
290      std::vector<std::string> all_locales;
291      const char* loc;
292      while ((loc = uenum_next(uenum, nullptr, &status)) != nullptr) {
293        DCHECK(U_SUCCESS(status));
294        std::string locstr(loc);
295        std::replace(locstr.begin(), locstr.end(), '_', '-');
296        // Handle special case
297        if (locstr == "en-US-POSIX") locstr = "en-US-u-va-posix";
298        all_locales.push_back(locstr);
299      }
300      uenum_close(uenum);
301
302      set_ = Intl::BuildLocaleSet(all_locales, C::path(), C::key());
303    }
304    const std::set<std::string>& Get() const { return set_; }
305
306   private:
307    std::set<std::string> set_;
308  };
309
310  // Utility function to set text to BreakIterator.
311  static Handle<Managed<icu::UnicodeString>> SetTextToBreakIterator(
312      Isolate* isolate, Handle<String> text,
313      icu::BreakIterator* break_iterator);
314
315  // ecma262 #sec-string.prototype.normalize
316  V8_WARN_UNUSED_RESULT static MaybeHandle<String> Normalize(
317      Isolate* isolate, Handle<String> string, Handle<Object> form_input);
318  static base::TimezoneCache* CreateTimeZoneCache();
319
320  // Convert a Handle<String> to icu::UnicodeString
321  static icu::UnicodeString ToICUUnicodeString(Isolate* isolate,
322                                               Handle<String> string,
323                                               int offset = 0);
324
325  static const uint8_t* ToLatin1LowerTable();
326
327  static const uint8_t* AsciiCollationWeightsL1();
328  static const uint8_t* AsciiCollationWeightsL3();
329  static const int kAsciiCollationWeightsLength;
330
331  static String ConvertOneByteToLower(String src, String dst);
332
333  static const std::set<std::string>& GetAvailableLocales();
334
335  static const std::set<std::string>& GetAvailableLocalesForDateFormat();
336
337  V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> ToJSArray(
338      Isolate* isolate, const char* unicode_key,
339      icu::StringEnumeration* enumeration,
340      const std::function<bool(const char*)>& removes, bool sort);
341
342  static bool RemoveCollation(const char* collation);
343
344  static std::set<std::string> SanctionedSimpleUnits();
345
346  V8_WARN_UNUSED_RESULT static MaybeHandle<JSArray> AvailableCalendars(
347      Isolate* isolate);
348
349  V8_WARN_UNUSED_RESULT static bool IsValidTimeZoneName(
350      const icu::TimeZone& tz);
351  V8_WARN_UNUSED_RESULT static bool IsValidTimeZoneName(Isolate* isolate,
352                                                        const std::string& id);
353  V8_WARN_UNUSED_RESULT static bool IsValidTimeZoneName(Isolate* isolate,
354                                                        Handle<String> id);
355
356  // Function to support Temporal
357  V8_WARN_UNUSED_RESULT static std::string TimeZoneIdFromIndex(int32_t index);
358
359  V8_WARN_UNUSED_RESULT static Maybe<bool> GetTimeZoneIndex(
360      Isolate* isolate, Handle<String> identifier, int32_t* index);
361
362  V8_WARN_UNUSED_RESULT static MaybeHandle<String> CanonicalizeTimeZoneName(
363      Isolate* isolate, Handle<String> identifier);
364
365  // ecma402/#sec-coerceoptionstoobject
366  V8_WARN_UNUSED_RESULT static MaybeHandle<JSReceiver> CoerceOptionsToObject(
367      Isolate* isolate, Handle<Object> options, const char* service);
368
369  // #sec-tointlmathematicalvalue
370  // The implementation preserve the Object in String, BigInt or Number
371  V8_WARN_UNUSED_RESULT static MaybeHandle<Object>
372  ToIntlMathematicalValueAsNumberBigIntOrString(Isolate* isolate,
373                                                Handle<Object> input);
374};
375
376}  // namespace internal
377}  // namespace v8
378
379#endif  // V8_OBJECTS_INTL_OBJECTS_H_
380