1// Copyright 2018 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_INTL_SUPPORT
6#error Internationalization is expected to be enabled.
7#endif  // V8_INTL_SUPPORT
8
9#include "src/objects/js-break-iterator.h"
10
11#include "src/objects/intl-objects.h"
12#include "src/objects/js-break-iterator-inl.h"
13#include "src/objects/managed-inl.h"
14#include "src/objects/option-utils.h"
15#include "unicode/brkiter.h"
16
17namespace v8 {
18namespace internal {
19
namespace {
// The kinds of text boundaries a JSV8BreakIterator can be created for. Each
// enumerator corresponds to one accepted value of the "type" option.
enum class Type {
  CHARACTER,  // "character"
  WORD,       // "word"
  SENTENCE,   // "sentence"
  LINE        // "line"
};
}  // anonymous namespace
23
24MaybeHandle<JSV8BreakIterator> JSV8BreakIterator::New(
25    Isolate* isolate, Handle<Map> map, Handle<Object> locales,
26    Handle<Object> options_obj, const char* service) {
27  Factory* factory = isolate->factory();
28
29  // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
30  Maybe<std::vector<std::string>> maybe_requested_locales =
31      Intl::CanonicalizeLocaleList(isolate, locales);
32  MAYBE_RETURN(maybe_requested_locales, MaybeHandle<JSV8BreakIterator>());
33  std::vector<std::string> requested_locales =
34      maybe_requested_locales.FromJust();
35
36  Handle<JSReceiver> options;
37  if (options_obj->IsUndefined(isolate)) {
38    options = factory->NewJSObjectWithNullProto();
39  } else {
40    ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
41                               Object::ToObject(isolate, options_obj, service),
42                               JSV8BreakIterator);
43  }
44
45  // Extract locale string
46  Maybe<Intl::MatcherOption> maybe_locale_matcher =
47      Intl::GetLocaleMatcher(isolate, options, service);
48  MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSV8BreakIterator>());
49  Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
50
51  Maybe<Intl::ResolvedLocale> maybe_resolve_locale =
52      Intl::ResolveLocale(isolate, JSV8BreakIterator::GetAvailableLocales(),
53                          requested_locales, matcher, {});
54  if (maybe_resolve_locale.IsNothing()) {
55    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
56                    JSV8BreakIterator);
57  }
58  Intl::ResolvedLocale r = maybe_resolve_locale.FromJust();
59
60  // Extract type from options
61  Maybe<Type> maybe_type = GetStringOption<Type>(
62      isolate, options, "type", service,
63      {"word", "character", "sentence", "line"},
64      {Type::WORD, Type::CHARACTER, Type::SENTENCE, Type::LINE}, Type::WORD);
65  MAYBE_RETURN(maybe_type, MaybeHandle<JSV8BreakIterator>());
66  Type type_enum = maybe_type.FromJust();
67
68  icu::Locale icu_locale = r.icu_locale;
69  DCHECK(!icu_locale.isBogus());
70
71  // Construct break_iterator using icu_locale and type
72  UErrorCode status = U_ZERO_ERROR;
73  std::unique_ptr<icu::BreakIterator> break_iterator = nullptr;
74  switch (type_enum) {
75    case Type::CHARACTER:
76      break_iterator.reset(
77          icu::BreakIterator::createCharacterInstance(icu_locale, status));
78      break;
79    case Type::SENTENCE:
80      break_iterator.reset(
81          icu::BreakIterator::createSentenceInstance(icu_locale, status));
82      break;
83    case Type::LINE:
84      isolate->CountUsage(
85          v8::Isolate::UseCounterFeature::kBreakIteratorTypeLine);
86      break_iterator.reset(
87          icu::BreakIterator::createLineInstance(icu_locale, status));
88      break;
89    default:
90      isolate->CountUsage(
91          v8::Isolate::UseCounterFeature::kBreakIteratorTypeWord);
92      break_iterator.reset(
93          icu::BreakIterator::createWordInstance(icu_locale, status));
94      break;
95  }
96
97  // Error handling for break_iterator
98  if (U_FAILURE(status) || break_iterator.get() == nullptr) {
99    THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
100                    JSV8BreakIterator);
101  }
102  isolate->CountUsage(v8::Isolate::UseCounterFeature::kBreakIterator);
103
104  // Construct managed objects from pointers
105  Handle<Managed<icu::BreakIterator>> managed_break_iterator =
106      Managed<icu::BreakIterator>::FromUniquePtr(isolate, 0,
107                                                 std::move(break_iterator));
108  Handle<Managed<icu::UnicodeString>> managed_unicode_string =
109      Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, nullptr);
110
111  Handle<String> locale_str =
112      isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
113
114  // Now all properties are ready, so we can allocate the result object.
115  Handle<JSV8BreakIterator> break_iterator_holder =
116      Handle<JSV8BreakIterator>::cast(
117          isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
118  DisallowGarbageCollection no_gc;
119  break_iterator_holder->set_locale(*locale_str);
120  break_iterator_holder->set_break_iterator(*managed_break_iterator);
121  break_iterator_holder->set_unicode_string(*managed_unicode_string);
122
123  // Return break_iterator_holder
124  return break_iterator_holder;
125}
126
127namespace {
128
129Type GetType(icu::BreakIterator* break_iterator) {
130  // Since the developer calling the Intl.v8BreakIterator already know the type,
131  // we usually do not need to know the type unless the resolvedOptions() is
132  // called, we use the following trick to figure out the type instead of
133  // storing it with the JSV8BreakIterator object to save memory.
134  // This routine is not fast but should be seldomly used only.
135
136  // We need to clone a copy of break iteator because we need to setText to it.
137  std::unique_ptr<icu::BreakIterator> cloned_break_iterator(
138      break_iterator->clone());
139  // Use a magic string "He is." to call next().
140  //  character type: will return 1 for "H"
141  //  word type: will return 2 for "He"
142  //  line type: will return 3 for "He "
143  //  sentence type: will return 6 for "He is."
144  icu::UnicodeString data("He is.");
145  cloned_break_iterator->setText(data);
146  switch (cloned_break_iterator->next()) {
147    case 1:  // After "H"
148      return Type::CHARACTER;
149    case 2:  // After "He"
150      return Type::WORD;
151    case 3:  // After "He "
152      return Type::LINE;
153    case 6:  // After "He is."
154      return Type::SENTENCE;
155    default:
156      UNREACHABLE();
157  }
158}
159
160Handle<String> TypeAsString(Isolate* isolate, Type type) {
161  switch (type) {
162    case Type::CHARACTER:
163      return ReadOnlyRoots(isolate).character_string_handle();
164    case Type::WORD:
165      return ReadOnlyRoots(isolate).word_string_handle();
166    case Type::SENTENCE:
167      return ReadOnlyRoots(isolate).sentence_string_handle();
168    case Type::LINE:
169      return ReadOnlyRoots(isolate).line_string_handle();
170  }
171  UNREACHABLE();
172}
173
174}  // anonymous namespace
175
176Handle<JSObject> JSV8BreakIterator::ResolvedOptions(
177    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
178  Factory* factory = isolate->factory();
179
180  Type type = GetType(break_iterator->break_iterator().raw());
181
182  Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
183  Handle<String> locale(break_iterator->locale(), isolate);
184
185  JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
186                        NONE);
187  JSObject::AddProperty(isolate, result, factory->type_string(),
188                        TypeAsString(isolate, type), NONE);
189  return result;
190}
191
192void JSV8BreakIterator::AdoptText(
193    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator_holder,
194    Handle<String> text) {
195  icu::BreakIterator* break_iterator =
196      break_iterator_holder->break_iterator().raw();
197  DCHECK_NOT_NULL(break_iterator);
198  Handle<Managed<icu::UnicodeString>> unicode_string =
199      Intl::SetTextToBreakIterator(isolate, text, break_iterator);
200  break_iterator_holder->set_unicode_string(*unicode_string);
201}
202
203Handle<Object> JSV8BreakIterator::Current(
204    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
205  return isolate->factory()->NewNumberFromInt(
206      break_iterator->break_iterator().raw()->current());
207}
208
209Handle<Object> JSV8BreakIterator::First(
210    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
211  return isolate->factory()->NewNumberFromInt(
212      break_iterator->break_iterator().raw()->first());
213}
214
215Handle<Object> JSV8BreakIterator::Next(
216    Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) {
217  return isolate->factory()->NewNumberFromInt(
218      break_iterator->break_iterator().raw()->next());
219}
220
221String JSV8BreakIterator::BreakType(Isolate* isolate,
222                                    Handle<JSV8BreakIterator> break_iterator) {
223  int32_t status = break_iterator->break_iterator().raw()->getRuleStatus();
224  // Keep return values in sync with JavaScript BreakType enum.
225  if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) {
226    return ReadOnlyRoots(isolate).none_string();
227  }
228  if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) {
229    return ReadOnlyRoots(isolate).number_string();
230  }
231  if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) {
232    return ReadOnlyRoots(isolate).letter_string();
233  }
234  if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) {
235    return ReadOnlyRoots(isolate).kana_string();
236  }
237  if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) {
238    return ReadOnlyRoots(isolate).ideo_string();
239  }
240  return ReadOnlyRoots(isolate).unknown_string();
241}
242
243const std::set<std::string>& JSV8BreakIterator::GetAvailableLocales() {
244  return Intl::GetAvailableLocales();
245}
246
247}  // namespace internal
248}  // namespace v8
249