1// Copyright 2018 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef V8_INTL_SUPPORT 6#error Internationalization is expected to be enabled. 7#endif // V8_INTL_SUPPORT 8 9#include "src/objects/js-break-iterator.h" 10 11#include "src/objects/intl-objects.h" 12#include "src/objects/js-break-iterator-inl.h" 13#include "src/objects/managed-inl.h" 14#include "src/objects/option-utils.h" 15#include "unicode/brkiter.h" 16 17namespace v8 { 18namespace internal { 19 20namespace { 21enum class Type { CHARACTER, WORD, SENTENCE, LINE }; 22} // anonymous namespace 23 24MaybeHandle<JSV8BreakIterator> JSV8BreakIterator::New( 25 Isolate* isolate, Handle<Map> map, Handle<Object> locales, 26 Handle<Object> options_obj, const char* service) { 27 Factory* factory = isolate->factory(); 28 29 // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales). 30 Maybe<std::vector<std::string>> maybe_requested_locales = 31 Intl::CanonicalizeLocaleList(isolate, locales); 32 MAYBE_RETURN(maybe_requested_locales, MaybeHandle<JSV8BreakIterator>()); 33 std::vector<std::string> requested_locales = 34 maybe_requested_locales.FromJust(); 35 36 Handle<JSReceiver> options; 37 if (options_obj->IsUndefined(isolate)) { 38 options = factory->NewJSObjectWithNullProto(); 39 } else { 40 ASSIGN_RETURN_ON_EXCEPTION(isolate, options, 41 Object::ToObject(isolate, options_obj, service), 42 JSV8BreakIterator); 43 } 44 45 // Extract locale string 46 Maybe<Intl::MatcherOption> maybe_locale_matcher = 47 Intl::GetLocaleMatcher(isolate, options, service); 48 MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSV8BreakIterator>()); 49 Intl::MatcherOption matcher = maybe_locale_matcher.FromJust(); 50 51 Maybe<Intl::ResolvedLocale> maybe_resolve_locale = 52 Intl::ResolveLocale(isolate, JSV8BreakIterator::GetAvailableLocales(), 53 requested_locales, matcher, {}); 54 if (maybe_resolve_locale.IsNothing()) { 55 THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError), 56 JSV8BreakIterator); 57 } 58 Intl::ResolvedLocale r = maybe_resolve_locale.FromJust(); 59 60 // Extract type from options 61 Maybe<Type> maybe_type = GetStringOption<Type>( 62 isolate, options, "type", service, 63 {"word", "character", "sentence", "line"}, 64 {Type::WORD, Type::CHARACTER, Type::SENTENCE, Type::LINE}, Type::WORD); 65 MAYBE_RETURN(maybe_type, MaybeHandle<JSV8BreakIterator>()); 66 Type type_enum = maybe_type.FromJust(); 67 68 icu::Locale icu_locale = r.icu_locale; 69 DCHECK(!icu_locale.isBogus()); 70 71 // Construct break_iterator using icu_locale and type 72 UErrorCode status = U_ZERO_ERROR; 73 std::unique_ptr<icu::BreakIterator> break_iterator = nullptr; 74 switch (type_enum) { 75 case Type::CHARACTER: 76 break_iterator.reset( 77 icu::BreakIterator::createCharacterInstance(icu_locale, status)); 78 break; 79 case Type::SENTENCE: 80 break_iterator.reset( 81 icu::BreakIterator::createSentenceInstance(icu_locale, status)); 82 break; 83 case Type::LINE: 84 isolate->CountUsage( 85 v8::Isolate::UseCounterFeature::kBreakIteratorTypeLine); 86 break_iterator.reset( 87 icu::BreakIterator::createLineInstance(icu_locale, status)); 88 break; 89 default: 90 isolate->CountUsage( 91 v8::Isolate::UseCounterFeature::kBreakIteratorTypeWord); 92 break_iterator.reset( 93 icu::BreakIterator::createWordInstance(icu_locale, status)); 94 break; 95 } 96 97 // Error handling for break_iterator 98 if (U_FAILURE(status) || break_iterator.get() == nullptr) { 99 THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError), 100 JSV8BreakIterator); 101 } 102 isolate->CountUsage(v8::Isolate::UseCounterFeature::kBreakIterator); 103 104 // Construct managed objects from pointers 105 Handle<Managed<icu::BreakIterator>> managed_break_iterator = 106 Managed<icu::BreakIterator>::FromUniquePtr(isolate, 0, 107 std::move(break_iterator)); 108 Handle<Managed<icu::UnicodeString>> managed_unicode_string = 109 Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, nullptr); 110 111 Handle<String> locale_str = 112 isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str()); 113 114 // Now all properties are ready, so we can allocate the result object. 115 Handle<JSV8BreakIterator> break_iterator_holder = 116 Handle<JSV8BreakIterator>::cast( 117 isolate->factory()->NewFastOrSlowJSObjectFromMap(map)); 118 DisallowGarbageCollection no_gc; 119 break_iterator_holder->set_locale(*locale_str); 120 break_iterator_holder->set_break_iterator(*managed_break_iterator); 121 break_iterator_holder->set_unicode_string(*managed_unicode_string); 122 123 // Return break_iterator_holder 124 return break_iterator_holder; 125} 126 127namespace { 128 129Type GetType(icu::BreakIterator* break_iterator) { 130 // Since the developer calling the Intl.v8BreakIterator already know the type, 131 // we usually do not need to know the type unless the resolvedOptions() is 132 // called, we use the following trick to figure out the type instead of 133 // storing it with the JSV8BreakIterator object to save memory. 134 // This routine is not fast but should be seldomly used only. 135 136 // We need to clone a copy of break iteator because we need to setText to it. 137 std::unique_ptr<icu::BreakIterator> cloned_break_iterator( 138 break_iterator->clone()); 139 // Use a magic string "He is." to call next(). 140 // character type: will return 1 for "H" 141 // word type: will return 2 for "He" 142 // line type: will return 3 for "He " 143 // sentence type: will return 6 for "He is." 144 icu::UnicodeString data("He is."); 145 cloned_break_iterator->setText(data); 146 switch (cloned_break_iterator->next()) { 147 case 1: // After "H" 148 return Type::CHARACTER; 149 case 2: // After "He" 150 return Type::WORD; 151 case 3: // After "He " 152 return Type::LINE; 153 case 6: // After "He is." 154 return Type::SENTENCE; 155 default: 156 UNREACHABLE(); 157 } 158} 159 160Handle<String> TypeAsString(Isolate* isolate, Type type) { 161 switch (type) { 162 case Type::CHARACTER: 163 return ReadOnlyRoots(isolate).character_string_handle(); 164 case Type::WORD: 165 return ReadOnlyRoots(isolate).word_string_handle(); 166 case Type::SENTENCE: 167 return ReadOnlyRoots(isolate).sentence_string_handle(); 168 case Type::LINE: 169 return ReadOnlyRoots(isolate).line_string_handle(); 170 } 171 UNREACHABLE(); 172} 173 174} // anonymous namespace 175 176Handle<JSObject> JSV8BreakIterator::ResolvedOptions( 177 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) { 178 Factory* factory = isolate->factory(); 179 180 Type type = GetType(break_iterator->break_iterator().raw()); 181 182 Handle<JSObject> result = factory->NewJSObject(isolate->object_function()); 183 Handle<String> locale(break_iterator->locale(), isolate); 184 185 JSObject::AddProperty(isolate, result, factory->locale_string(), locale, 186 NONE); 187 JSObject::AddProperty(isolate, result, factory->type_string(), 188 TypeAsString(isolate, type), NONE); 189 return result; 190} 191 192void JSV8BreakIterator::AdoptText( 193 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator_holder, 194 Handle<String> text) { 195 icu::BreakIterator* break_iterator = 196 break_iterator_holder->break_iterator().raw(); 197 DCHECK_NOT_NULL(break_iterator); 198 Handle<Managed<icu::UnicodeString>> unicode_string = 199 Intl::SetTextToBreakIterator(isolate, text, break_iterator); 200 break_iterator_holder->set_unicode_string(*unicode_string); 201} 202 203Handle<Object> JSV8BreakIterator::Current( 204 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) { 205 return isolate->factory()->NewNumberFromInt( 206 break_iterator->break_iterator().raw()->current()); 207} 208 209Handle<Object> JSV8BreakIterator::First( 210 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) { 211 return isolate->factory()->NewNumberFromInt( 212 break_iterator->break_iterator().raw()->first()); 213} 214 215Handle<Object> JSV8BreakIterator::Next( 216 Isolate* isolate, Handle<JSV8BreakIterator> break_iterator) { 217 return isolate->factory()->NewNumberFromInt( 218 break_iterator->break_iterator().raw()->next()); 219} 220 221String JSV8BreakIterator::BreakType(Isolate* isolate, 222 Handle<JSV8BreakIterator> break_iterator) { 223 int32_t status = break_iterator->break_iterator().raw()->getRuleStatus(); 224 // Keep return values in sync with JavaScript BreakType enum. 225 if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) { 226 return ReadOnlyRoots(isolate).none_string(); 227 } 228 if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) { 229 return ReadOnlyRoots(isolate).number_string(); 230 } 231 if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) { 232 return ReadOnlyRoots(isolate).letter_string(); 233 } 234 if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) { 235 return ReadOnlyRoots(isolate).kana_string(); 236 } 237 if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) { 238 return ReadOnlyRoots(isolate).ideo_string(); 239 } 240 return ReadOnlyRoots(isolate).unknown_string(); 241} 242 243const std::set<std::string>& JSV8BreakIterator::GetAvailableLocales() { 244 return Intl::GetAvailableLocales(); 245} 246 247} // namespace internal 248} // namespace v8 249