/*
 * Copyright (c) 2023 Shenzhen Kaihong Digital Industry Development Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ecmascript/js_segments.h"

// NOTE(review): the header name of this include was missing from the reviewed copy;
// <cstring> is assumed here - confirm against the repository.
#include <cstring>

#include "ecmascript/base/builtins_base.h"
#include "ecmascript/intl/locale_helper.h"
#include "ecmascript/object_factory-inl.h"

namespace panda::ecmascript {
// Stores icuBreakIterator in the ICU field of `segments`.
// If the field already holds a JSNativePointer, only its external pointer is replaced and `callback`
// is not used; otherwise a new JSNativePointer is created from (icuBreakIterator, callback) and
// written to the field.
// icuBreakIterator must not be null (checked by ASSERT).
void JSSegments::SetIcuBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments,
                                     icu::BreakIterator* icuBreakIterator, const NativePointerCallback &callback)
{
    EcmaVM *ecmaVm = thread->GetEcmaVM();
    ObjectFactory *factory = ecmaVm->GetFactory();

    ASSERT(icuBreakIterator != nullptr);
    JSTaggedValue data = segments->GetIcuField();
    if (data.IsJSNativePointer()) {
        // Reuse the existing native pointer object instead of allocating a new one.
        JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
        native->ResetExternalPointer(thread, icuBreakIterator);
        return;
    }
    JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuBreakIterator, callback);
    segments->SetIcuField(thread, pointer.GetTaggedValue());
}

// Stores icuUnicodeString in the UnicodeString field of `segments`.
// Mirrors SetIcuBreakIterator: an existing JSNativePointer is re-pointed (and `callback` is not used),
// otherwise a new JSNativePointer is created from (icuUnicodeString, callback).
// icuUnicodeString must not be null (checked by ASSERT).
void JSSegments::SetUString(JSThread *thread, const JSHandle<JSSegments> &segments,
                            icu::UnicodeString* icuUnicodeString, const NativePointerCallback &callback)
{
    EcmaVM *ecmaVm = thread->GetEcmaVM();
    ObjectFactory *factory = ecmaVm->GetFactory();

    ASSERT(icuUnicodeString != nullptr);
    JSTaggedValue data = segments->GetUnicodeString();
    if (data.IsJSNativePointer()) {
        JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
        native->ResetExternalPointer(thread, icuUnicodeString);
        return;
    }
    JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuUnicodeString, callback);
    segments->SetUnicodeString(thread, pointer.GetTaggedValue());
}

// Converts `text` to a heap-allocated icu::UnicodeString, sets it as the text of breakIterator and
// records the string on `segments` through JSSegments::SetUString with JSSegments::FreeUString as the
// callback.
// The string is kept on `segments` because, per the ICU BreakIterator::setText documentation, the
// iterator retains a reference to the supplied string rather than copying it.
void SetTextToBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments,
                            JSHandle<EcmaString> text, icu::BreakIterator* breakIterator)
{
    std::u16string u16str = EcmaStringAccessor(text).ToU16String();
    icu::UnicodeString src(u16str.data(), u16str.size());
    icu::UnicodeString* uText = static_cast<icu::UnicodeString*>(src.clone());
    // Check the clone before it is dereferenced; SetUString asserts the same condition, but only
    // after setText() has already used *uText.
    ASSERT(uText != nullptr);
    breakIterator->setText(*uText);
    JSSegments::SetUString(thread, segments, uText, JSSegments::FreeUString);
}

// Creates a Segments object for `segmenter` and `string`.
// The new object gets its own clone of the segmenter's ICU break iterator, the segmenter's granularity,
// the original string, and an ICU copy of the string that the cloned iterator is set to scan.
JSHandle<JSSegments> JSSegments::CreateSegmentsObject(JSThread *thread,
                                                      const JSHandle<JSSegmenter> &segmenter,
                                                      const JSHandle<EcmaString> &string)
{
    ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
    // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ».
    // 2. Let segments be OrdinaryObjectCreate(%SegmentsPrototype%, internalSlotsList).
    auto env = thread->GetEcmaVM()->GetGlobalEnv();
    JSHandle<JSFunction> segmentsCtor(env->GetSegmentsFunction());
    JSHandle<JSSegments> segments(factory->NewJSObjectByConstructor(segmentsCtor));
    // 3. Set segments.[[SegmentsSegmenter]] to segmenter.
    // Only the break iterator (cloned) and the granularity are carried over from the segmenter.
    icu::BreakIterator* icuBreakIterator = segmenter->GetIcuBreakIterator()->clone();
    SetIcuBreakIterator(thread, segments, icuBreakIterator, JSSegments::FreeIcuBreakIterator);
    segments->SetGranularity(segmenter->GetGranularity());
    // 4. Set segments.[[SegmentsString]] to string.
    segments->SetSegmentsString(thread, string);
    SetTextToBreakIterator(thread, segments, string, icuBreakIterator);
    return segments;
}

// Returns the segment data object for the segment that contains code unit `index`, or undefined when
// `index` is outside [0, len), where len is the length of the stored ICU string.
// `index` is truncated to an int32_t after the range check.
JSTaggedValue JSSegments::Containing(JSThread *thread, const JSHandle<JSSegments> &segments, double index)
{
    icu::UnicodeString* unicodeString = segments->GetUString();
    // 5. Let len be the length of string.
    int32_t len = unicodeString->length();
    // 7. If n < 0 or n ≥ len, return undefined.
    // Written as a negated in-range test so that NaN is rejected as well: NaN fails every comparison,
    // so it would pass "index < 0 || index >= len" and reach the cast below, which is undefined
    // behaviour for NaN.
    if (!(index >= 0 && index < len)) {
        return JSTaggedValue::Undefined();
    }
    int32_t n = static_cast<int32_t>(index);
    // n may point to the surrogate tail - adjust it back to the lead.
    n = unicodeString->getChar32Start(n);
    icu::BreakIterator* breakIterator = segments->GetIcuBreakIterator();
    // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
    int32_t startIndex = breakIterator->isBoundary(n) ? n : breakIterator->preceding(n);
    // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
    // following() is called last, immediately before CreateSegmentDataObject reads the iterator's
    // rule status.
    int32_t endIndex = breakIterator->following(n);
    // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex).
    return CreateSegmentDataObject(thread, segments->GetGranularity(), breakIterator,
                                   JSHandle<EcmaString>(thread, segments->GetSegmentsString()),
                                   *unicodeString, startIndex, endIndex).GetTaggedValue();
}

// Returns true when the break iterator's current rule status lies in one of the ICU word status
// ranges for numbers, letters, kana or ideographs; any other status yields false.
bool CurrentSegmentIsWordLike(icu::BreakIterator* breakIterator)
{
    int32_t ruleStatus = breakIterator->getRuleStatus();
    return (ruleStatus >= UBRK_WORD_NUMBER && ruleStatus < UBRK_WORD_NUMBER_LIMIT) ||
           (ruleStatus >= UBRK_WORD_LETTER && ruleStatus < UBRK_WORD_LETTER_LIMIT) ||
           (ruleStatus >= UBRK_WORD_KANA && ruleStatus < UBRK_WORD_KANA_LIMIT) ||
           (ruleStatus >= UBRK_WORD_IDEO && ruleStatus < UBRK_WORD_IDEO_LIMIT);
}

// 18.7.1 CreateSegmentDataObject ( segmenter, string, startIndex, endIndex )
// Builds a plain object with the properties "segment", "index" and "input", plus "isWordLike" when
// `granularity` is WORD.
//   granularity    - granularity of the owning segmenter; selects whether "isWordLike" is added.
//   breakIterator  - only consulted for its current rule status (word granularity).
//   inputString    - value stored as the "input" property.
//   unicodeString  - ICU string from which the [startIndex, endIndex) substring is taken.
// Requires 0 <= startIndex < endIndex <= unicodeString.length() (checked by ASSERT).
JSHandle<JSObject> JSSegments::CreateSegmentDataObject(JSThread *thread, GranularityOption granularity,
                                                       icu::BreakIterator* breakIterator,
                                                       const JSHandle<EcmaString> &inputString,
                                                       const icu::UnicodeString& unicodeString,
                                                       int32_t startIndex, int32_t endIndex)
{
    // 1. Let len be the length of string.
    // 2. Assert: startIndex ≥ 0.
    ASSERT(startIndex >= 0);
    // 3. Assert: endIndex ≤ len.
    ASSERT(endIndex <= unicodeString.length());
    // 4. Assert: startIndex < endIndex.
    ASSERT(startIndex < endIndex);
    // 5. Let result be OrdinaryObjectCreate(%Object.prototype%).
    auto ecmaVm = thread->GetEcmaVM();
    auto env = ecmaVm->GetGlobalEnv();
    ObjectFactory *factory = ecmaVm->GetFactory();
    JSHandle<JSFunction> ctor(env->GetObjectFunction());
    JSHandle<JSObject> result(factory->NewJSObjectByConstructor(ctor));
    // 6. Let segment be the substring of string from startIndex to endIndex.
    auto segment = intl::LocaleHelper::UStringToString(thread, unicodeString, startIndex, endIndex);
    auto globalConst = thread->GlobalConstants();
    // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment).
    auto segmentKey = globalConst->GetHandledSegmentString();
    JSObject::CreateDataPropertyOrThrow(thread, result, segmentKey, JSHandle<JSTaggedValue>::Cast(segment));
    // 8. Perform ! CreateDataPropertyOrThrow(result, "index", 𝔽(startIndex)).
    auto indexKey = globalConst->GetHandledIndexString();
    JSObject::CreateDataPropertyOrThrow(thread, result, indexKey,
                                        JSHandle<JSTaggedValue>(thread, base::BuiltinsBase::GetTaggedInt(startIndex)));
    // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string).
    auto inputKey = globalConst->GetHandledInputString();
    JSObject::CreateDataPropertyOrThrow(thread, result, inputKey, JSHandle<JSTaggedValue>::Cast(inputString));
    // 10. Let granularity be segmenter.[[SegmenterGranularity]].
    // 11. If granularity is "word", then
    //     a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like"
    //        according to locale segmenter.[[Locale]].
    //     b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike).
    if (granularity == GranularityOption::WORD) {
        // The value comes from the iterator's current rule status, not from startIndex/endIndex;
        // presumably the caller has just moved the iterator to this segment's end boundary.
        bool isWordLike = CurrentSegmentIsWordLike(breakIterator);
        auto isWordLikeKey = globalConst->GetHandledIsWordLikeString();
        JSObject::CreateDataPropertyOrThrow(thread, result, isWordLikeKey,
            JSHandle<JSTaggedValue>(thread, base::BuiltinsBase::GetTaggedBoolean(isWordLike)));
    }
    // 12. Return result.
    return result;
}
}  // namespace panda::ecmascript