14514f5e3Sopenharmony_ci/* 24514f5e3Sopenharmony_ci * Copyright (c) 2023 Shenzhen Kaihong Digital Industry Development Co., Ltd. 34514f5e3Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 44514f5e3Sopenharmony_ci * you may not use this file except in compliance with the License. 54514f5e3Sopenharmony_ci * You may obtain a copy of the License at 64514f5e3Sopenharmony_ci * 74514f5e3Sopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 84514f5e3Sopenharmony_ci * 94514f5e3Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software 104514f5e3Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 114514f5e3Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 124514f5e3Sopenharmony_ci * See the License for the specific language governing permissions and 134514f5e3Sopenharmony_ci * limitations under the License. 144514f5e3Sopenharmony_ci */ 154514f5e3Sopenharmony_ci 164514f5e3Sopenharmony_ci#include "ecmascript/js_segments.h" 174514f5e3Sopenharmony_ci 184514f5e3Sopenharmony_ci#include <cstring> 194514f5e3Sopenharmony_ci 204514f5e3Sopenharmony_ci#include "ecmascript/base/builtins_base.h" 214514f5e3Sopenharmony_ci#include "ecmascript/intl/locale_helper.h" 224514f5e3Sopenharmony_ci#include "ecmascript/object_factory-inl.h" 234514f5e3Sopenharmony_ci 244514f5e3Sopenharmony_cinamespace panda::ecmascript { 254514f5e3Sopenharmony_ci 264514f5e3Sopenharmony_civoid JSSegments::SetIcuBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments, 274514f5e3Sopenharmony_ci icu::BreakIterator* icuBreakIterator, const NativePointerCallback &callback) 284514f5e3Sopenharmony_ci{ 294514f5e3Sopenharmony_ci EcmaVM *ecmaVm = thread->GetEcmaVM(); 304514f5e3Sopenharmony_ci ObjectFactory *factory = ecmaVm->GetFactory(); 314514f5e3Sopenharmony_ci 324514f5e3Sopenharmony_ci ASSERT(icuBreakIterator != nullptr); 334514f5e3Sopenharmony_ci JSTaggedValue data = segments->GetIcuField(); 344514f5e3Sopenharmony_ci if (data.IsJSNativePointer()) { 354514f5e3Sopenharmony_ci JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject()); 364514f5e3Sopenharmony_ci native->ResetExternalPointer(thread, icuBreakIterator); 374514f5e3Sopenharmony_ci return; 384514f5e3Sopenharmony_ci } 394514f5e3Sopenharmony_ci JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuBreakIterator, callback); 404514f5e3Sopenharmony_ci segments->SetIcuField(thread, pointer.GetTaggedValue()); 414514f5e3Sopenharmony_ci} 424514f5e3Sopenharmony_ci 434514f5e3Sopenharmony_civoid JSSegments::SetUString(JSThread *thread, const JSHandle<JSSegments> &segments, 444514f5e3Sopenharmony_ci icu::UnicodeString* icuUnicodeString, const NativePointerCallback &callback) 454514f5e3Sopenharmony_ci{ 464514f5e3Sopenharmony_ci EcmaVM *ecmaVm = thread->GetEcmaVM(); 474514f5e3Sopenharmony_ci ObjectFactory *factory = ecmaVm->GetFactory(); 484514f5e3Sopenharmony_ci 494514f5e3Sopenharmony_ci ASSERT(icuUnicodeString != nullptr); 504514f5e3Sopenharmony_ci JSTaggedValue data = segments->GetUnicodeString(); 514514f5e3Sopenharmony_ci if (data.IsJSNativePointer()) { 524514f5e3Sopenharmony_ci JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject()); 534514f5e3Sopenharmony_ci native->ResetExternalPointer(thread, icuUnicodeString); 544514f5e3Sopenharmony_ci return; 554514f5e3Sopenharmony_ci } 564514f5e3Sopenharmony_ci JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuUnicodeString, callback); 574514f5e3Sopenharmony_ci segments->SetUnicodeString(thread, pointer.GetTaggedValue()); 584514f5e3Sopenharmony_ci} 594514f5e3Sopenharmony_ci 604514f5e3Sopenharmony_civoid SetTextToBreakIterator(JSThread *thread, const JSHandle<JSSegments> &segments, 614514f5e3Sopenharmony_ci JSHandle<EcmaString> text, icu::BreakIterator* breakIterator) 624514f5e3Sopenharmony_ci{ 634514f5e3Sopenharmony_ci std::u16string u16str = EcmaStringAccessor(text).ToU16String(); 644514f5e3Sopenharmony_ci icu::UnicodeString src(u16str.data(), u16str.size()); 654514f5e3Sopenharmony_ci icu::UnicodeString* uText = static_cast<icu::UnicodeString*>(src.clone()); 664514f5e3Sopenharmony_ci breakIterator->setText(*uText); 674514f5e3Sopenharmony_ci JSSegments::SetUString(thread, segments, uText, JSSegments::FreeUString); 684514f5e3Sopenharmony_ci} 694514f5e3Sopenharmony_ci 704514f5e3Sopenharmony_ciJSHandle<JSSegments> JSSegments::CreateSegmentsObject(JSThread *thread, 714514f5e3Sopenharmony_ci const JSHandle<JSSegmenter> &segmenter, 724514f5e3Sopenharmony_ci const JSHandle<EcmaString> &string) 734514f5e3Sopenharmony_ci{ 744514f5e3Sopenharmony_ci ObjectFactory *factory = thread->GetEcmaVM()->GetFactory(); 754514f5e3Sopenharmony_ci // 1. Let internalSlotsList be « [[SegmentsSegmenter]], [[SegmentsString]] ». 764514f5e3Sopenharmony_ci // 2. Let segments be OrdinaryObjectCreate(%SegmentsPrototype%, internalSlotsList). 774514f5e3Sopenharmony_ci JSHandle<GlobalEnv> env = thread->GetEcmaVM()->GetGlobalEnv(); 784514f5e3Sopenharmony_ci JSHandle<JSFunction> segmentsCtor(env->GetSegmentsFunction()); 794514f5e3Sopenharmony_ci JSHandle<JSSegments> segments(factory->NewJSObjectByConstructor(segmentsCtor)); 804514f5e3Sopenharmony_ci // 3. Set segments.[[SegmentsSegmenter]] to segmenter. 814514f5e3Sopenharmony_ci icu::BreakIterator* icuBreakIterator = segmenter->GetIcuBreakIterator()->clone(); 824514f5e3Sopenharmony_ci SetIcuBreakIterator(thread, segments, icuBreakIterator, JSSegments::FreeIcuBreakIterator); 834514f5e3Sopenharmony_ci segments->SetGranularity(segmenter->GetGranularity()); 844514f5e3Sopenharmony_ci // 4. Set segments.[[SegmentsString]] to string. 854514f5e3Sopenharmony_ci segments->SetSegmentsString(thread, string); 864514f5e3Sopenharmony_ci SetTextToBreakIterator(thread, segments, string, icuBreakIterator); 874514f5e3Sopenharmony_ci return segments; 884514f5e3Sopenharmony_ci} 894514f5e3Sopenharmony_ci 904514f5e3Sopenharmony_ciJSTaggedValue JSSegments::Containing(JSThread *thread, const JSHandle<JSSegments> &segments, double index) 914514f5e3Sopenharmony_ci{ 924514f5e3Sopenharmony_ci icu::UnicodeString* unicodeString = segments->GetUString(); 934514f5e3Sopenharmony_ci // 5. Let len be the length of string. 944514f5e3Sopenharmony_ci int32_t len = unicodeString->length(); 954514f5e3Sopenharmony_ci // 7. If n < 0 or n ≥ len, return undefined. 964514f5e3Sopenharmony_ci if (index < 0 || index >= len) { 974514f5e3Sopenharmony_ci return JSTaggedValue::Undefined(); 984514f5e3Sopenharmony_ci } 994514f5e3Sopenharmony_ci int32_t n = static_cast<int32_t>(index); 1004514f5e3Sopenharmony_ci // n may point to the surrogate tail- adjust it back to the lead. 1014514f5e3Sopenharmony_ci n = unicodeString->getChar32Start(n); 1024514f5e3Sopenharmony_ci icu::BreakIterator* breakIterator = segments->GetIcuBreakIterator(); 1034514f5e3Sopenharmony_ci // 8. Let startIndex be ! FindBoundary(segmenter, string, n, before). 1044514f5e3Sopenharmony_ci int32_t startIndex = breakIterator->isBoundary(n) ? n : breakIterator->preceding(n); 1054514f5e3Sopenharmony_ci // 9. Let endIndex be ! FindBoundary(segmenter, string, n, after). 1064514f5e3Sopenharmony_ci int32_t endIndex = breakIterator->following(n); 1074514f5e3Sopenharmony_ci // 10. Return ! CreateSegmentDataObject(segmenter, string, startIndex, endIndex). 1084514f5e3Sopenharmony_ci return CreateSegmentDataObject(thread, segments->GetGranularity(), breakIterator, 1094514f5e3Sopenharmony_ci JSHandle<EcmaString>(thread, segments->GetSegmentsString()), 1104514f5e3Sopenharmony_ci *unicodeString, startIndex, endIndex).GetTaggedValue(); 1114514f5e3Sopenharmony_ci} 1124514f5e3Sopenharmony_ci 1134514f5e3Sopenharmony_cibool CurrentSegmentIsWordLike(icu::BreakIterator* breakIterator) 1144514f5e3Sopenharmony_ci{ 1154514f5e3Sopenharmony_ci int32_t rule_status = breakIterator->getRuleStatus(); 1164514f5e3Sopenharmony_ci return (rule_status >= UBRK_WORD_NUMBER && 1174514f5e3Sopenharmony_ci rule_status < UBRK_WORD_NUMBER_LIMIT) || 1184514f5e3Sopenharmony_ci (rule_status >= UBRK_WORD_LETTER && 1194514f5e3Sopenharmony_ci rule_status < UBRK_WORD_LETTER_LIMIT) || 1204514f5e3Sopenharmony_ci (rule_status >= UBRK_WORD_KANA && 1214514f5e3Sopenharmony_ci rule_status < UBRK_WORD_KANA_LIMIT) || 1224514f5e3Sopenharmony_ci (rule_status >= UBRK_WORD_IDEO && rule_status < UBRK_WORD_IDEO_LIMIT); 1234514f5e3Sopenharmony_ci} 1244514f5e3Sopenharmony_ci 1254514f5e3Sopenharmony_ci// 18.7.1 CreateSegmentDataObject ( segmenter, string, startIndex, endIndex ) 1264514f5e3Sopenharmony_ciJSHandle<JSObject> JSSegments::CreateSegmentDataObject(JSThread *thread, GranularityOption granularity, 1274514f5e3Sopenharmony_ci icu::BreakIterator* breakIterator, const JSHandle<EcmaString> &inputString, 1284514f5e3Sopenharmony_ci const icu::UnicodeString& unicodeString, int32_t startIndex, int32_t endIndex) 1294514f5e3Sopenharmony_ci{ 1304514f5e3Sopenharmony_ci // 1. Let len be the length of string. 1314514f5e3Sopenharmony_ci // 2. Assert: startIndex ≥ 0. 1324514f5e3Sopenharmony_ci ASSERT(startIndex >= 0); 1334514f5e3Sopenharmony_ci // 3. Assert: endIndex ≤ len. 1344514f5e3Sopenharmony_ci ASSERT(endIndex <= unicodeString.length()); 1354514f5e3Sopenharmony_ci // 4. Assert: startIndex < endIndex. 1364514f5e3Sopenharmony_ci ASSERT(startIndex < endIndex); 1374514f5e3Sopenharmony_ci // 5. Let result be OrdinaryObjectCreate(%Object.prototype%). 1384514f5e3Sopenharmony_ci auto ecmaVm = thread->GetEcmaVM(); 1394514f5e3Sopenharmony_ci JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv(); 1404514f5e3Sopenharmony_ci ObjectFactory *factory = ecmaVm->GetFactory(); 1414514f5e3Sopenharmony_ci JSHandle<JSFunction> ctor(env->GetObjectFunction()); 1424514f5e3Sopenharmony_ci JSHandle<JSObject> result(factory->NewJSObjectByConstructor(ctor)); 1434514f5e3Sopenharmony_ci // 6. Let segment be the substring of string from startIndex to endIndex. 1444514f5e3Sopenharmony_ci JSHandle<EcmaString> segment = 1454514f5e3Sopenharmony_ci intl::LocaleHelper::UStringToString(thread, unicodeString, startIndex, endIndex); 1464514f5e3Sopenharmony_ci auto globalConst = thread->GlobalConstants(); 1474514f5e3Sopenharmony_ci // 7. Perform ! CreateDataPropertyOrThrow(result, "segment", segment). 1484514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> segmentKey = globalConst->GetHandledSegmentString(); 1494514f5e3Sopenharmony_ci JSObject::CreateDataPropertyOrThrow(thread, result, segmentKey, JSHandle<JSTaggedValue>::Cast(segment)); 1504514f5e3Sopenharmony_ci // 8. Perform ! CreateDataPropertyOrThrow(result, "index", (startIndex)). 1514514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> indexKey = globalConst->GetHandledIndexString(); 1524514f5e3Sopenharmony_ci JSObject::CreateDataPropertyOrThrow(thread, result, indexKey, JSHandle<JSTaggedValue>(thread, 1534514f5e3Sopenharmony_ci base::BuiltinsBase::GetTaggedInt(startIndex))); 1544514f5e3Sopenharmony_ci // 9. Perform ! CreateDataPropertyOrThrow(result, "input", string). 1554514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> inputKey = globalConst->GetHandledInputString(); 1564514f5e3Sopenharmony_ci JSObject::CreateDataPropertyOrThrow(thread, result, inputKey, JSHandle<JSTaggedValue>::Cast(inputString)); 1574514f5e3Sopenharmony_ci // 10. Let granularity be segmenter.[[SegmenterGranularity]]. 1584514f5e3Sopenharmony_ci // 11. If granularity is "word", then 1594514f5e3Sopenharmony_ci // a. Let isWordLike be a Boolean value indicating whether the segment in string is "word-like" 1604514f5e3Sopenharmony_ci // according to locale segmenter.[[Locale]]. 1614514f5e3Sopenharmony_ci // b. Perform ! CreateDataPropertyOrThrow(result, "isWordLike", isWordLike). 1624514f5e3Sopenharmony_ci if (granularity == GranularityOption::WORD) { 1634514f5e3Sopenharmony_ci bool isWordLike = CurrentSegmentIsWordLike(breakIterator); 1644514f5e3Sopenharmony_ci JSHandle<JSTaggedValue> isWordLikeKey = globalConst->GetHandledIsWordLikeString(); 1654514f5e3Sopenharmony_ci JSObject::CreateDataPropertyOrThrow(thread, result, isWordLikeKey, JSHandle<JSTaggedValue>(thread, 1664514f5e3Sopenharmony_ci base::BuiltinsBase::GetTaggedBoolean(isWordLike))); 1674514f5e3Sopenharmony_ci } 1684514f5e3Sopenharmony_ci // 12. Return result. 1694514f5e3Sopenharmony_ci return result; 1704514f5e3Sopenharmony_ci} 1714514f5e3Sopenharmony_ci} // namespace panda::ecmascript 172