// Copyright Joyent, Inc. and other Node contributors.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit
// persons to whom the Software is furnished to do so, subject to the
// following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.
211cb0ef41Sopenharmony_ci 221cb0ef41Sopenharmony_ci/* 231cb0ef41Sopenharmony_ci * notes: by srl295 241cb0ef41Sopenharmony_ci * - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data 251cb0ef41Sopenharmony_ci * ( stubdata/libicudata.a ) containing nothing, no data, and it's also 261cb0ef41Sopenharmony_ci * linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT 271cb0ef41Sopenharmony_ci * macro names. That's the "english+root" data. 281cb0ef41Sopenharmony_ci * 291cb0ef41Sopenharmony_ci * If icu_data_path is non-null, the user has provided a path and we assume 301cb0ef41Sopenharmony_ci * it goes somewhere useful. We set that path in ICU, and exit. 311cb0ef41Sopenharmony_ci * If icu_data_path is null, they haven't set a path and we want the 321cb0ef41Sopenharmony_ci * "english+root" data. We call 331cb0ef41Sopenharmony_ci * udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...) 341cb0ef41Sopenharmony_ci * to load up the english+root data. 351cb0ef41Sopenharmony_ci * 361cb0ef41Sopenharmony_ci * - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full 371cb0ef41Sopenharmony_ci * data. All of the variables and command line options for changing data at 381cb0ef41Sopenharmony_ci * runtime are disabled, as they wouldn't fully override the internal data. 
391cb0ef41Sopenharmony_ci * See: http://bugs.icu-project.org/trac/ticket/10924 401cb0ef41Sopenharmony_ci */ 411cb0ef41Sopenharmony_ci 421cb0ef41Sopenharmony_ci 431cb0ef41Sopenharmony_ci#include "node_i18n.h" 441cb0ef41Sopenharmony_ci#include "node_external_reference.h" 451cb0ef41Sopenharmony_ci 461cb0ef41Sopenharmony_ci#if defined(NODE_HAVE_I18N_SUPPORT) 471cb0ef41Sopenharmony_ci 481cb0ef41Sopenharmony_ci#include "base_object-inl.h" 491cb0ef41Sopenharmony_ci#include "node.h" 501cb0ef41Sopenharmony_ci#include "node_buffer.h" 511cb0ef41Sopenharmony_ci#include "node_errors.h" 521cb0ef41Sopenharmony_ci#include "node_internals.h" 531cb0ef41Sopenharmony_ci#include "string_bytes.h" 541cb0ef41Sopenharmony_ci#include "util-inl.h" 551cb0ef41Sopenharmony_ci#include "v8.h" 561cb0ef41Sopenharmony_ci 571cb0ef41Sopenharmony_ci#include <unicode/utypes.h> 581cb0ef41Sopenharmony_ci#include <unicode/putil.h> 591cb0ef41Sopenharmony_ci#include <unicode/uchar.h> 601cb0ef41Sopenharmony_ci#include <unicode/uclean.h> 611cb0ef41Sopenharmony_ci#include <unicode/udata.h> 621cb0ef41Sopenharmony_ci#include <unicode/uidna.h> 631cb0ef41Sopenharmony_ci#include <unicode/ucnv.h> 641cb0ef41Sopenharmony_ci#include <unicode/utf8.h> 651cb0ef41Sopenharmony_ci#include <unicode/utf16.h> 661cb0ef41Sopenharmony_ci#include <unicode/timezone.h> 671cb0ef41Sopenharmony_ci#include <unicode/ulocdata.h> 681cb0ef41Sopenharmony_ci#include <unicode/uvernum.h> 691cb0ef41Sopenharmony_ci#include <unicode/uversion.h> 701cb0ef41Sopenharmony_ci#include <unicode/ustring.h> 711cb0ef41Sopenharmony_ci 721cb0ef41Sopenharmony_ci#ifdef NODE_HAVE_SMALL_ICU 731cb0ef41Sopenharmony_ci/* if this is defined, we have a 'secondary' entry point. 
741cb0ef41Sopenharmony_ci compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */ 751cb0ef41Sopenharmony_ci#define SMALL_ICUDATA_ENTRY_POINT \ 761cb0ef41Sopenharmony_ci SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME) 771cb0ef41Sopenharmony_ci#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff) 781cb0ef41Sopenharmony_ci#ifndef U_LIB_SUFFIX_C_NAME 791cb0ef41Sopenharmony_ci#define SMALL_DEF(major, suff) icusmdt##major##_dat 801cb0ef41Sopenharmony_ci#else 811cb0ef41Sopenharmony_ci#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat 821cb0ef41Sopenharmony_ci#endif 831cb0ef41Sopenharmony_ci 841cb0ef41Sopenharmony_ciextern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[]; 851cb0ef41Sopenharmony_ci#endif 861cb0ef41Sopenharmony_ci 871cb0ef41Sopenharmony_cinamespace node { 881cb0ef41Sopenharmony_ci 891cb0ef41Sopenharmony_ciusing v8::Context; 901cb0ef41Sopenharmony_ciusing v8::FunctionCallbackInfo; 911cb0ef41Sopenharmony_ciusing v8::FunctionTemplate; 921cb0ef41Sopenharmony_ciusing v8::Int32; 931cb0ef41Sopenharmony_ciusing v8::Isolate; 941cb0ef41Sopenharmony_ciusing v8::Local; 951cb0ef41Sopenharmony_ciusing v8::MaybeLocal; 961cb0ef41Sopenharmony_ciusing v8::NewStringType; 971cb0ef41Sopenharmony_ciusing v8::Object; 981cb0ef41Sopenharmony_ciusing v8::ObjectTemplate; 991cb0ef41Sopenharmony_ciusing v8::String; 1001cb0ef41Sopenharmony_ciusing v8::Value; 1011cb0ef41Sopenharmony_ci 1021cb0ef41Sopenharmony_cinamespace i18n { 1031cb0ef41Sopenharmony_cinamespace { 1041cb0ef41Sopenharmony_ci 1051cb0ef41Sopenharmony_citemplate <typename T> 1061cb0ef41Sopenharmony_ciMaybeLocal<Object> ToBufferEndian(Environment* env, MaybeStackBuffer<T>* buf) { 1071cb0ef41Sopenharmony_ci MaybeLocal<Object> ret = Buffer::New(env, buf); 1081cb0ef41Sopenharmony_ci if (ret.IsEmpty()) 1091cb0ef41Sopenharmony_ci return ret; 1101cb0ef41Sopenharmony_ci 1111cb0ef41Sopenharmony_ci static_assert(sizeof(T) == 1 || sizeof(T) == 2, 1121cb0ef41Sopenharmony_ci "Currently only one- or two-byte 
buffers are supported"); 1131cb0ef41Sopenharmony_ci if (sizeof(T) > 1 && IsBigEndian()) { 1141cb0ef41Sopenharmony_ci SPREAD_BUFFER_ARG(ret.ToLocalChecked(), retbuf); 1151cb0ef41Sopenharmony_ci SwapBytes16(retbuf_data, retbuf_length); 1161cb0ef41Sopenharmony_ci } 1171cb0ef41Sopenharmony_ci 1181cb0ef41Sopenharmony_ci return ret; 1191cb0ef41Sopenharmony_ci} 1201cb0ef41Sopenharmony_ci 1211cb0ef41Sopenharmony_ci// One-Shot Converters 1221cb0ef41Sopenharmony_ci 1231cb0ef41Sopenharmony_civoid CopySourceBuffer(MaybeStackBuffer<UChar>* dest, 1241cb0ef41Sopenharmony_ci const char* data, 1251cb0ef41Sopenharmony_ci const size_t length, 1261cb0ef41Sopenharmony_ci const size_t length_in_chars) { 1271cb0ef41Sopenharmony_ci dest->AllocateSufficientStorage(length_in_chars); 1281cb0ef41Sopenharmony_ci char* dst = reinterpret_cast<char*>(**dest); 1291cb0ef41Sopenharmony_ci memcpy(dst, data, length); 1301cb0ef41Sopenharmony_ci if (IsBigEndian()) { 1311cb0ef41Sopenharmony_ci SwapBytes16(dst, length); 1321cb0ef41Sopenharmony_ci } 1331cb0ef41Sopenharmony_ci} 1341cb0ef41Sopenharmony_ci 1351cb0ef41Sopenharmony_citypedef MaybeLocal<Object> (*TranscodeFunc)(Environment* env, 1361cb0ef41Sopenharmony_ci const char* fromEncoding, 1371cb0ef41Sopenharmony_ci const char* toEncoding, 1381cb0ef41Sopenharmony_ci const char* source, 1391cb0ef41Sopenharmony_ci const size_t source_length, 1401cb0ef41Sopenharmony_ci UErrorCode* status); 1411cb0ef41Sopenharmony_ci 1421cb0ef41Sopenharmony_ciMaybeLocal<Object> Transcode(Environment* env, 1431cb0ef41Sopenharmony_ci const char* fromEncoding, 1441cb0ef41Sopenharmony_ci const char* toEncoding, 1451cb0ef41Sopenharmony_ci const char* source, 1461cb0ef41Sopenharmony_ci const size_t source_length, 1471cb0ef41Sopenharmony_ci UErrorCode* status) { 1481cb0ef41Sopenharmony_ci *status = U_ZERO_ERROR; 1491cb0ef41Sopenharmony_ci MaybeLocal<Object> ret; 1501cb0ef41Sopenharmony_ci MaybeStackBuffer<char> result; 1511cb0ef41Sopenharmony_ci Converter to(toEncoding); 
1521cb0ef41Sopenharmony_ci Converter from(fromEncoding); 1531cb0ef41Sopenharmony_ci 1541cb0ef41Sopenharmony_ci size_t sublen = ucnv_getMinCharSize(to.conv()); 1551cb0ef41Sopenharmony_ci std::string sub(sublen, '?'); 1561cb0ef41Sopenharmony_ci to.set_subst_chars(sub.c_str()); 1571cb0ef41Sopenharmony_ci 1581cb0ef41Sopenharmony_ci const uint32_t limit = source_length * to.max_char_size(); 1591cb0ef41Sopenharmony_ci result.AllocateSufficientStorage(limit); 1601cb0ef41Sopenharmony_ci char* target = *result; 1611cb0ef41Sopenharmony_ci ucnv_convertEx(to.conv(), from.conv(), &target, target + limit, 1621cb0ef41Sopenharmony_ci &source, source + source_length, nullptr, nullptr, 1631cb0ef41Sopenharmony_ci nullptr, nullptr, true, true, status); 1641cb0ef41Sopenharmony_ci if (U_SUCCESS(*status)) { 1651cb0ef41Sopenharmony_ci result.SetLength(target - &result[0]); 1661cb0ef41Sopenharmony_ci ret = ToBufferEndian(env, &result); 1671cb0ef41Sopenharmony_ci } 1681cb0ef41Sopenharmony_ci return ret; 1691cb0ef41Sopenharmony_ci} 1701cb0ef41Sopenharmony_ci 1711cb0ef41Sopenharmony_ciMaybeLocal<Object> TranscodeToUcs2(Environment* env, 1721cb0ef41Sopenharmony_ci const char* fromEncoding, 1731cb0ef41Sopenharmony_ci const char* toEncoding, 1741cb0ef41Sopenharmony_ci const char* source, 1751cb0ef41Sopenharmony_ci const size_t source_length, 1761cb0ef41Sopenharmony_ci UErrorCode* status) { 1771cb0ef41Sopenharmony_ci *status = U_ZERO_ERROR; 1781cb0ef41Sopenharmony_ci MaybeLocal<Object> ret; 1791cb0ef41Sopenharmony_ci MaybeStackBuffer<UChar> destbuf(source_length); 1801cb0ef41Sopenharmony_ci Converter from(fromEncoding); 1811cb0ef41Sopenharmony_ci const size_t length_in_chars = source_length * sizeof(UChar); 1821cb0ef41Sopenharmony_ci ucnv_toUChars(from.conv(), *destbuf, length_in_chars, 1831cb0ef41Sopenharmony_ci source, source_length, status); 1841cb0ef41Sopenharmony_ci if (U_SUCCESS(*status)) 1851cb0ef41Sopenharmony_ci ret = ToBufferEndian(env, &destbuf); 1861cb0ef41Sopenharmony_ci return ret; 
1871cb0ef41Sopenharmony_ci} 1881cb0ef41Sopenharmony_ci 1891cb0ef41Sopenharmony_ciMaybeLocal<Object> TranscodeFromUcs2(Environment* env, 1901cb0ef41Sopenharmony_ci const char* fromEncoding, 1911cb0ef41Sopenharmony_ci const char* toEncoding, 1921cb0ef41Sopenharmony_ci const char* source, 1931cb0ef41Sopenharmony_ci const size_t source_length, 1941cb0ef41Sopenharmony_ci UErrorCode* status) { 1951cb0ef41Sopenharmony_ci *status = U_ZERO_ERROR; 1961cb0ef41Sopenharmony_ci MaybeStackBuffer<UChar> sourcebuf; 1971cb0ef41Sopenharmony_ci MaybeLocal<Object> ret; 1981cb0ef41Sopenharmony_ci Converter to(toEncoding); 1991cb0ef41Sopenharmony_ci 2001cb0ef41Sopenharmony_ci size_t sublen = ucnv_getMinCharSize(to.conv()); 2011cb0ef41Sopenharmony_ci std::string sub(sublen, '?'); 2021cb0ef41Sopenharmony_ci to.set_subst_chars(sub.c_str()); 2031cb0ef41Sopenharmony_ci 2041cb0ef41Sopenharmony_ci const size_t length_in_chars = source_length / sizeof(UChar); 2051cb0ef41Sopenharmony_ci CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars); 2061cb0ef41Sopenharmony_ci MaybeStackBuffer<char> destbuf(length_in_chars); 2071cb0ef41Sopenharmony_ci const uint32_t len = ucnv_fromUChars(to.conv(), *destbuf, length_in_chars, 2081cb0ef41Sopenharmony_ci *sourcebuf, length_in_chars, status); 2091cb0ef41Sopenharmony_ci if (U_SUCCESS(*status)) { 2101cb0ef41Sopenharmony_ci destbuf.SetLength(len); 2111cb0ef41Sopenharmony_ci ret = ToBufferEndian(env, &destbuf); 2121cb0ef41Sopenharmony_ci } 2131cb0ef41Sopenharmony_ci return ret; 2141cb0ef41Sopenharmony_ci} 2151cb0ef41Sopenharmony_ci 2161cb0ef41Sopenharmony_ciMaybeLocal<Object> TranscodeUcs2FromUtf8(Environment* env, 2171cb0ef41Sopenharmony_ci const char* fromEncoding, 2181cb0ef41Sopenharmony_ci const char* toEncoding, 2191cb0ef41Sopenharmony_ci const char* source, 2201cb0ef41Sopenharmony_ci const size_t source_length, 2211cb0ef41Sopenharmony_ci UErrorCode* status) { 2221cb0ef41Sopenharmony_ci *status = U_ZERO_ERROR; 2231cb0ef41Sopenharmony_ci 
MaybeStackBuffer<UChar> destbuf; 2241cb0ef41Sopenharmony_ci int32_t result_length; 2251cb0ef41Sopenharmony_ci u_strFromUTF8(*destbuf, destbuf.capacity(), &result_length, 2261cb0ef41Sopenharmony_ci source, source_length, status); 2271cb0ef41Sopenharmony_ci MaybeLocal<Object> ret; 2281cb0ef41Sopenharmony_ci if (U_SUCCESS(*status)) { 2291cb0ef41Sopenharmony_ci destbuf.SetLength(result_length); 2301cb0ef41Sopenharmony_ci ret = ToBufferEndian(env, &destbuf); 2311cb0ef41Sopenharmony_ci } else if (*status == U_BUFFER_OVERFLOW_ERROR) { 2321cb0ef41Sopenharmony_ci *status = U_ZERO_ERROR; 2331cb0ef41Sopenharmony_ci destbuf.AllocateSufficientStorage(result_length); 2341cb0ef41Sopenharmony_ci u_strFromUTF8(*destbuf, result_length, &result_length, 2351cb0ef41Sopenharmony_ci source, source_length, status); 2361cb0ef41Sopenharmony_ci if (U_SUCCESS(*status)) { 2371cb0ef41Sopenharmony_ci destbuf.SetLength(result_length); 2381cb0ef41Sopenharmony_ci ret = ToBufferEndian(env, &destbuf); 2391cb0ef41Sopenharmony_ci } 2401cb0ef41Sopenharmony_ci } 2411cb0ef41Sopenharmony_ci return ret; 2421cb0ef41Sopenharmony_ci} 2431cb0ef41Sopenharmony_ci 2441cb0ef41Sopenharmony_ciMaybeLocal<Object> TranscodeUtf8FromUcs2(Environment* env, 2451cb0ef41Sopenharmony_ci const char* fromEncoding, 2461cb0ef41Sopenharmony_ci const char* toEncoding, 2471cb0ef41Sopenharmony_ci const char* source, 2481cb0ef41Sopenharmony_ci const size_t source_length, 2491cb0ef41Sopenharmony_ci UErrorCode* status) { 2501cb0ef41Sopenharmony_ci *status = U_ZERO_ERROR; 2511cb0ef41Sopenharmony_ci MaybeLocal<Object> ret; 2521cb0ef41Sopenharmony_ci const size_t length_in_chars = source_length / sizeof(UChar); 2531cb0ef41Sopenharmony_ci int32_t result_length; 2541cb0ef41Sopenharmony_ci MaybeStackBuffer<UChar> sourcebuf; 2551cb0ef41Sopenharmony_ci MaybeStackBuffer<char> destbuf; 2561cb0ef41Sopenharmony_ci CopySourceBuffer(&sourcebuf, source, source_length, length_in_chars); 2571cb0ef41Sopenharmony_ci u_strToUTF8(*destbuf, 
destbuf.capacity(), &result_length, 2581cb0ef41Sopenharmony_ci *sourcebuf, length_in_chars, status); 2591cb0ef41Sopenharmony_ci if (U_SUCCESS(*status)) { 2601cb0ef41Sopenharmony_ci destbuf.SetLength(result_length); 2611cb0ef41Sopenharmony_ci ret = ToBufferEndian(env, &destbuf); 2621cb0ef41Sopenharmony_ci } else if (*status == U_BUFFER_OVERFLOW_ERROR) { 2631cb0ef41Sopenharmony_ci *status = U_ZERO_ERROR; 2641cb0ef41Sopenharmony_ci destbuf.AllocateSufficientStorage(result_length); 2651cb0ef41Sopenharmony_ci u_strToUTF8(*destbuf, result_length, &result_length, *sourcebuf, 2661cb0ef41Sopenharmony_ci length_in_chars, status); 2671cb0ef41Sopenharmony_ci if (U_SUCCESS(*status)) { 2681cb0ef41Sopenharmony_ci destbuf.SetLength(result_length); 2691cb0ef41Sopenharmony_ci ret = ToBufferEndian(env, &destbuf); 2701cb0ef41Sopenharmony_ci } 2711cb0ef41Sopenharmony_ci } 2721cb0ef41Sopenharmony_ci return ret; 2731cb0ef41Sopenharmony_ci} 2741cb0ef41Sopenharmony_ci 2751cb0ef41Sopenharmony_ciconst char* EncodingName(const enum encoding encoding) { 2761cb0ef41Sopenharmony_ci switch (encoding) { 2771cb0ef41Sopenharmony_ci case ASCII: return "us-ascii"; 2781cb0ef41Sopenharmony_ci case LATIN1: return "iso8859-1"; 2791cb0ef41Sopenharmony_ci case UCS2: return "utf16le"; 2801cb0ef41Sopenharmony_ci case UTF8: return "utf-8"; 2811cb0ef41Sopenharmony_ci default: return nullptr; 2821cb0ef41Sopenharmony_ci } 2831cb0ef41Sopenharmony_ci} 2841cb0ef41Sopenharmony_ci 2851cb0ef41Sopenharmony_cibool SupportedEncoding(const enum encoding encoding) { 2861cb0ef41Sopenharmony_ci switch (encoding) { 2871cb0ef41Sopenharmony_ci case ASCII: 2881cb0ef41Sopenharmony_ci case LATIN1: 2891cb0ef41Sopenharmony_ci case UCS2: 2901cb0ef41Sopenharmony_ci case UTF8: return true; 2911cb0ef41Sopenharmony_ci default: return false; 2921cb0ef41Sopenharmony_ci } 2931cb0ef41Sopenharmony_ci} 2941cb0ef41Sopenharmony_ci 2951cb0ef41Sopenharmony_civoid Transcode(const FunctionCallbackInfo<Value>&args) { 2961cb0ef41Sopenharmony_ci 
Environment* env = Environment::GetCurrent(args); 2971cb0ef41Sopenharmony_ci Isolate* isolate = env->isolate(); 2981cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2991cb0ef41Sopenharmony_ci MaybeLocal<Object> result; 3001cb0ef41Sopenharmony_ci 3011cb0ef41Sopenharmony_ci ArrayBufferViewContents<char> input(args[0]); 3021cb0ef41Sopenharmony_ci const enum encoding fromEncoding = ParseEncoding(isolate, args[1], BUFFER); 3031cb0ef41Sopenharmony_ci const enum encoding toEncoding = ParseEncoding(isolate, args[2], BUFFER); 3041cb0ef41Sopenharmony_ci 3051cb0ef41Sopenharmony_ci if (SupportedEncoding(fromEncoding) && SupportedEncoding(toEncoding)) { 3061cb0ef41Sopenharmony_ci TranscodeFunc tfn = &Transcode; 3071cb0ef41Sopenharmony_ci switch (fromEncoding) { 3081cb0ef41Sopenharmony_ci case ASCII: 3091cb0ef41Sopenharmony_ci case LATIN1: 3101cb0ef41Sopenharmony_ci if (toEncoding == UCS2) 3111cb0ef41Sopenharmony_ci tfn = &TranscodeToUcs2; 3121cb0ef41Sopenharmony_ci break; 3131cb0ef41Sopenharmony_ci case UTF8: 3141cb0ef41Sopenharmony_ci if (toEncoding == UCS2) 3151cb0ef41Sopenharmony_ci tfn = &TranscodeUcs2FromUtf8; 3161cb0ef41Sopenharmony_ci break; 3171cb0ef41Sopenharmony_ci case UCS2: 3181cb0ef41Sopenharmony_ci switch (toEncoding) { 3191cb0ef41Sopenharmony_ci case UCS2: 3201cb0ef41Sopenharmony_ci tfn = &Transcode; 3211cb0ef41Sopenharmony_ci break; 3221cb0ef41Sopenharmony_ci case UTF8: 3231cb0ef41Sopenharmony_ci tfn = &TranscodeUtf8FromUcs2; 3241cb0ef41Sopenharmony_ci break; 3251cb0ef41Sopenharmony_ci default: 3261cb0ef41Sopenharmony_ci tfn = &TranscodeFromUcs2; 3271cb0ef41Sopenharmony_ci } 3281cb0ef41Sopenharmony_ci break; 3291cb0ef41Sopenharmony_ci default: 3301cb0ef41Sopenharmony_ci // This should not happen because of the SupportedEncoding checks 3311cb0ef41Sopenharmony_ci ABORT(); 3321cb0ef41Sopenharmony_ci } 3331cb0ef41Sopenharmony_ci 3341cb0ef41Sopenharmony_ci result = tfn(env, EncodingName(fromEncoding), EncodingName(toEncoding), 3351cb0ef41Sopenharmony_ci 
input.data(), input.length(), &status); 3361cb0ef41Sopenharmony_ci } else { 3371cb0ef41Sopenharmony_ci status = U_ILLEGAL_ARGUMENT_ERROR; 3381cb0ef41Sopenharmony_ci } 3391cb0ef41Sopenharmony_ci 3401cb0ef41Sopenharmony_ci if (result.IsEmpty()) 3411cb0ef41Sopenharmony_ci return args.GetReturnValue().Set(status); 3421cb0ef41Sopenharmony_ci 3431cb0ef41Sopenharmony_ci return args.GetReturnValue().Set(result.ToLocalChecked()); 3441cb0ef41Sopenharmony_ci} 3451cb0ef41Sopenharmony_ci 3461cb0ef41Sopenharmony_civoid ICUErrorName(const FunctionCallbackInfo<Value>& args) { 3471cb0ef41Sopenharmony_ci Environment* env = Environment::GetCurrent(args); 3481cb0ef41Sopenharmony_ci CHECK(args[0]->IsInt32()); 3491cb0ef41Sopenharmony_ci UErrorCode status = static_cast<UErrorCode>(args[0].As<Int32>()->Value()); 3501cb0ef41Sopenharmony_ci args.GetReturnValue().Set( 3511cb0ef41Sopenharmony_ci String::NewFromUtf8(env->isolate(), 3521cb0ef41Sopenharmony_ci u_errorName(status)).ToLocalChecked()); 3531cb0ef41Sopenharmony_ci} 3541cb0ef41Sopenharmony_ci 3551cb0ef41Sopenharmony_ci} // anonymous namespace 3561cb0ef41Sopenharmony_ci 3571cb0ef41Sopenharmony_ciConverter::Converter(const char* name, const char* sub) { 3581cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 3591cb0ef41Sopenharmony_ci UConverter* conv = ucnv_open(name, &status); 3601cb0ef41Sopenharmony_ci CHECK(U_SUCCESS(status)); 3611cb0ef41Sopenharmony_ci conv_.reset(conv); 3621cb0ef41Sopenharmony_ci set_subst_chars(sub); 3631cb0ef41Sopenharmony_ci} 3641cb0ef41Sopenharmony_ci 3651cb0ef41Sopenharmony_ciConverter::Converter(UConverter* converter, const char* sub) 3661cb0ef41Sopenharmony_ci : conv_(converter) { 3671cb0ef41Sopenharmony_ci set_subst_chars(sub); 3681cb0ef41Sopenharmony_ci} 3691cb0ef41Sopenharmony_ci 3701cb0ef41Sopenharmony_civoid Converter::set_subst_chars(const char* sub) { 3711cb0ef41Sopenharmony_ci CHECK(conv_); 3721cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 3731cb0ef41Sopenharmony_ci if (sub != 
nullptr) { 3741cb0ef41Sopenharmony_ci ucnv_setSubstChars(conv_.get(), sub, strlen(sub), &status); 3751cb0ef41Sopenharmony_ci CHECK(U_SUCCESS(status)); 3761cb0ef41Sopenharmony_ci } 3771cb0ef41Sopenharmony_ci} 3781cb0ef41Sopenharmony_ci 3791cb0ef41Sopenharmony_civoid Converter::reset() { 3801cb0ef41Sopenharmony_ci ucnv_reset(conv_.get()); 3811cb0ef41Sopenharmony_ci} 3821cb0ef41Sopenharmony_ci 3831cb0ef41Sopenharmony_cisize_t Converter::min_char_size() const { 3841cb0ef41Sopenharmony_ci CHECK(conv_); 3851cb0ef41Sopenharmony_ci return ucnv_getMinCharSize(conv_.get()); 3861cb0ef41Sopenharmony_ci} 3871cb0ef41Sopenharmony_ci 3881cb0ef41Sopenharmony_cisize_t Converter::max_char_size() const { 3891cb0ef41Sopenharmony_ci CHECK(conv_); 3901cb0ef41Sopenharmony_ci return ucnv_getMaxCharSize(conv_.get()); 3911cb0ef41Sopenharmony_ci} 3921cb0ef41Sopenharmony_ci 3931cb0ef41Sopenharmony_civoid ConverterObject::Has(const FunctionCallbackInfo<Value>& args) { 3941cb0ef41Sopenharmony_ci Environment* env = Environment::GetCurrent(args); 3951cb0ef41Sopenharmony_ci 3961cb0ef41Sopenharmony_ci CHECK_GE(args.Length(), 1); 3971cb0ef41Sopenharmony_ci Utf8Value label(env->isolate(), args[0]); 3981cb0ef41Sopenharmony_ci 3991cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 4001cb0ef41Sopenharmony_ci ConverterPointer conv(ucnv_open(*label, &status)); 4011cb0ef41Sopenharmony_ci args.GetReturnValue().Set(!!U_SUCCESS(status)); 4021cb0ef41Sopenharmony_ci} 4031cb0ef41Sopenharmony_ci 4041cb0ef41Sopenharmony_civoid ConverterObject::Create(const FunctionCallbackInfo<Value>& args) { 4051cb0ef41Sopenharmony_ci Environment* env = Environment::GetCurrent(args); 4061cb0ef41Sopenharmony_ci 4071cb0ef41Sopenharmony_ci Local<ObjectTemplate> t = env->i18n_converter_template(); 4081cb0ef41Sopenharmony_ci Local<Object> obj; 4091cb0ef41Sopenharmony_ci if (!t->NewInstance(env->context()).ToLocal(&obj)) return; 4101cb0ef41Sopenharmony_ci 4111cb0ef41Sopenharmony_ci CHECK_GE(args.Length(), 2); 
4121cb0ef41Sopenharmony_ci Utf8Value label(env->isolate(), args[0]); 4131cb0ef41Sopenharmony_ci int flags = args[1]->Uint32Value(env->context()).ToChecked(); 4141cb0ef41Sopenharmony_ci bool fatal = 4151cb0ef41Sopenharmony_ci (flags & CONVERTER_FLAGS_FATAL) == CONVERTER_FLAGS_FATAL; 4161cb0ef41Sopenharmony_ci 4171cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 4181cb0ef41Sopenharmony_ci UConverter* conv = ucnv_open(*label, &status); 4191cb0ef41Sopenharmony_ci if (U_FAILURE(status)) 4201cb0ef41Sopenharmony_ci return; 4211cb0ef41Sopenharmony_ci 4221cb0ef41Sopenharmony_ci if (fatal) { 4231cb0ef41Sopenharmony_ci status = U_ZERO_ERROR; 4241cb0ef41Sopenharmony_ci ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, 4251cb0ef41Sopenharmony_ci nullptr, nullptr, nullptr, &status); 4261cb0ef41Sopenharmony_ci } 4271cb0ef41Sopenharmony_ci 4281cb0ef41Sopenharmony_ci auto converter = new ConverterObject(env, obj, conv, flags); 4291cb0ef41Sopenharmony_ci size_t sublen = ucnv_getMinCharSize(conv); 4301cb0ef41Sopenharmony_ci std::string sub(sublen, '?'); 4311cb0ef41Sopenharmony_ci converter->set_subst_chars(sub.c_str()); 4321cb0ef41Sopenharmony_ci 4331cb0ef41Sopenharmony_ci args.GetReturnValue().Set(obj); 4341cb0ef41Sopenharmony_ci} 4351cb0ef41Sopenharmony_ci 4361cb0ef41Sopenharmony_civoid ConverterObject::Decode(const FunctionCallbackInfo<Value>& args) { 4371cb0ef41Sopenharmony_ci Environment* env = Environment::GetCurrent(args); 4381cb0ef41Sopenharmony_ci 4391cb0ef41Sopenharmony_ci CHECK_GE(args.Length(), 4); // Converter, Buffer, Flags, Encoding 4401cb0ef41Sopenharmony_ci 4411cb0ef41Sopenharmony_ci ConverterObject* converter; 4421cb0ef41Sopenharmony_ci ASSIGN_OR_RETURN_UNWRAP(&converter, args[0].As<Object>()); 4431cb0ef41Sopenharmony_ci 4441cb0ef41Sopenharmony_ci if (!(args[1]->IsArrayBuffer() || args[1]->IsSharedArrayBuffer() || 4451cb0ef41Sopenharmony_ci args[1]->IsArrayBufferView())) { 4461cb0ef41Sopenharmony_ci return node::THROW_ERR_INVALID_ARG_TYPE( 
4471cb0ef41Sopenharmony_ci env->isolate(), 4481cb0ef41Sopenharmony_ci "The \"input\" argument must be an instance of SharedArrayBuffer, " 4491cb0ef41Sopenharmony_ci "ArrayBuffer or ArrayBufferView."); 4501cb0ef41Sopenharmony_ci } 4511cb0ef41Sopenharmony_ci 4521cb0ef41Sopenharmony_ci ArrayBufferViewContents<char> input(args[1]); 4531cb0ef41Sopenharmony_ci int flags = args[2]->Uint32Value(env->context()).ToChecked(); 4541cb0ef41Sopenharmony_ci 4551cb0ef41Sopenharmony_ci CHECK(args[3]->IsString()); 4561cb0ef41Sopenharmony_ci Local<String> from_encoding = args[3].As<String>(); 4571cb0ef41Sopenharmony_ci 4581cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 4591cb0ef41Sopenharmony_ci MaybeStackBuffer<UChar> result; 4601cb0ef41Sopenharmony_ci 4611cb0ef41Sopenharmony_ci UBool flush = (flags & CONVERTER_FLAGS_FLUSH) == CONVERTER_FLAGS_FLUSH; 4621cb0ef41Sopenharmony_ci 4631cb0ef41Sopenharmony_ci // When flushing the final chunk, the limit is the maximum 4641cb0ef41Sopenharmony_ci // of either the input buffer length or the number of pending 4651cb0ef41Sopenharmony_ci // characters times the min char size, multiplied by 2 as unicode may 4661cb0ef41Sopenharmony_ci // take up to 2 UChars to encode a character 4671cb0ef41Sopenharmony_ci size_t limit = 2 * converter->min_char_size() * 4681cb0ef41Sopenharmony_ci (!flush ? 4691cb0ef41Sopenharmony_ci input.length() : 4701cb0ef41Sopenharmony_ci std::max( 4711cb0ef41Sopenharmony_ci input.length(), 4721cb0ef41Sopenharmony_ci static_cast<size_t>( 4731cb0ef41Sopenharmony_ci ucnv_toUCountPending(converter->conv(), &status)))); 4741cb0ef41Sopenharmony_ci status = U_ZERO_ERROR; 4751cb0ef41Sopenharmony_ci 4761cb0ef41Sopenharmony_ci if (limit > 0) 4771cb0ef41Sopenharmony_ci result.AllocateSufficientStorage(limit); 4781cb0ef41Sopenharmony_ci 4791cb0ef41Sopenharmony_ci auto cleanup = OnScopeLeave([&]() { 4801cb0ef41Sopenharmony_ci if (flush) { 4811cb0ef41Sopenharmony_ci // Reset the converter state. 
4821cb0ef41Sopenharmony_ci converter->set_bom_seen(false); 4831cb0ef41Sopenharmony_ci converter->reset(); 4841cb0ef41Sopenharmony_ci } 4851cb0ef41Sopenharmony_ci }); 4861cb0ef41Sopenharmony_ci 4871cb0ef41Sopenharmony_ci const char* source = input.data(); 4881cb0ef41Sopenharmony_ci size_t source_length = input.length(); 4891cb0ef41Sopenharmony_ci 4901cb0ef41Sopenharmony_ci UChar* target = *result; 4911cb0ef41Sopenharmony_ci ucnv_toUnicode(converter->conv(), 4921cb0ef41Sopenharmony_ci &target, 4931cb0ef41Sopenharmony_ci target + limit, 4941cb0ef41Sopenharmony_ci &source, 4951cb0ef41Sopenharmony_ci source + source_length, 4961cb0ef41Sopenharmony_ci nullptr, 4971cb0ef41Sopenharmony_ci flush, 4981cb0ef41Sopenharmony_ci &status); 4991cb0ef41Sopenharmony_ci 5001cb0ef41Sopenharmony_ci if (U_SUCCESS(status)) { 5011cb0ef41Sopenharmony_ci bool omit_initial_bom = false; 5021cb0ef41Sopenharmony_ci if (limit > 0) { 5031cb0ef41Sopenharmony_ci result.SetLength(target - &result[0]); 5041cb0ef41Sopenharmony_ci if (result.length() > 0 && 5051cb0ef41Sopenharmony_ci converter->unicode() && 5061cb0ef41Sopenharmony_ci !converter->ignore_bom() && 5071cb0ef41Sopenharmony_ci !converter->bom_seen()) { 5081cb0ef41Sopenharmony_ci // If the very first result in the stream is a BOM, and we are not 5091cb0ef41Sopenharmony_ci // explicitly told to ignore it, then we mark it for discarding. 5101cb0ef41Sopenharmony_ci if (result[0] == 0xFEFF) 5111cb0ef41Sopenharmony_ci omit_initial_bom = true; 5121cb0ef41Sopenharmony_ci converter->set_bom_seen(true); 5131cb0ef41Sopenharmony_ci } 5141cb0ef41Sopenharmony_ci } 5151cb0ef41Sopenharmony_ci 5161cb0ef41Sopenharmony_ci Local<Value> error; 5171cb0ef41Sopenharmony_ci UChar* output = result.out(); 5181cb0ef41Sopenharmony_ci size_t beginning = 0; 5191cb0ef41Sopenharmony_ci size_t length = result.length() * sizeof(UChar); 5201cb0ef41Sopenharmony_ci 5211cb0ef41Sopenharmony_ci if (omit_initial_bom) { 5221cb0ef41Sopenharmony_ci // Perform `ret = ret.slice(2)`. 
5231cb0ef41Sopenharmony_ci beginning += 2; 5241cb0ef41Sopenharmony_ci length -= 2; 5251cb0ef41Sopenharmony_ci } 5261cb0ef41Sopenharmony_ci 5271cb0ef41Sopenharmony_ci char* value = reinterpret_cast<char*>(output) + beginning; 5281cb0ef41Sopenharmony_ci 5291cb0ef41Sopenharmony_ci if (IsBigEndian()) { 5301cb0ef41Sopenharmony_ci SwapBytes16(value, length); 5311cb0ef41Sopenharmony_ci } 5321cb0ef41Sopenharmony_ci 5331cb0ef41Sopenharmony_ci MaybeLocal<Value> encoded = 5341cb0ef41Sopenharmony_ci StringBytes::Encode(env->isolate(), value, length, UCS2, &error); 5351cb0ef41Sopenharmony_ci 5361cb0ef41Sopenharmony_ci Local<Value> ret; 5371cb0ef41Sopenharmony_ci if (encoded.ToLocal(&ret)) { 5381cb0ef41Sopenharmony_ci args.GetReturnValue().Set(ret); 5391cb0ef41Sopenharmony_ci return; 5401cb0ef41Sopenharmony_ci } 5411cb0ef41Sopenharmony_ci } 5421cb0ef41Sopenharmony_ci 5431cb0ef41Sopenharmony_ci node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( 5441cb0ef41Sopenharmony_ci env->isolate(), 5451cb0ef41Sopenharmony_ci "The encoded data was not valid for encoding %s", 5461cb0ef41Sopenharmony_ci *node::Utf8Value(env->isolate(), from_encoding)); 5471cb0ef41Sopenharmony_ci} 5481cb0ef41Sopenharmony_ci 5491cb0ef41Sopenharmony_ciConverterObject::ConverterObject( 5501cb0ef41Sopenharmony_ci Environment* env, 5511cb0ef41Sopenharmony_ci Local<Object> wrap, 5521cb0ef41Sopenharmony_ci UConverter* converter, 5531cb0ef41Sopenharmony_ci int flags, 5541cb0ef41Sopenharmony_ci const char* sub) 5551cb0ef41Sopenharmony_ci : BaseObject(env, wrap), 5561cb0ef41Sopenharmony_ci Converter(converter, sub), 5571cb0ef41Sopenharmony_ci flags_(flags) { 5581cb0ef41Sopenharmony_ci MakeWeak(); 5591cb0ef41Sopenharmony_ci 5601cb0ef41Sopenharmony_ci switch (ucnv_getType(converter)) { 5611cb0ef41Sopenharmony_ci case UCNV_UTF8: 5621cb0ef41Sopenharmony_ci case UCNV_UTF16_BigEndian: 5631cb0ef41Sopenharmony_ci case UCNV_UTF16_LittleEndian: 5641cb0ef41Sopenharmony_ci flags_ |= CONVERTER_FLAGS_UNICODE; 5651cb0ef41Sopenharmony_ci 
break; 5661cb0ef41Sopenharmony_ci default: { 5671cb0ef41Sopenharmony_ci // Fall through 5681cb0ef41Sopenharmony_ci } 5691cb0ef41Sopenharmony_ci } 5701cb0ef41Sopenharmony_ci} 5711cb0ef41Sopenharmony_ci 5721cb0ef41Sopenharmony_ci 5731cb0ef41Sopenharmony_cibool InitializeICUDirectory(const std::string& path) { 5741cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 5751cb0ef41Sopenharmony_ci if (path.empty()) { 5761cb0ef41Sopenharmony_ci#ifdef NODE_HAVE_SMALL_ICU 5771cb0ef41Sopenharmony_ci // install the 'small' data. 5781cb0ef41Sopenharmony_ci udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status); 5791cb0ef41Sopenharmony_ci#else // !NODE_HAVE_SMALL_ICU 5801cb0ef41Sopenharmony_ci // no small data, so nothing to do. 5811cb0ef41Sopenharmony_ci#endif // !NODE_HAVE_SMALL_ICU 5821cb0ef41Sopenharmony_ci } else { 5831cb0ef41Sopenharmony_ci u_setDataDirectory(path.c_str()); 5841cb0ef41Sopenharmony_ci u_init(&status); 5851cb0ef41Sopenharmony_ci } 5861cb0ef41Sopenharmony_ci return status == U_ZERO_ERROR; 5871cb0ef41Sopenharmony_ci} 5881cb0ef41Sopenharmony_ci 5891cb0ef41Sopenharmony_civoid SetDefaultTimeZone(const char* tzid) { 5901cb0ef41Sopenharmony_ci size_t tzidlen = strlen(tzid) + 1; 5911cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 5921cb0ef41Sopenharmony_ci MaybeStackBuffer<UChar, 256> id(tzidlen); 5931cb0ef41Sopenharmony_ci u_charsToUChars(tzid, id.out(), tzidlen); 5941cb0ef41Sopenharmony_ci // This is threadsafe: 5951cb0ef41Sopenharmony_ci ucal_setDefaultTimeZone(id.out(), &status); 5961cb0ef41Sopenharmony_ci CHECK(U_SUCCESS(status)); 5971cb0ef41Sopenharmony_ci} 5981cb0ef41Sopenharmony_ci 5991cb0ef41Sopenharmony_ciint32_t ToUnicode(MaybeStackBuffer<char>* buf, 6001cb0ef41Sopenharmony_ci const char* input, 6011cb0ef41Sopenharmony_ci size_t length) { 6021cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 6031cb0ef41Sopenharmony_ci uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE; 6041cb0ef41Sopenharmony_ci UIDNA* uidna = 
uidna_openUTS46(options, &status); 6051cb0ef41Sopenharmony_ci if (U_FAILURE(status)) 6061cb0ef41Sopenharmony_ci return -1; 6071cb0ef41Sopenharmony_ci UIDNAInfo info = UIDNA_INFO_INITIALIZER; 6081cb0ef41Sopenharmony_ci 6091cb0ef41Sopenharmony_ci int32_t len = uidna_nameToUnicodeUTF8(uidna, 6101cb0ef41Sopenharmony_ci input, length, 6111cb0ef41Sopenharmony_ci **buf, buf->capacity(), 6121cb0ef41Sopenharmony_ci &info, 6131cb0ef41Sopenharmony_ci &status); 6141cb0ef41Sopenharmony_ci 6151cb0ef41Sopenharmony_ci // Do not check info.errors like we do with ToASCII since ToUnicode always 6161cb0ef41Sopenharmony_ci // returns a string, despite any possible errors that may have occurred. 6171cb0ef41Sopenharmony_ci 6181cb0ef41Sopenharmony_ci if (status == U_BUFFER_OVERFLOW_ERROR) { 6191cb0ef41Sopenharmony_ci status = U_ZERO_ERROR; 6201cb0ef41Sopenharmony_ci buf->AllocateSufficientStorage(len); 6211cb0ef41Sopenharmony_ci len = uidna_nameToUnicodeUTF8(uidna, 6221cb0ef41Sopenharmony_ci input, length, 6231cb0ef41Sopenharmony_ci **buf, buf->capacity(), 6241cb0ef41Sopenharmony_ci &info, 6251cb0ef41Sopenharmony_ci &status); 6261cb0ef41Sopenharmony_ci } 6271cb0ef41Sopenharmony_ci 6281cb0ef41Sopenharmony_ci // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode 6291cb0ef41Sopenharmony_ci // string, regardless of whether an error occurred. 
6301cb0ef41Sopenharmony_ci 6311cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 6321cb0ef41Sopenharmony_ci len = -1; 6331cb0ef41Sopenharmony_ci buf->SetLength(0); 6341cb0ef41Sopenharmony_ci } else { 6351cb0ef41Sopenharmony_ci buf->SetLength(len); 6361cb0ef41Sopenharmony_ci } 6371cb0ef41Sopenharmony_ci 6381cb0ef41Sopenharmony_ci uidna_close(uidna); 6391cb0ef41Sopenharmony_ci return len; 6401cb0ef41Sopenharmony_ci} 6411cb0ef41Sopenharmony_ci 6421cb0ef41Sopenharmony_ciint32_t ToASCII(MaybeStackBuffer<char>* buf, 6431cb0ef41Sopenharmony_ci const char* input, 6441cb0ef41Sopenharmony_ci size_t length, 6451cb0ef41Sopenharmony_ci idna_mode mode) { 6461cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 6471cb0ef41Sopenharmony_ci uint32_t options = // CheckHyphens = false; handled later 6481cb0ef41Sopenharmony_ci UIDNA_CHECK_BIDI | // CheckBidi = true 6491cb0ef41Sopenharmony_ci UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true 6501cb0ef41Sopenharmony_ci UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing 6511cb0ef41Sopenharmony_ci if (mode == idna_mode::kStrict) { 6521cb0ef41Sopenharmony_ci options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict 6531cb0ef41Sopenharmony_ci // VerifyDnsLength = beStrict; 6541cb0ef41Sopenharmony_ci // handled later 6551cb0ef41Sopenharmony_ci } 6561cb0ef41Sopenharmony_ci 6571cb0ef41Sopenharmony_ci UIDNA* uidna = uidna_openUTS46(options, &status); 6581cb0ef41Sopenharmony_ci if (U_FAILURE(status)) 6591cb0ef41Sopenharmony_ci return -1; 6601cb0ef41Sopenharmony_ci UIDNAInfo info = UIDNA_INFO_INITIALIZER; 6611cb0ef41Sopenharmony_ci 6621cb0ef41Sopenharmony_ci int32_t len = uidna_nameToASCII_UTF8(uidna, 6631cb0ef41Sopenharmony_ci input, length, 6641cb0ef41Sopenharmony_ci **buf, buf->capacity(), 6651cb0ef41Sopenharmony_ci &info, 6661cb0ef41Sopenharmony_ci &status); 6671cb0ef41Sopenharmony_ci 6681cb0ef41Sopenharmony_ci if (status == U_BUFFER_OVERFLOW_ERROR) { 6691cb0ef41Sopenharmony_ci status = U_ZERO_ERROR; 6701cb0ef41Sopenharmony_ci 
buf->AllocateSufficientStorage(len); 6711cb0ef41Sopenharmony_ci len = uidna_nameToASCII_UTF8(uidna, 6721cb0ef41Sopenharmony_ci input, length, 6731cb0ef41Sopenharmony_ci **buf, buf->capacity(), 6741cb0ef41Sopenharmony_ci &info, 6751cb0ef41Sopenharmony_ci &status); 6761cb0ef41Sopenharmony_ci } 6771cb0ef41Sopenharmony_ci 6781cb0ef41Sopenharmony_ci // In UTS #46 which specifies ToASCII, certain error conditions are 6791cb0ef41Sopenharmony_ci // configurable through options, and the WHATWG URL Standard promptly elects 6801cb0ef41Sopenharmony_ci // to disable some of them to accommodate for real-world use cases. 6811cb0ef41Sopenharmony_ci // Unfortunately, ICU4C's IDNA module does not support disabling some of 6821cb0ef41Sopenharmony_ci // these options through `options` above, and thus continues throwing 6831cb0ef41Sopenharmony_ci // unnecessary errors. To counter this situation, we just filter out the 6841cb0ef41Sopenharmony_ci // errors that may have happened afterwards, before deciding whether to 6851cb0ef41Sopenharmony_ci // return an error from this function. 6861cb0ef41Sopenharmony_ci 6871cb0ef41Sopenharmony_ci // CheckHyphens = false 6881cb0ef41Sopenharmony_ci // (Specified in the current UTS #46 draft rev. 18.) 
6891cb0ef41Sopenharmony_ci // Refs: 6901cb0ef41Sopenharmony_ci // - https://github.com/whatwg/url/issues/53 6911cb0ef41Sopenharmony_ci // - https://github.com/whatwg/url/pull/309 6921cb0ef41Sopenharmony_ci // - http://www.unicode.org/review/pri317/ 6931cb0ef41Sopenharmony_ci // - http://www.unicode.org/reports/tr46/tr46-18.html 6941cb0ef41Sopenharmony_ci // - https://www.icann.org/news/announcement-2000-01-07-en 6951cb0ef41Sopenharmony_ci info.errors &= ~UIDNA_ERROR_HYPHEN_3_4; 6961cb0ef41Sopenharmony_ci info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN; 6971cb0ef41Sopenharmony_ci info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN; 6981cb0ef41Sopenharmony_ci 6991cb0ef41Sopenharmony_ci if (mode != idna_mode::kStrict) { 7001cb0ef41Sopenharmony_ci // VerifyDnsLength = beStrict 7011cb0ef41Sopenharmony_ci info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; 7021cb0ef41Sopenharmony_ci info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; 7031cb0ef41Sopenharmony_ci info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; 7041cb0ef41Sopenharmony_ci } 7051cb0ef41Sopenharmony_ci 7061cb0ef41Sopenharmony_ci if (U_FAILURE(status) || (mode != idna_mode::kLenient && info.errors != 0)) { 7071cb0ef41Sopenharmony_ci len = -1; 7081cb0ef41Sopenharmony_ci buf->SetLength(0); 7091cb0ef41Sopenharmony_ci } else { 7101cb0ef41Sopenharmony_ci buf->SetLength(len); 7111cb0ef41Sopenharmony_ci } 7121cb0ef41Sopenharmony_ci 7131cb0ef41Sopenharmony_ci uidna_close(uidna); 7141cb0ef41Sopenharmony_ci return len; 7151cb0ef41Sopenharmony_ci} 7161cb0ef41Sopenharmony_ci 7171cb0ef41Sopenharmony_cistatic void ToUnicode(const FunctionCallbackInfo<Value>& args) { 7181cb0ef41Sopenharmony_ci Environment* env = Environment::GetCurrent(args); 7191cb0ef41Sopenharmony_ci CHECK_GE(args.Length(), 1); 7201cb0ef41Sopenharmony_ci CHECK(args[0]->IsString()); 7211cb0ef41Sopenharmony_ci Utf8Value val(env->isolate(), args[0]); 7221cb0ef41Sopenharmony_ci 7231cb0ef41Sopenharmony_ci MaybeStackBuffer<char> buf; 7241cb0ef41Sopenharmony_ci int32_t len = ToUnicode(&buf, 
*val, val.length()); 7251cb0ef41Sopenharmony_ci 7261cb0ef41Sopenharmony_ci if (len < 0) { 7271cb0ef41Sopenharmony_ci return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode"); 7281cb0ef41Sopenharmony_ci } 7291cb0ef41Sopenharmony_ci 7301cb0ef41Sopenharmony_ci args.GetReturnValue().Set( 7311cb0ef41Sopenharmony_ci String::NewFromUtf8(env->isolate(), 7321cb0ef41Sopenharmony_ci *buf, 7331cb0ef41Sopenharmony_ci NewStringType::kNormal, 7341cb0ef41Sopenharmony_ci len).ToLocalChecked()); 7351cb0ef41Sopenharmony_ci} 7361cb0ef41Sopenharmony_ci 7371cb0ef41Sopenharmony_cistatic void ToASCII(const FunctionCallbackInfo<Value>& args) { 7381cb0ef41Sopenharmony_ci Environment* env = Environment::GetCurrent(args); 7391cb0ef41Sopenharmony_ci CHECK_GE(args.Length(), 1); 7401cb0ef41Sopenharmony_ci CHECK(args[0]->IsString()); 7411cb0ef41Sopenharmony_ci Utf8Value val(env->isolate(), args[0]); 7421cb0ef41Sopenharmony_ci // optional arg 7431cb0ef41Sopenharmony_ci bool lenient = args[1]->BooleanValue(env->isolate()); 7441cb0ef41Sopenharmony_ci idna_mode mode = lenient ? 
idna_mode::kLenient : idna_mode::kDefault; 7451cb0ef41Sopenharmony_ci 7461cb0ef41Sopenharmony_ci MaybeStackBuffer<char> buf; 7471cb0ef41Sopenharmony_ci int32_t len = ToASCII(&buf, *val, val.length(), mode); 7481cb0ef41Sopenharmony_ci 7491cb0ef41Sopenharmony_ci if (len < 0) { 7501cb0ef41Sopenharmony_ci return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII"); 7511cb0ef41Sopenharmony_ci } 7521cb0ef41Sopenharmony_ci 7531cb0ef41Sopenharmony_ci args.GetReturnValue().Set( 7541cb0ef41Sopenharmony_ci String::NewFromUtf8(env->isolate(), 7551cb0ef41Sopenharmony_ci *buf, 7561cb0ef41Sopenharmony_ci NewStringType::kNormal, 7571cb0ef41Sopenharmony_ci len).ToLocalChecked()); 7581cb0ef41Sopenharmony_ci} 7591cb0ef41Sopenharmony_ci 7601cb0ef41Sopenharmony_ci// This is similar to wcwidth except that it takes the current unicode 7611cb0ef41Sopenharmony_ci// character properties database into consideration, allowing it to 7621cb0ef41Sopenharmony_ci// correctly calculate the column widths of things like emoji's and 7631cb0ef41Sopenharmony_ci// newer wide characters. wcwidth, on the other hand, uses a fixed 7641cb0ef41Sopenharmony_ci// algorithm that does not take things like emoji into proper 7651cb0ef41Sopenharmony_ci// consideration. 7661cb0ef41Sopenharmony_ci// 7671cb0ef41Sopenharmony_ci// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by 7681cb0ef41Sopenharmony_ci// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs 7691cb0ef41Sopenharmony_ci// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't 7701cb0ef41Sopenharmony_ci// allow it to be input. Linux's PTY terminal prints control characters as 7711cb0ef41Sopenharmony_ci// Narrow rhombi. 7721cb0ef41Sopenharmony_ci// 7731cb0ef41Sopenharmony_ci// TODO(TimothyGu): Investigate Hangul jamo characters. 
Medial vowels and final 7741cb0ef41Sopenharmony_ci// consonants are 0-width when combined with initial consonants; otherwise they 7751cb0ef41Sopenharmony_ci// are technically Wide. But many terminals (including Konsole and 7761cb0ef41Sopenharmony_ci// VTE/GLib-based) implement all medials and finals as 0-width. 7771cb0ef41Sopenharmony_ci// 7781cb0ef41Sopenharmony_ci// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width 7791cb0ef41Sopenharmony_ci// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420 7801cb0ef41Sopenharmony_ci// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223 7811cb0ef41Sopenharmony_cistatic int GetColumnWidth(UChar32 codepoint, 7821cb0ef41Sopenharmony_ci bool ambiguous_as_full_width = false) { 7831cb0ef41Sopenharmony_ci // UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a 7841cb0ef41Sopenharmony_ci // codepoint as being full width, wide, ambiguous, neutral, narrow, 7851cb0ef41Sopenharmony_ci // or halfwidth. 
7861cb0ef41Sopenharmony_ci const int eaw = u_getIntPropertyValue(codepoint, UCHAR_EAST_ASIAN_WIDTH); 7871cb0ef41Sopenharmony_ci switch (eaw) { 7881cb0ef41Sopenharmony_ci case U_EA_FULLWIDTH: 7891cb0ef41Sopenharmony_ci case U_EA_WIDE: 7901cb0ef41Sopenharmony_ci return 2; 7911cb0ef41Sopenharmony_ci case U_EA_AMBIGUOUS: 7921cb0ef41Sopenharmony_ci // See: http://www.unicode.org/reports/tr11/#Ambiguous for details 7931cb0ef41Sopenharmony_ci if (ambiguous_as_full_width) { 7941cb0ef41Sopenharmony_ci return 2; 7951cb0ef41Sopenharmony_ci } 7961cb0ef41Sopenharmony_ci // If ambiguous_as_full_width is false: 7971cb0ef41Sopenharmony_ci [[fallthrough]]; 7981cb0ef41Sopenharmony_ci case U_EA_NEUTRAL: 7991cb0ef41Sopenharmony_ci if (u_hasBinaryProperty(codepoint, UCHAR_EMOJI_PRESENTATION)) { 8001cb0ef41Sopenharmony_ci return 2; 8011cb0ef41Sopenharmony_ci } 8021cb0ef41Sopenharmony_ci [[fallthrough]]; 8031cb0ef41Sopenharmony_ci case U_EA_HALFWIDTH: 8041cb0ef41Sopenharmony_ci case U_EA_NARROW: 8051cb0ef41Sopenharmony_ci default: 8061cb0ef41Sopenharmony_ci const auto zero_width_mask = U_GC_CC_MASK | // C0/C1 control code 8071cb0ef41Sopenharmony_ci U_GC_CF_MASK | // Format control character 8081cb0ef41Sopenharmony_ci U_GC_ME_MASK | // Enclosing mark 8091cb0ef41Sopenharmony_ci U_GC_MN_MASK; // Nonspacing mark 8101cb0ef41Sopenharmony_ci if (codepoint != 0x00AD && // SOFT HYPHEN is Cf but not zero-width 8111cb0ef41Sopenharmony_ci ((U_MASK(u_charType(codepoint)) & zero_width_mask) || 8121cb0ef41Sopenharmony_ci u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) { 8131cb0ef41Sopenharmony_ci return 0; 8141cb0ef41Sopenharmony_ci } 8151cb0ef41Sopenharmony_ci return 1; 8161cb0ef41Sopenharmony_ci } 8171cb0ef41Sopenharmony_ci} 8181cb0ef41Sopenharmony_ci 8191cb0ef41Sopenharmony_ci// Returns the column width for the given String. 
8201cb0ef41Sopenharmony_cistatic void GetStringWidth(const FunctionCallbackInfo<Value>& args) { 8211cb0ef41Sopenharmony_ci Environment* env = Environment::GetCurrent(args); 8221cb0ef41Sopenharmony_ci CHECK(args[0]->IsString()); 8231cb0ef41Sopenharmony_ci 8241cb0ef41Sopenharmony_ci bool ambiguous_as_full_width = args[1]->IsTrue(); 8251cb0ef41Sopenharmony_ci bool expand_emoji_sequence = !args[2]->IsBoolean() || args[2]->IsTrue(); 8261cb0ef41Sopenharmony_ci 8271cb0ef41Sopenharmony_ci TwoByteValue value(env->isolate(), args[0]); 8281cb0ef41Sopenharmony_ci // reinterpret_cast is required by windows to compile 8291cb0ef41Sopenharmony_ci UChar* str = reinterpret_cast<UChar*>(*value); 8301cb0ef41Sopenharmony_ci static_assert(sizeof(*str) == sizeof(**value), 8311cb0ef41Sopenharmony_ci "sizeof(*str) == sizeof(**value)"); 8321cb0ef41Sopenharmony_ci UChar32 c = 0; 8331cb0ef41Sopenharmony_ci UChar32 p; 8341cb0ef41Sopenharmony_ci size_t n = 0; 8351cb0ef41Sopenharmony_ci uint32_t width = 0; 8361cb0ef41Sopenharmony_ci 8371cb0ef41Sopenharmony_ci while (n < value.length()) { 8381cb0ef41Sopenharmony_ci p = c; 8391cb0ef41Sopenharmony_ci U16_NEXT(str, n, value.length(), c); 8401cb0ef41Sopenharmony_ci // Don't count individual emoji codepoints that occur within an 8411cb0ef41Sopenharmony_ci // emoji sequence. This is not necessarily foolproof. Some 8421cb0ef41Sopenharmony_ci // environments display emoji sequences in the appropriate 8431cb0ef41Sopenharmony_ci // condensed form (as a single emoji glyph), other environments 8441cb0ef41Sopenharmony_ci // may not understand an emoji sequence and will display each 8451cb0ef41Sopenharmony_ci // individual emoji separately. When this happens, the width 8461cb0ef41Sopenharmony_ci // calculated will be off, and there's no reliable way of knowing 8471cb0ef41Sopenharmony_ci // in advance if a particular sequence is going to be supported. 
8481cb0ef41Sopenharmony_ci // The expand_emoji_sequence option allows the caller to skip this 8491cb0ef41Sopenharmony_ci // check and count each code within an emoji sequence separately. 8501cb0ef41Sopenharmony_ci // https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences 8511cb0ef41Sopenharmony_ci if (!expand_emoji_sequence && 8521cb0ef41Sopenharmony_ci n > 0 && p == 0x200d && // 0x200d == ZWJ (zero width joiner) 8531cb0ef41Sopenharmony_ci (u_hasBinaryProperty(c, UCHAR_EMOJI_PRESENTATION) || 8541cb0ef41Sopenharmony_ci u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER))) { 8551cb0ef41Sopenharmony_ci continue; 8561cb0ef41Sopenharmony_ci } 8571cb0ef41Sopenharmony_ci width += GetColumnWidth(c, ambiguous_as_full_width); 8581cb0ef41Sopenharmony_ci } 8591cb0ef41Sopenharmony_ci args.GetReturnValue().Set(width); 8601cb0ef41Sopenharmony_ci} 8611cb0ef41Sopenharmony_ci 8621cb0ef41Sopenharmony_civoid Initialize(Local<Object> target, 8631cb0ef41Sopenharmony_ci Local<Value> unused, 8641cb0ef41Sopenharmony_ci Local<Context> context, 8651cb0ef41Sopenharmony_ci void* priv) { 8661cb0ef41Sopenharmony_ci Environment* env = Environment::GetCurrent(context); 8671cb0ef41Sopenharmony_ci SetMethod(context, target, "toUnicode", ToUnicode); 8681cb0ef41Sopenharmony_ci SetMethod(context, target, "toASCII", ToASCII); 8691cb0ef41Sopenharmony_ci SetMethod(context, target, "getStringWidth", GetStringWidth); 8701cb0ef41Sopenharmony_ci 8711cb0ef41Sopenharmony_ci // One-shot converters 8721cb0ef41Sopenharmony_ci SetMethod(context, target, "icuErrName", ICUErrorName); 8731cb0ef41Sopenharmony_ci SetMethod(context, target, "transcode", Transcode); 8741cb0ef41Sopenharmony_ci 8751cb0ef41Sopenharmony_ci // ConverterObject 8761cb0ef41Sopenharmony_ci { 8771cb0ef41Sopenharmony_ci Local<FunctionTemplate> t = NewFunctionTemplate(env->isolate(), nullptr); 8781cb0ef41Sopenharmony_ci t->Inherit(BaseObject::GetConstructorTemplate(env)); 8791cb0ef41Sopenharmony_ci 
t->InstanceTemplate()->SetInternalFieldCount( 8801cb0ef41Sopenharmony_ci ConverterObject::kInternalFieldCount); 8811cb0ef41Sopenharmony_ci Local<String> converter_string = 8821cb0ef41Sopenharmony_ci FIXED_ONE_BYTE_STRING(env->isolate(), "Converter"); 8831cb0ef41Sopenharmony_ci t->SetClassName(converter_string); 8841cb0ef41Sopenharmony_ci env->set_i18n_converter_template(t->InstanceTemplate()); 8851cb0ef41Sopenharmony_ci } 8861cb0ef41Sopenharmony_ci 8871cb0ef41Sopenharmony_ci SetMethod(context, target, "getConverter", ConverterObject::Create); 8881cb0ef41Sopenharmony_ci SetMethod(context, target, "decode", ConverterObject::Decode); 8891cb0ef41Sopenharmony_ci SetMethod(context, target, "hasConverter", ConverterObject::Has); 8901cb0ef41Sopenharmony_ci} 8911cb0ef41Sopenharmony_ci 8921cb0ef41Sopenharmony_civoid RegisterExternalReferences(ExternalReferenceRegistry* registry) { 8931cb0ef41Sopenharmony_ci registry->Register(ToUnicode); 8941cb0ef41Sopenharmony_ci registry->Register(ToASCII); 8951cb0ef41Sopenharmony_ci registry->Register(GetStringWidth); 8961cb0ef41Sopenharmony_ci registry->Register(ICUErrorName); 8971cb0ef41Sopenharmony_ci registry->Register(Transcode); 8981cb0ef41Sopenharmony_ci registry->Register(ConverterObject::Create); 8991cb0ef41Sopenharmony_ci registry->Register(ConverterObject::Decode); 9001cb0ef41Sopenharmony_ci registry->Register(ConverterObject::Has); 9011cb0ef41Sopenharmony_ci} 9021cb0ef41Sopenharmony_ci 9031cb0ef41Sopenharmony_ci} // namespace i18n 9041cb0ef41Sopenharmony_ci} // namespace node 9051cb0ef41Sopenharmony_ci 9061cb0ef41Sopenharmony_ciNODE_BINDING_CONTEXT_AWARE_INTERNAL(icu, node::i18n::Initialize) 9071cb0ef41Sopenharmony_ciNODE_BINDING_EXTERNAL_REFERENCE(icu, node::i18n::RegisterExternalReferences) 9081cb0ef41Sopenharmony_ci 9091cb0ef41Sopenharmony_ci#endif // NODE_HAVE_I18N_SUPPORT 910