1/* 2 * Copyright (c) 2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "js_textdecoder.h" 17#include <algorithm> 18#include <codecvt> 19 20#include <locale> 21#include <map> 22#include <string> 23#include <vector> 24 25#include "ohos/init_data.h" 26#include "securec.h" 27#include "unicode/unistr.h" 28#include "util_helper.h" 29#include "tools/log.h" 30 31namespace OHOS::Util { 32 using namespace Commonlibrary::Platform; 33 34 TextDecoder::TextDecoder(const std::string &buff, int32_t flags) 35 : encStr_(buff), tranTool_(nullptr, nullptr) 36 { 37 label_ |= flags; 38#if !defined(__ARKUI_CROSS__) 39 SetHwIcuDirectory(); 40#endif 41 bool fatal = (flags & static_cast<int32_t>(ConverterFlags::FATAL_FLG)) == 42 static_cast<int32_t>(ConverterFlags::FATAL_FLG); 43 UErrorCode codeflag = U_ZERO_ERROR; 44 UConverter *conv = CreateConverter(encStr_, codeflag); 45 if (U_FAILURE(codeflag)) { 46 HILOG_ERROR("ucnv_open failed !"); 47 return; 48 } 49 if (fatal) { 50 codeflag = U_ZERO_ERROR; 51 ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, &codeflag); 52 } 53 TransformToolPointer tempTranTool(conv, ConverterClose); 54 tranTool_ = std::move(tempTranTool); 55 } 56 57 //static 58 bool TextDecoder::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len) 59 { 60 uint32_t index = 0; 61 for (; index + 4 <= utf16Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed 62 // Check if all four characters in the current block are ASCII characters 63 if (!IsASCIICharacter(utf16Data[index]) || 64 !IsASCIICharacter(utf16Data[index + 1]) || // 1: the second element of the block 65 !IsASCIICharacter(utf16Data[index + 2]) || // 2: the third element of the block 66 !IsASCIICharacter(utf16Data[index + 3])) { // 3: the fourth element of the block 67 return false; 68 } 69 } 70 // Check remaining characters if they are ASCII 71 for (; index < utf16Len; ++index) { 72 if (!IsASCIICharacter(utf16Data[index])) { 73 return false; 74 } 75 } 76 return true; 77 } 78 79 std::pair<char *, bool> TextDecoder::ConvertToChar(UChar *uchar, size_t length, char *tempCharArray) 80 { 81 uint16_t *uint16Data = reinterpret_cast<uint16_t *>(uchar); 82 if (CanBeCompressed(uint16Data, length)) { 83 if (length <= 0) { 84 HILOG_ERROR("textencoder:: length is error"); 85 return std::make_pair(nullptr, false); 86 } 87 char *strUtf8; 88 if (length <= TEMP_CHAR_LENGTH) { 89 strUtf8 = tempCharArray; 90 } else { 91 strUtf8 = new (std::nothrow) char[length]; 92 if (strUtf8 == nullptr) { 93 HILOG_ERROR("textencoder:: data allocation failed"); 94 return std::make_pair(nullptr, false); 95 } 96 } 97 for (size_t i = 0; i < length; ++i) { 98 strUtf8[i] = static_cast<char>(uchar[i]); 99 } 100 return std::make_pair(strUtf8, true); 101 } 102 return std::make_pair(nullptr, false); 103 } 104 105 napi_value TextDecoder::Decode(napi_env env, napi_value src, bool iflag) 106 { 107 uint8_t flags = 0; 108 flags |= (iflag ? 0 : static_cast<uint8_t>(ConverterFlags::FLUSH_FLG)); 109 UBool flush = ((flags & static_cast<uint8_t>(ConverterFlags::FLUSH_FLG))) == 110 static_cast<uint8_t>(ConverterFlags::FLUSH_FLG); 111 napi_typedarray_type type; 112 size_t length = 0; 113 void *data = nullptr; 114 size_t byteOffset = 0; 115 napi_value arrayBuffer = nullptr; 116 NAPI_CALL(env, napi_get_typedarray_info(env, src, &type, &length, &data, &arrayBuffer, &byteOffset)); 117 const char *source = ReplaceNull(data, length); 118 size_t limit = GetMinByteSize() * length; 119 size_t len = limit * sizeof(UChar); 120 UChar *arr = nullptr; 121 if (limit > 0) { 122 arr = new (std::nothrow) UChar[limit + 1]; 123 if (arr == nullptr) { 124 HILOG_ERROR("decode arr is nullptr"); 125 return nullptr; 126 } 127 if (memset_s(arr, len + sizeof(UChar), 0, len + sizeof(UChar)) != EOK) { 128 HILOG_ERROR("decode arr memset_s failed"); 129 FreedMemory(arr); 130 return nullptr; 131 } 132 } else { 133 HILOG_DEBUG("limit is error"); 134 return nullptr; 135 } 136 UChar *target = arr; 137 size_t tarStartPos = reinterpret_cast<uintptr_t>(arr); 138 UErrorCode codeFlag = U_ZERO_ERROR; 139 ucnv_toUnicode(GetConverterPtr(), &target, target + len, &source, source + length, nullptr, flush, &codeFlag); 140 if (codeFlag != U_ZERO_ERROR) { 141 return ThrowError(env, "TextDecoder decoding error."); 142 } 143 size_t resultLength = 0; 144 bool omitInitialBom = false; 145 DecodeArr decArr(target, tarStartPos, limit); 146 SetBomFlag(arr, codeFlag, decArr, resultLength, omitInitialBom); 147 UChar *arrDat = arr; 148 if (omitInitialBom && resultLength > 0) { 149 arrDat = &arr[2]; // 2: Obtains the 2 value of the array. 150 } 151 std::string tepStr = ConvertToString(arrDat, length); 152 napi_value resultStr = nullptr; 153 NAPI_CALL(env, napi_create_string_utf8(env, tepStr.c_str(), tepStr.size(), &resultStr)); 154 FreedMemory(arr); 155 if (flush) { 156 label_ &= static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG); 157 Reset(); 158 } 159 return resultStr; 160 } 161 162 napi_value TextDecoder::GetResultStr(napi_env env, UChar *arrDat, 163 size_t length) 164 { 165 napi_value resultStr = nullptr; 166 if (length <= TEMP_CHAR_LENGTH) { 167 char tempCharArray[TEMP_CHAR_LENGTH]; 168 std::pair<char *, bool> tempPair = ConvertToChar(arrDat, length, tempCharArray); 169 if (tempPair.second == true) { 170 char *utf8Str = tempPair.first; 171 napi_create_string_utf8(env, utf8Str, length, &resultStr); 172 } else { 173 napi_create_string_utf16(env, reinterpret_cast<char16_t *>(arrDat), length, &resultStr); 174 } 175 } else { 176 std::pair<char *, bool> tempPair = ConvertToChar(arrDat, length, nullptr); 177 if (tempPair.second == true) { 178 char *utf8Str = tempPair.first; 179 napi_create_string_utf8(env, utf8Str, length, &resultStr); 180 NAPI_ASSERT(env, utf8Str != nullptr, "Data allocation failed"); 181 delete[] utf8Str; 182 } else { 183 napi_create_string_utf16(env, reinterpret_cast<char16_t *>(arrDat), length, &resultStr); 184 } 185 } 186 return resultStr; 187 } 188 189 napi_value TextDecoder::DecodeToString(napi_env env, 190 napi_value src, bool iflag) 191 { 192 uint8_t flags = 0; 193 flags |= (iflag ? 0 : static_cast<uint8_t>(ConverterFlags::FLUSH_FLG)); 194 UBool flush = (flags & static_cast<uint8_t>(ConverterFlags::FLUSH_FLG)) == 195 static_cast<uint8_t>(ConverterFlags::FLUSH_FLG); 196 napi_typedarray_type type; 197 size_t length = 0; 198 void *data = nullptr; 199 size_t byteOffset = 0; 200 napi_value arrayBuffer = nullptr; 201 napi_get_typedarray_info(env, src, &type, &length, &data, &arrayBuffer, &byteOffset); 202 const char *source = static_cast<char *>(data); 203 size_t limit = GetMinByteSize() * length; 204 size_t len = limit * sizeof(UChar); 205 UChar *arr = nullptr; 206 if (limit > 0) { 207 arr = new (std::nothrow) UChar[limit + 1]{0}; 208 if (arr == nullptr) { 209 HILOG_DEBUG("arr is nullptr"); 210 return nullptr; 211 } 212 } else { 213 HILOG_DEBUG("limit is error"); 214 return nullptr; 215 } 216 UChar *target = arr; 217 UErrorCode codeFlag = U_ZERO_ERROR; 218 ucnv_toUnicode(GetConverterPtr(), &target, target + len, &source, source + length, nullptr, flush, &codeFlag); 219 if (codeFlag != U_ZERO_ERROR) { 220 FreedMemory(arr); 221 napi_throw_error(env, "401", 222 "Parameter error. Please check if the decode data matches the encoding format."); 223 return nullptr; 224 } 225 size_t resultLen = target - arr; 226 bool omitInitialBom = false; 227 SetIgnoreBOM(arr, resultLen, omitInitialBom); 228 UChar *arrDat = arr; 229 if (omitInitialBom) { 230 arrDat = &arr[1]; 231 resultLen--; 232 } 233 napi_value resultStr = GetResultStr(env, arrDat, resultLen); 234 FreedMemory(arr); 235 if (flush) { 236 label_ &= ~static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG); 237 Reset(); 238 } 239 return resultStr; 240 } 241 242 size_t TextDecoder::GetMinByteSize() const 243 { 244 if (tranTool_ == nullptr) { 245 return 0; 246 } 247 size_t res = static_cast<size_t>(ucnv_getMinCharSize(tranTool_.get())); 248 return res; 249 } 250 251 void TextDecoder::Reset() const 252 { 253 if (tranTool_ == nullptr) { 254 return; 255 } 256 ucnv_reset(tranTool_.get()); 257 } 258 259 void TextDecoder::FreedMemory(UChar *&pData) 260 { 261 if (pData != nullptr) { 262 delete[] pData; 263 pData = nullptr; 264 } 265 } 266 267 void TextDecoder::SetBomFlag(const UChar *arr, const UErrorCode codeFlag, const DecodeArr decArr, 268 size_t &rstLen, bool &bomFlag) 269 { 270 if (arr == nullptr) { 271 return; 272 } 273 if (U_SUCCESS(codeFlag)) { 274 if (decArr.limitLen > 0) { 275 rstLen = reinterpret_cast<uintptr_t>(decArr.target) - decArr.tarStartPos; 276 if (rstLen > 0 && IsUnicode() && !IsIgnoreBom() && !IsBomFlag()) { 277 bomFlag = (arr[0] == 0xFEFF) ? true : false; 278 label_ |= static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG); 279 } 280 } 281 } 282 } 283 284 void TextDecoder::SetIgnoreBOM(const UChar *arr, size_t resultLen, bool &bomFlag) 285 { 286 switch (ucnv_getType(GetConverterPtr())) { 287 case UCNV_UTF8: 288 case UCNV_UTF16_BigEndian: 289 case UCNV_UTF16_LittleEndian: 290 label_ |= static_cast<int32_t>(ConverterFlags::UNICODE_FLG); 291 break; 292 default: 293 break; 294 } 295 if (resultLen > 0 && IsUnicode() && IsIgnoreBom()) { 296 bomFlag = (arr[0] == 0xFEFF) ? true : false; 297 } 298 label_ |= static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG); 299 } 300 301 napi_value TextDecoder::ThrowError(napi_env env, const char* errMessage) 302 { 303 napi_value utilError = nullptr; 304 napi_value code = nullptr; 305 uint32_t errCode = 10200019; 306 napi_create_uint32(env, errCode, &code); 307 napi_value name = nullptr; 308 std::string errName = "BusinessError"; 309 napi_value msg = nullptr; 310 napi_create_string_utf8(env, errMessage, NAPI_AUTO_LENGTH, &msg); 311 napi_create_string_utf8(env, errName.c_str(), NAPI_AUTO_LENGTH, &name); 312 napi_create_error(env, nullptr, msg, &utilError); 313 napi_set_named_property(env, utilError, "code", code); 314 napi_set_named_property(env, utilError, "name", name); 315 napi_throw(env, utilError); 316 napi_value res = nullptr; 317 NAPI_CALL(env, napi_get_undefined(env, &res)); 318 return res; 319 } 320 321 const char* TextDecoder::ReplaceNull(void *data, size_t length) const 322 { 323 char *str = static_cast<char*>(data); 324 if (encStr_ == "utf-8") { 325 for (size_t i = 0; i < length; ++i) { 326 if (str[i] == '\0') { 327 str[i] = ' '; 328 } 329 } 330 } 331 return const_cast<const char*>(str); 332 } 333} 334