1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "js_textdecoder.h"
17 #include <algorithm>
18 #include <codecvt>
19 
20 #include <locale>
21 #include <map>
22 #include <string>
23 #include <vector>
24 
25 #include "ohos/init_data.h"
26 #include "securec.h"
27 #include "unicode/unistr.h"
28 #include "util_helper.h"
29 #include "tools/log.h"
30 
31 namespace OHOS::Util {
32     using namespace Commonlibrary::Platform;
33 
TextDecoder(const std::string &buff, int32_t flags)34     TextDecoder::TextDecoder(const std::string &buff, int32_t flags)
35         : encStr_(buff), tranTool_(nullptr, nullptr)
36     {
37         label_ |= flags;
38 #if !defined(__ARKUI_CROSS__)
39         SetHwIcuDirectory();
40 #endif
41         bool fatal = (flags & static_cast<int32_t>(ConverterFlags::FATAL_FLG)) ==
42              static_cast<int32_t>(ConverterFlags::FATAL_FLG);
43         UErrorCode codeflag = U_ZERO_ERROR;
44         UConverter *conv = CreateConverter(encStr_, codeflag);
45         if (U_FAILURE(codeflag)) {
46             HILOG_ERROR("ucnv_open failed !");
47             return;
48         }
49         if (fatal) {
50             codeflag = U_ZERO_ERROR;
51             ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, &codeflag);
52         }
53         TransformToolPointer tempTranTool(conv, ConverterClose);
54         tranTool_ = std::move(tempTranTool);
55     }
56 
57     //static
CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)58     bool TextDecoder::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
59     {
60         uint32_t index = 0;
61         for (; index + 4 <= utf16Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
62             // Check if all four characters in the current block are ASCII characters
63             if (!IsASCIICharacter(utf16Data[index]) ||
64                 !IsASCIICharacter(utf16Data[index + 1]) || // 1: the second element of the block
65                 !IsASCIICharacter(utf16Data[index + 2]) || // 2: the third element of the block
66                 !IsASCIICharacter(utf16Data[index + 3])) { // 3: the fourth element of the block
67                 return false;
68             }
69         }
70         // Check remaining characters if they are ASCII
71         for (; index < utf16Len; ++index) {
72             if (!IsASCIICharacter(utf16Data[index])) {
73                 return false;
74             }
75         }
76         return true;
77     }
78 
ConvertToChar(UChar *uchar, size_t length, char *tempCharArray)79     std::pair<char *, bool> TextDecoder::ConvertToChar(UChar *uchar, size_t length, char *tempCharArray)
80     {
81         uint16_t *uint16Data = reinterpret_cast<uint16_t *>(uchar);
82         if (CanBeCompressed(uint16Data, length)) {
83             if (length <= 0) {
84                 HILOG_ERROR("textencoder:: length is error");
85                 return std::make_pair(nullptr, false);
86             }
87             char *strUtf8;
88             if (length <= TEMP_CHAR_LENGTH) {
89                 strUtf8 = tempCharArray;
90             } else {
91                 strUtf8 = new (std::nothrow) char[length];
92                 if (strUtf8 == nullptr) {
93                     HILOG_ERROR("textencoder:: data allocation failed");
94                     return std::make_pair(nullptr, false);
95                 }
96             }
97             for (size_t i = 0; i < length; ++i) {
98                 strUtf8[i] = static_cast<char>(uchar[i]);
99             }
100             return std::make_pair(strUtf8, true);
101         }
102         return std::make_pair(nullptr, false);
103     }
104 
Decode(napi_env env, napi_value src, bool iflag)105     napi_value TextDecoder::Decode(napi_env env, napi_value src, bool iflag)
106     {
107         uint8_t flags = 0;
108         flags |= (iflag ? 0 : static_cast<uint8_t>(ConverterFlags::FLUSH_FLG));
109         UBool flush = ((flags & static_cast<uint8_t>(ConverterFlags::FLUSH_FLG))) ==
110         static_cast<uint8_t>(ConverterFlags::FLUSH_FLG);
111         napi_typedarray_type type;
112         size_t length = 0;
113         void *data = nullptr;
114         size_t byteOffset = 0;
115         napi_value arrayBuffer = nullptr;
116         NAPI_CALL(env, napi_get_typedarray_info(env, src, &type, &length, &data, &arrayBuffer, &byteOffset));
117         const char *source = ReplaceNull(data, length);
118         size_t limit = GetMinByteSize() * length;
119         size_t len = limit * sizeof(UChar);
120         UChar *arr = nullptr;
121         if (limit > 0) {
122             arr = new (std::nothrow) UChar[limit + 1];
123             if (arr == nullptr) {
124                 HILOG_ERROR("decode arr is nullptr");
125                 return nullptr;
126             }
127             if (memset_s(arr, len + sizeof(UChar), 0, len + sizeof(UChar)) != EOK) {
128                 HILOG_ERROR("decode arr memset_s failed");
129                 FreedMemory(arr);
130                 return nullptr;
131             }
132         } else {
133             HILOG_DEBUG("limit is error");
134             return nullptr;
135         }
136         UChar *target = arr;
137         size_t tarStartPos = reinterpret_cast<uintptr_t>(arr);
138         UErrorCode codeFlag = U_ZERO_ERROR;
139         ucnv_toUnicode(GetConverterPtr(), &target, target + len, &source, source + length, nullptr, flush, &codeFlag);
140         if (codeFlag != U_ZERO_ERROR) {
141             return ThrowError(env, "TextDecoder decoding error.");
142         }
143         size_t resultLength = 0;
144         bool omitInitialBom = false;
145         DecodeArr decArr(target, tarStartPos, limit);
146         SetBomFlag(arr, codeFlag, decArr, resultLength, omitInitialBom);
147         UChar *arrDat = arr;
148         if (omitInitialBom && resultLength > 0) {
149             arrDat = &arr[2]; // 2: Obtains the 2 value of the array.
150         }
151         std::string tepStr = ConvertToString(arrDat, length);
152         napi_value resultStr = nullptr;
153         NAPI_CALL(env, napi_create_string_utf8(env, tepStr.c_str(), tepStr.size(), &resultStr));
154         FreedMemory(arr);
155         if (flush) {
156             label_ &= static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG);
157             Reset();
158         }
159         return resultStr;
160     }
161 
GetResultStr(napi_env env, UChar *arrDat, size_t length)162     napi_value TextDecoder::GetResultStr(napi_env env, UChar *arrDat,
163                                          size_t length)
164     {
165         napi_value resultStr = nullptr;
166         if (length <= TEMP_CHAR_LENGTH) {
167             char tempCharArray[TEMP_CHAR_LENGTH];
168             std::pair<char *, bool> tempPair = ConvertToChar(arrDat, length, tempCharArray);
169             if (tempPair.second == true) {
170                 char *utf8Str = tempPair.first;
171                 napi_create_string_utf8(env, utf8Str, length, &resultStr);
172             } else {
173                 napi_create_string_utf16(env, reinterpret_cast<char16_t *>(arrDat), length, &resultStr);
174             }
175         } else {
176             std::pair<char *, bool> tempPair = ConvertToChar(arrDat, length, nullptr);
177             if (tempPair.second == true) {
178                 char *utf8Str = tempPair.first;
179                 napi_create_string_utf8(env, utf8Str, length, &resultStr);
180                 NAPI_ASSERT(env, utf8Str != nullptr, "Data allocation failed");
181                 delete[] utf8Str;
182             } else {
183                 napi_create_string_utf16(env, reinterpret_cast<char16_t *>(arrDat), length, &resultStr);
184             }
185         }
186         return resultStr;
187     }
188 
DecodeToString(napi_env env, napi_value src, bool iflag)189     napi_value TextDecoder::DecodeToString(napi_env env,
190                                            napi_value src, bool iflag)
191     {
192         uint8_t flags = 0;
193         flags |= (iflag ? 0 : static_cast<uint8_t>(ConverterFlags::FLUSH_FLG));
194         UBool flush = (flags & static_cast<uint8_t>(ConverterFlags::FLUSH_FLG)) ==
195             static_cast<uint8_t>(ConverterFlags::FLUSH_FLG);
196         napi_typedarray_type type;
197         size_t length = 0;
198         void *data = nullptr;
199         size_t byteOffset = 0;
200         napi_value arrayBuffer = nullptr;
201         napi_get_typedarray_info(env, src, &type, &length, &data, &arrayBuffer, &byteOffset);
202         const char *source = static_cast<char *>(data);
203         size_t limit = GetMinByteSize() * length;
204         size_t len = limit * sizeof(UChar);
205         UChar *arr = nullptr;
206         if (limit > 0) {
207             arr = new (std::nothrow) UChar[limit + 1]{0};
208             if (arr == nullptr) {
209                 HILOG_DEBUG("arr is nullptr");
210                 return nullptr;
211             }
212         } else {
213             HILOG_DEBUG("limit is error");
214             return nullptr;
215         }
216         UChar *target = arr;
217         UErrorCode codeFlag = U_ZERO_ERROR;
218         ucnv_toUnicode(GetConverterPtr(), &target, target + len, &source, source + length, nullptr, flush, &codeFlag);
219         if (codeFlag != U_ZERO_ERROR) {
220             FreedMemory(arr);
221             napi_throw_error(env, "401",
222                 "Parameter error. Please check if the decode data matches the encoding format.");
223             return nullptr;
224         }
225         size_t resultLen = target - arr;
226         bool omitInitialBom = false;
227         SetIgnoreBOM(arr, resultLen, omitInitialBom);
228         UChar *arrDat = arr;
229         if (omitInitialBom) {
230             arrDat = &arr[1];
231             resultLen--;
232         }
233         napi_value resultStr = GetResultStr(env, arrDat, resultLen);
234         FreedMemory(arr);
235         if (flush) {
236             label_ &= ~static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG);
237             Reset();
238         }
239         return resultStr;
240     }
241 
GetMinByteSize() const242     size_t TextDecoder::GetMinByteSize() const
243     {
244         if (tranTool_ == nullptr) {
245             return 0;
246         }
247         size_t res = static_cast<size_t>(ucnv_getMinCharSize(tranTool_.get()));
248         return res;
249     }
250 
Reset() const251     void TextDecoder::Reset() const
252     {
253         if (tranTool_ == nullptr) {
254             return;
255         }
256         ucnv_reset(tranTool_.get());
257     }
258 
FreedMemory(UChar *&pData)259     void TextDecoder::FreedMemory(UChar *&pData)
260     {
261         if (pData != nullptr) {
262             delete[] pData;
263             pData = nullptr;
264         }
265     }
266 
SetBomFlag(const UChar *arr, const UErrorCode codeFlag, const DecodeArr decArr, size_t &rstLen, bool &bomFlag)267     void TextDecoder::SetBomFlag(const UChar *arr, const UErrorCode codeFlag, const DecodeArr decArr,
268                                  size_t &rstLen, bool &bomFlag)
269     {
270         if (arr == nullptr) {
271             return;
272         }
273         if (U_SUCCESS(codeFlag)) {
274             if (decArr.limitLen > 0) {
275                 rstLen = reinterpret_cast<uintptr_t>(decArr.target) - decArr.tarStartPos;
276                 if (rstLen > 0 && IsUnicode() && !IsIgnoreBom() && !IsBomFlag()) {
277                     bomFlag = (arr[0] == 0xFEFF) ? true : false;
278                     label_ |= static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG);
279                 }
280             }
281         }
282     }
283 
SetIgnoreBOM(const UChar *arr, size_t resultLen, bool &bomFlag)284     void TextDecoder::SetIgnoreBOM(const UChar *arr, size_t resultLen, bool &bomFlag)
285     {
286         switch (ucnv_getType(GetConverterPtr())) {
287             case UCNV_UTF8:
288             case UCNV_UTF16_BigEndian:
289             case UCNV_UTF16_LittleEndian:
290                 label_ |= static_cast<int32_t>(ConverterFlags::UNICODE_FLG);
291                 break;
292             default:
293                 break;
294         }
295         if (resultLen > 0 && IsUnicode() && IsIgnoreBom()) {
296             bomFlag = (arr[0] == 0xFEFF) ? true : false;
297         }
298         label_ |= static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG);
299     }
300 
ThrowError(napi_env env, const char* errMessage)301     napi_value TextDecoder::ThrowError(napi_env env, const char* errMessage)
302     {
303         napi_value utilError = nullptr;
304         napi_value code = nullptr;
305         uint32_t errCode = 10200019;
306         napi_create_uint32(env, errCode, &code);
307         napi_value name = nullptr;
308         std::string errName = "BusinessError";
309         napi_value msg = nullptr;
310         napi_create_string_utf8(env, errMessage, NAPI_AUTO_LENGTH, &msg);
311         napi_create_string_utf8(env, errName.c_str(), NAPI_AUTO_LENGTH, &name);
312         napi_create_error(env, nullptr, msg, &utilError);
313         napi_set_named_property(env, utilError, "code", code);
314         napi_set_named_property(env, utilError, "name", name);
315         napi_throw(env, utilError);
316         napi_value res = nullptr;
317         NAPI_CALL(env, napi_get_undefined(env, &res));
318         return res;
319     }
320 
ReplaceNull(void *data, size_t length) const321     const char* TextDecoder::ReplaceNull(void *data, size_t length) const
322     {
323         char *str = static_cast<char*>(data);
324         if (encStr_ == "utf-8") {
325             for (size_t i = 0; i < length; ++i) {
326                 if (str[i] == '\0') {
327                     str[i] = ' ';
328                 }
329             }
330         }
331         return const_cast<const char*>(str);
332     }
333 }
334