1/*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "js_textdecoder.h"
17#include <algorithm>
18#include <codecvt>
19
20#include <locale>
21#include <map>
22#include <string>
23#include <vector>
24
25#include "ohos/init_data.h"
26#include "securec.h"
27#include "unicode/unistr.h"
28#include "util_helper.h"
29#include "tools/log.h"
30
31namespace OHOS::Util {
32    using namespace Commonlibrary::Platform;
33
34    TextDecoder::TextDecoder(const std::string &buff, int32_t flags)
35        : encStr_(buff), tranTool_(nullptr, nullptr)
36    {
37        label_ |= flags;
38#if !defined(__ARKUI_CROSS__)
39        SetHwIcuDirectory();
40#endif
41        bool fatal = (flags & static_cast<int32_t>(ConverterFlags::FATAL_FLG)) ==
42             static_cast<int32_t>(ConverterFlags::FATAL_FLG);
43        UErrorCode codeflag = U_ZERO_ERROR;
44        UConverter *conv = CreateConverter(encStr_, codeflag);
45        if (U_FAILURE(codeflag)) {
46            HILOG_ERROR("ucnv_open failed !");
47            return;
48        }
49        if (fatal) {
50            codeflag = U_ZERO_ERROR;
51            ucnv_setToUCallBack(conv, UCNV_TO_U_CALLBACK_STOP, nullptr, nullptr, nullptr, &codeflag);
52        }
53        TransformToolPointer tempTranTool(conv, ConverterClose);
54        tranTool_ = std::move(tempTranTool);
55    }
56
57    //static
58    bool TextDecoder::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
59    {
60        uint32_t index = 0;
61        for (; index + 4 <= utf16Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
62            // Check if all four characters in the current block are ASCII characters
63            if (!IsASCIICharacter(utf16Data[index]) ||
64                !IsASCIICharacter(utf16Data[index + 1]) || // 1: the second element of the block
65                !IsASCIICharacter(utf16Data[index + 2]) || // 2: the third element of the block
66                !IsASCIICharacter(utf16Data[index + 3])) { // 3: the fourth element of the block
67                return false;
68            }
69        }
70        // Check remaining characters if they are ASCII
71        for (; index < utf16Len; ++index) {
72            if (!IsASCIICharacter(utf16Data[index])) {
73                return false;
74            }
75        }
76        return true;
77    }
78
79    std::pair<char *, bool> TextDecoder::ConvertToChar(UChar *uchar, size_t length, char *tempCharArray)
80    {
81        uint16_t *uint16Data = reinterpret_cast<uint16_t *>(uchar);
82        if (CanBeCompressed(uint16Data, length)) {
83            if (length <= 0) {
84                HILOG_ERROR("textencoder:: length is error");
85                return std::make_pair(nullptr, false);
86            }
87            char *strUtf8;
88            if (length <= TEMP_CHAR_LENGTH) {
89                strUtf8 = tempCharArray;
90            } else {
91                strUtf8 = new (std::nothrow) char[length];
92                if (strUtf8 == nullptr) {
93                    HILOG_ERROR("textencoder:: data allocation failed");
94                    return std::make_pair(nullptr, false);
95                }
96            }
97            for (size_t i = 0; i < length; ++i) {
98                strUtf8[i] = static_cast<char>(uchar[i]);
99            }
100            return std::make_pair(strUtf8, true);
101        }
102        return std::make_pair(nullptr, false);
103    }
104
105    napi_value TextDecoder::Decode(napi_env env, napi_value src, bool iflag)
106    {
107        uint8_t flags = 0;
108        flags |= (iflag ? 0 : static_cast<uint8_t>(ConverterFlags::FLUSH_FLG));
109        UBool flush = ((flags & static_cast<uint8_t>(ConverterFlags::FLUSH_FLG))) ==
110        static_cast<uint8_t>(ConverterFlags::FLUSH_FLG);
111        napi_typedarray_type type;
112        size_t length = 0;
113        void *data = nullptr;
114        size_t byteOffset = 0;
115        napi_value arrayBuffer = nullptr;
116        NAPI_CALL(env, napi_get_typedarray_info(env, src, &type, &length, &data, &arrayBuffer, &byteOffset));
117        const char *source = ReplaceNull(data, length);
118        size_t limit = GetMinByteSize() * length;
119        size_t len = limit * sizeof(UChar);
120        UChar *arr = nullptr;
121        if (limit > 0) {
122            arr = new (std::nothrow) UChar[limit + 1];
123            if (arr == nullptr) {
124                HILOG_ERROR("decode arr is nullptr");
125                return nullptr;
126            }
127            if (memset_s(arr, len + sizeof(UChar), 0, len + sizeof(UChar)) != EOK) {
128                HILOG_ERROR("decode arr memset_s failed");
129                FreedMemory(arr);
130                return nullptr;
131            }
132        } else {
133            HILOG_DEBUG("limit is error");
134            return nullptr;
135        }
136        UChar *target = arr;
137        size_t tarStartPos = reinterpret_cast<uintptr_t>(arr);
138        UErrorCode codeFlag = U_ZERO_ERROR;
139        ucnv_toUnicode(GetConverterPtr(), &target, target + len, &source, source + length, nullptr, flush, &codeFlag);
140        if (codeFlag != U_ZERO_ERROR) {
141            return ThrowError(env, "TextDecoder decoding error.");
142        }
143        size_t resultLength = 0;
144        bool omitInitialBom = false;
145        DecodeArr decArr(target, tarStartPos, limit);
146        SetBomFlag(arr, codeFlag, decArr, resultLength, omitInitialBom);
147        UChar *arrDat = arr;
148        if (omitInitialBom && resultLength > 0) {
149            arrDat = &arr[2]; // 2: Obtains the 2 value of the array.
150        }
151        std::string tepStr = ConvertToString(arrDat, length);
152        napi_value resultStr = nullptr;
153        NAPI_CALL(env, napi_create_string_utf8(env, tepStr.c_str(), tepStr.size(), &resultStr));
154        FreedMemory(arr);
155        if (flush) {
156            label_ &= static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG);
157            Reset();
158        }
159        return resultStr;
160    }
161
162    napi_value TextDecoder::GetResultStr(napi_env env, UChar *arrDat,
163                                         size_t length)
164    {
165        napi_value resultStr = nullptr;
166        if (length <= TEMP_CHAR_LENGTH) {
167            char tempCharArray[TEMP_CHAR_LENGTH];
168            std::pair<char *, bool> tempPair = ConvertToChar(arrDat, length, tempCharArray);
169            if (tempPair.second == true) {
170                char *utf8Str = tempPair.first;
171                napi_create_string_utf8(env, utf8Str, length, &resultStr);
172            } else {
173                napi_create_string_utf16(env, reinterpret_cast<char16_t *>(arrDat), length, &resultStr);
174            }
175        } else {
176            std::pair<char *, bool> tempPair = ConvertToChar(arrDat, length, nullptr);
177            if (tempPair.second == true) {
178                char *utf8Str = tempPair.first;
179                napi_create_string_utf8(env, utf8Str, length, &resultStr);
180                NAPI_ASSERT(env, utf8Str != nullptr, "Data allocation failed");
181                delete[] utf8Str;
182            } else {
183                napi_create_string_utf16(env, reinterpret_cast<char16_t *>(arrDat), length, &resultStr);
184            }
185        }
186        return resultStr;
187    }
188
189    napi_value TextDecoder::DecodeToString(napi_env env,
190                                           napi_value src, bool iflag)
191    {
192        uint8_t flags = 0;
193        flags |= (iflag ? 0 : static_cast<uint8_t>(ConverterFlags::FLUSH_FLG));
194        UBool flush = (flags & static_cast<uint8_t>(ConverterFlags::FLUSH_FLG)) ==
195            static_cast<uint8_t>(ConverterFlags::FLUSH_FLG);
196        napi_typedarray_type type;
197        size_t length = 0;
198        void *data = nullptr;
199        size_t byteOffset = 0;
200        napi_value arrayBuffer = nullptr;
201        napi_get_typedarray_info(env, src, &type, &length, &data, &arrayBuffer, &byteOffset);
202        const char *source = static_cast<char *>(data);
203        size_t limit = GetMinByteSize() * length;
204        size_t len = limit * sizeof(UChar);
205        UChar *arr = nullptr;
206        if (limit > 0) {
207            arr = new (std::nothrow) UChar[limit + 1]{0};
208            if (arr == nullptr) {
209                HILOG_DEBUG("arr is nullptr");
210                return nullptr;
211            }
212        } else {
213            HILOG_DEBUG("limit is error");
214            return nullptr;
215        }
216        UChar *target = arr;
217        UErrorCode codeFlag = U_ZERO_ERROR;
218        ucnv_toUnicode(GetConverterPtr(), &target, target + len, &source, source + length, nullptr, flush, &codeFlag);
219        if (codeFlag != U_ZERO_ERROR) {
220            FreedMemory(arr);
221            napi_throw_error(env, "401",
222                "Parameter error. Please check if the decode data matches the encoding format.");
223            return nullptr;
224        }
225        size_t resultLen = target - arr;
226        bool omitInitialBom = false;
227        SetIgnoreBOM(arr, resultLen, omitInitialBom);
228        UChar *arrDat = arr;
229        if (omitInitialBom) {
230            arrDat = &arr[1];
231            resultLen--;
232        }
233        napi_value resultStr = GetResultStr(env, arrDat, resultLen);
234        FreedMemory(arr);
235        if (flush) {
236            label_ &= ~static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG);
237            Reset();
238        }
239        return resultStr;
240    }
241
242    size_t TextDecoder::GetMinByteSize() const
243    {
244        if (tranTool_ == nullptr) {
245            return 0;
246        }
247        size_t res = static_cast<size_t>(ucnv_getMinCharSize(tranTool_.get()));
248        return res;
249    }
250
251    void TextDecoder::Reset() const
252    {
253        if (tranTool_ == nullptr) {
254            return;
255        }
256        ucnv_reset(tranTool_.get());
257    }
258
259    void TextDecoder::FreedMemory(UChar *&pData)
260    {
261        if (pData != nullptr) {
262            delete[] pData;
263            pData = nullptr;
264        }
265    }
266
267    void TextDecoder::SetBomFlag(const UChar *arr, const UErrorCode codeFlag, const DecodeArr decArr,
268                                 size_t &rstLen, bool &bomFlag)
269    {
270        if (arr == nullptr) {
271            return;
272        }
273        if (U_SUCCESS(codeFlag)) {
274            if (decArr.limitLen > 0) {
275                rstLen = reinterpret_cast<uintptr_t>(decArr.target) - decArr.tarStartPos;
276                if (rstLen > 0 && IsUnicode() && !IsIgnoreBom() && !IsBomFlag()) {
277                    bomFlag = (arr[0] == 0xFEFF) ? true : false;
278                    label_ |= static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG);
279                }
280            }
281        }
282    }
283
284    void TextDecoder::SetIgnoreBOM(const UChar *arr, size_t resultLen, bool &bomFlag)
285    {
286        switch (ucnv_getType(GetConverterPtr())) {
287            case UCNV_UTF8:
288            case UCNV_UTF16_BigEndian:
289            case UCNV_UTF16_LittleEndian:
290                label_ |= static_cast<int32_t>(ConverterFlags::UNICODE_FLG);
291                break;
292            default:
293                break;
294        }
295        if (resultLen > 0 && IsUnicode() && IsIgnoreBom()) {
296            bomFlag = (arr[0] == 0xFEFF) ? true : false;
297        }
298        label_ |= static_cast<int32_t>(ConverterFlags::BOM_SEEN_FLG);
299    }
300
301    napi_value TextDecoder::ThrowError(napi_env env, const char* errMessage)
302    {
303        napi_value utilError = nullptr;
304        napi_value code = nullptr;
305        uint32_t errCode = 10200019;
306        napi_create_uint32(env, errCode, &code);
307        napi_value name = nullptr;
308        std::string errName = "BusinessError";
309        napi_value msg = nullptr;
310        napi_create_string_utf8(env, errMessage, NAPI_AUTO_LENGTH, &msg);
311        napi_create_string_utf8(env, errName.c_str(), NAPI_AUTO_LENGTH, &name);
312        napi_create_error(env, nullptr, msg, &utilError);
313        napi_set_named_property(env, utilError, "code", code);
314        napi_set_named_property(env, utilError, "name", name);
315        napi_throw(env, utilError);
316        napi_value res = nullptr;
317        NAPI_CALL(env, napi_get_undefined(env, &res));
318        return res;
319    }
320
321    const char* TextDecoder::ReplaceNull(void *data, size_t length) const
322    {
323        char *str = static_cast<char*>(data);
324        if (encStr_ == "utf-8") {
325            for (size_t i = 0; i < length; ++i) {
326                if (str[i] == '\0') {
327                    str[i] = ' ';
328                }
329            }
330        }
331        return const_cast<const char*>(str);
332    }
333}
334