1/*
2 * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "ecmascript/base/json_helper.h"
17#include "ecmascript/base/utf_helper.h"
18
19#include <algorithm>
20#include <iomanip>
21#include <sstream>
22
23namespace panda::ecmascript::base {
// First code unit that may appear unescaped; values in (0, CODE_SPACE) are control characters.
constexpr unsigned char CODE_SPACE{0x20};
// Leading byte of the two-byte sequence c0 80, which stands for \u0000.
constexpr char ZERO_FIRST{'\xc0'};
// A lone surrogate encoded in 3 bytes has the shape: ed, a0..bf, 80..bf.
constexpr char ALONE_SURROGATE_3B_FIRST{'\xed'};
constexpr char ALONE_SURROGATE_3B_SECOND_START{'\xa0'};
constexpr char ALONE_SURROGATE_3B_SECOND_END{'\xbf'};
constexpr char ALONE_SURROGATE_3B_THIRD_START{'\x80'};
constexpr char ALONE_SURROGATE_3B_THIRD_END{'\xbf'};
31
32bool JsonHelper::IsFastValueToQuotedString(const CString& str)
33{
34    for (const auto ch : str) {
35        switch (ch) {
36            case '\"':
37            case '\\':
38            case '\b':
39            case '\f':
40            case '\n':
41            case '\r':
42            case '\t':
43            case ZERO_FIRST:
44            case ALONE_SURROGATE_3B_FIRST:
45                return false;
46            default:
47                if (ch > 0 && ch < CODE_SPACE) {
48                    return false;
49                }
50                break;
51        }
52    }
53    return true;
54}
55
56// String values are wrapped in QUOTATION MARK (") code units. The code units " and \ are escaped with \ prefixes.
57// Control characters code units are replaced with escape sequences \uHHHH, or with the shorter forms,
58// \b (BACKSPACE), \f (FORM FEED), \n (LINE FEED), \r (CARRIAGE RETURN), \t (CHARACTER TABULATION).
59void JsonHelper::AppendValueToQuotedString(const CString& str, CString& output)
60{
61    output += "\"";
62    bool isFast = IsFastValueToQuotedString(str); // fast mode
63    if (isFast) {
64        output += str;
65        output += "\"";
66        return;
67    }
68    for (uint32_t i = 0; i < str.size(); ++i) {
69        auto ch = str[i];
70        switch (ch) {
71            case '\"':
72                output += "\\\"";
73                break;
74            case '\\':
75                output += "\\\\";
76                break;
77            case '\b':
78                output += "\\b";
79                break;
80            case '\f':
81                output += "\\f";
82                break;
83            case '\n':
84                output += "\\n";
85                break;
86            case '\r':
87                output += "\\r";
88                break;
89            case '\t':
90                output += "\\t";
91                break;
92            case ZERO_FIRST:
93                output += "\\u0000";
94                ++i;
95                break;
96            case ALONE_SURROGATE_3B_FIRST:
97                if (i + 2 < str.size() && // 2: Check 2 more characters
98                    str[i + 1] >= ALONE_SURROGATE_3B_SECOND_START && // 1: The first character after ch
99                    str[i + 1] <= ALONE_SURROGATE_3B_SECOND_END && // 1: The first character after ch
100                    str[i + 2] >= ALONE_SURROGATE_3B_THIRD_START && // 2: The second character after ch
101                    str[i + 2] <= ALONE_SURROGATE_3B_THIRD_END) {   // 2: The second character after ch
102                    auto unicodeRes = utf_helper::ConvertUtf8ToUnicodeChar(
103                        reinterpret_cast<const uint8_t*>(str.c_str() + i), 3); // 3: Parse 3 characters
104                    AppendUnicodeEscape(static_cast<int>(unicodeRes.first), output);
105                    i += 2; // 2 : Skip 2 characters
106                    break;
107                }
108                [[fallthrough]];
109            default:
110                if (ch > 0 && ch < CODE_SPACE) {
111                    AppendUnicodeEscape(static_cast<int>(ch), output);
112                } else {
113                    output += ch;
114                }
115        }
116    }
117    output += "\"";
118}
119} // namespace panda::ecmascript::base