1/* 2 * Copyright (c) 2023-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "ecmascript/base/json_helper.h" 17#include "ecmascript/base/utf_helper.h" 18 19#include <algorithm> 20#include <iomanip> 21#include <sstream> 22 23namespace panda::ecmascript::base { 24constexpr unsigned char CODE_SPACE = 0x20; 25constexpr char ZERO_FIRST = static_cast<char>(0xc0); // \u0000 => c0 80 26constexpr char ALONE_SURROGATE_3B_FIRST = static_cast<char>(0xed); 27constexpr char ALONE_SURROGATE_3B_SECOND_START = static_cast<char>(0xa0); 28constexpr char ALONE_SURROGATE_3B_SECOND_END = static_cast<char>(0xbf); 29constexpr char ALONE_SURROGATE_3B_THIRD_START = static_cast<char>(0x80); 30constexpr char ALONE_SURROGATE_3B_THIRD_END = static_cast<char>(0xbf); 31 32bool JsonHelper::IsFastValueToQuotedString(const CString& str) 33{ 34 for (const auto ch : str) { 35 switch (ch) { 36 case '\"': 37 case '\\': 38 case '\b': 39 case '\f': 40 case '\n': 41 case '\r': 42 case '\t': 43 case ZERO_FIRST: 44 case ALONE_SURROGATE_3B_FIRST: 45 return false; 46 default: 47 if (ch > 0 && ch < CODE_SPACE) { 48 return false; 49 } 50 break; 51 } 52 } 53 return true; 54} 55 56// String values are wrapped in QUOTATION MARK (") code units. The code units " and \ are escaped with \ prefixes. 57// Control characters code units are replaced with escape sequences \uHHHH, or with the shorter forms, 58// \b (BACKSPACE), \f (FORM FEED), \n (LINE FEED), \r (CARRIAGE RETURN), \t (CHARACTER TABULATION). 59void JsonHelper::AppendValueToQuotedString(const CString& str, CString& output) 60{ 61 output += "\""; 62 bool isFast = IsFastValueToQuotedString(str); // fast mode 63 if (isFast) { 64 output += str; 65 output += "\""; 66 return; 67 } 68 for (uint32_t i = 0; i < str.size(); ++i) { 69 auto ch = str[i]; 70 switch (ch) { 71 case '\"': 72 output += "\\\""; 73 break; 74 case '\\': 75 output += "\\\\"; 76 break; 77 case '\b': 78 output += "\\b"; 79 break; 80 case '\f': 81 output += "\\f"; 82 break; 83 case '\n': 84 output += "\\n"; 85 break; 86 case '\r': 87 output += "\\r"; 88 break; 89 case '\t': 90 output += "\\t"; 91 break; 92 case ZERO_FIRST: 93 output += "\\u0000"; 94 ++i; 95 break; 96 case ALONE_SURROGATE_3B_FIRST: 97 if (i + 2 < str.size() && // 2: Check 2 more characters 98 str[i + 1] >= ALONE_SURROGATE_3B_SECOND_START && // 1: The first character after ch 99 str[i + 1] <= ALONE_SURROGATE_3B_SECOND_END && // 1: The first character after ch 100 str[i + 2] >= ALONE_SURROGATE_3B_THIRD_START && // 2: The second character after ch 101 str[i + 2] <= ALONE_SURROGATE_3B_THIRD_END) { // 2: The second character after ch 102 auto unicodeRes = utf_helper::ConvertUtf8ToUnicodeChar( 103 reinterpret_cast<const uint8_t*>(str.c_str() + i), 3); // 3: Parse 3 characters 104 AppendUnicodeEscape(static_cast<int>(unicodeRes.first), output); 105 i += 2; // 2 : Skip 2 characters 106 break; 107 } 108 [[fallthrough]]; 109 default: 110 if (ch > 0 && ch < CODE_SPACE) { 111 AppendUnicodeEscape(static_cast<int>(ch), output); 112 } else { 113 output += ch; 114 } 115 } 116 } 117 output += "\""; 118} 119} // namespace panda::ecmascript::base