1/* 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#include "ustring.h" 17 18#include <iterator> 19 20namespace panda::es2panda::util { 21 22std::string StringView::Mutf8() const noexcept 23{ 24 std::string mutf8; 25 mutf8.reserve(sv_.size()); 26 27 Iterator iter(*this); 28 29 while (iter.HasNext()) { 30 Mutf8Encode(&mutf8, iter.Next()); 31 } 32 33 return mutf8; 34} 35 36char32_t StringView::DecodeSurrogates(char32_t high, char32_t low) 37{ 38 constexpr auto OFFSET = 10; 39 char32_t result = (high - Constants::SURROGATE_HIGH_MIN) << OFFSET; 40 result += low - Constants::SURROGATE_LOW_MAX; 41 result += Constants::CELESTIAL_OFFSET; 42 return result; 43} 44 45std::tuple<char32_t, char32_t> StringView::EncodeSurrogate(char32_t cp) 46{ 47 constexpr auto OFFSET = 10; 48 char32_t cu1 = ((cp - Constants::CELESTIAL_OFFSET) >> OFFSET) | Constants::SURROGATE_HIGH_MIN; 49 char32_t cu2 = (cp & Constants::SURROGATE_LOW_MARKER) | Constants::SURROGATE_HIGH_MAX; 50 51 return {cu1, cu2}; 52} 53 54void StringView::Iterator::SkipCp() const 55{ 56 if (!HasNext()) { 57 return; 58 } 59 60 char32_t cu0 = static_cast<uint8_t>(*iter_++); 61 62 if (cu0 < Constants::UTF8_1BYTE_LIMIT) { 63 return; 64 } 65 66 if ((cu0 & Constants::UTF8_3BYTE_HEADER) == Constants::UTF8_2BYTE_HEADER) { 67 iter_ += 1U; 68 return; 69 } 70 71 if ((cu0 & Constants::UTF8_4BYTE_HEADER) == Constants::UTF8_3BYTE_HEADER) { 72 iter_ += 2U; 73 return; 74 } 75 76 if (((cu0 & Constants::UTF8_DECODE_4BYTE_MASK) == Constants::UTF8_4BYTE_HEADER) && 77 (cu0 <= Constants::UTF8_DECODE_4BYTE_LIMIT)) { 78 iter_ += 3U; 79 return; 80 } 81} 82 83} // namespace panda::es2panda::util 84 85// NOLINTNEXTLINE(cert-dcl58-cpp) 86namespace std { 87 88ostream &operator<<(ostream &os, const panda::es2panda::util::StringView &us) 89{ 90 os << us.Utf8(); 91 return os; 92} 93 94} // namespace std 95