16d528ed9Sopenharmony_ci// Copyright 2013 The Chromium Authors. All rights reserved. 26d528ed9Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 36d528ed9Sopenharmony_ci// found in the LICENSE file. 46d528ed9Sopenharmony_ci 56d528ed9Sopenharmony_ci#include "base/strings/string_util.h" 66d528ed9Sopenharmony_ci 76d528ed9Sopenharmony_ci#include <ctype.h> 86d528ed9Sopenharmony_ci#include <errno.h> 96d528ed9Sopenharmony_ci#include <math.h> 106d528ed9Sopenharmony_ci#include <stdarg.h> 116d528ed9Sopenharmony_ci#include <stdint.h> 126d528ed9Sopenharmony_ci#include <stdio.h> 136d528ed9Sopenharmony_ci#include <stdlib.h> 146d528ed9Sopenharmony_ci#include <string.h> 156d528ed9Sopenharmony_ci#include <time.h> 166d528ed9Sopenharmony_ci#include <wchar.h> 176d528ed9Sopenharmony_ci#include <wctype.h> 186d528ed9Sopenharmony_ci 196d528ed9Sopenharmony_ci#include <algorithm> 206d528ed9Sopenharmony_ci#include <iterator> 216d528ed9Sopenharmony_ci#include <limits> 226d528ed9Sopenharmony_ci#include <string> 236d528ed9Sopenharmony_ci#include <vector> 246d528ed9Sopenharmony_ci 256d528ed9Sopenharmony_ci#include "base/logging.h" 266d528ed9Sopenharmony_ci#include "base/strings/utf_string_conversion_utils.h" 276d528ed9Sopenharmony_ci#include "base/strings/utf_string_conversions.h" 286d528ed9Sopenharmony_ci#include "base/third_party/icu/icu_utf.h" 296d528ed9Sopenharmony_ci#include "util/build_config.h" 306d528ed9Sopenharmony_ci 316d528ed9Sopenharmony_cinamespace base { 326d528ed9Sopenharmony_ci 336d528ed9Sopenharmony_cinamespace { 346d528ed9Sopenharmony_ci 356d528ed9Sopenharmony_ci// Used by ReplaceStringPlaceholders to track the position in the string of 366d528ed9Sopenharmony_ci// replaced parameters. 376d528ed9Sopenharmony_cistruct ReplacementOffset { 386d528ed9Sopenharmony_ci ReplacementOffset(uintptr_t parameter, size_t offset) 396d528ed9Sopenharmony_ci : parameter(parameter), offset(offset) {} 406d528ed9Sopenharmony_ci 416d528ed9Sopenharmony_ci // Index of the parameter. 426d528ed9Sopenharmony_ci uintptr_t parameter; 436d528ed9Sopenharmony_ci 446d528ed9Sopenharmony_ci // Starting position in the string. 456d528ed9Sopenharmony_ci size_t offset; 466d528ed9Sopenharmony_ci}; 476d528ed9Sopenharmony_ci 486d528ed9Sopenharmony_cistatic bool CompareParameter(const ReplacementOffset& elem1, 496d528ed9Sopenharmony_ci const ReplacementOffset& elem2) { 506d528ed9Sopenharmony_ci return elem1.parameter < elem2.parameter; 516d528ed9Sopenharmony_ci} 526d528ed9Sopenharmony_ci 536d528ed9Sopenharmony_ci// Assuming that a pointer is the size of a "machine word", then 546d528ed9Sopenharmony_ci// uintptr_t is an integer type that is also a machine word. 556d528ed9Sopenharmony_citypedef uintptr_t MachineWord; 566d528ed9Sopenharmony_ciconst uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1; 576d528ed9Sopenharmony_ci 586d528ed9Sopenharmony_ciinline bool IsAlignedToMachineWord(const void* pointer) { 596d528ed9Sopenharmony_ci return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask); 606d528ed9Sopenharmony_ci} 616d528ed9Sopenharmony_ci 626d528ed9Sopenharmony_citemplate <typename T> 636d528ed9Sopenharmony_ciinline T* AlignToMachineWord(T* pointer) { 646d528ed9Sopenharmony_ci return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) & 656d528ed9Sopenharmony_ci ~kMachineWordAlignmentMask); 666d528ed9Sopenharmony_ci} 676d528ed9Sopenharmony_ci 686d528ed9Sopenharmony_citemplate <size_t size, typename CharacterType> 696d528ed9Sopenharmony_cistruct NonASCIIMask; 706d528ed9Sopenharmony_citemplate <> 716d528ed9Sopenharmony_cistruct NonASCIIMask<4, char16_t> { 726d528ed9Sopenharmony_ci static inline uint32_t value() { return 0xFF80FF80U; } 736d528ed9Sopenharmony_ci}; 746d528ed9Sopenharmony_citemplate <> 756d528ed9Sopenharmony_cistruct NonASCIIMask<4, char> { 766d528ed9Sopenharmony_ci static inline uint32_t value() { return 0x80808080U; } 776d528ed9Sopenharmony_ci}; 786d528ed9Sopenharmony_citemplate <> 796d528ed9Sopenharmony_cistruct NonASCIIMask<8, char16_t> { 806d528ed9Sopenharmony_ci static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; } 816d528ed9Sopenharmony_ci}; 826d528ed9Sopenharmony_citemplate <> 836d528ed9Sopenharmony_cistruct NonASCIIMask<8, char> { 846d528ed9Sopenharmony_ci static inline uint64_t value() { return 0x8080808080808080ULL; } 856d528ed9Sopenharmony_ci}; 866d528ed9Sopenharmony_ci 876d528ed9Sopenharmony_ci} // namespace 886d528ed9Sopenharmony_ci 896d528ed9Sopenharmony_cinamespace { 906d528ed9Sopenharmony_ci 916d528ed9Sopenharmony_citemplate <typename StringType> 926d528ed9Sopenharmony_ciStringType ToLowerASCIIImpl( 936d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> str) { 946d528ed9Sopenharmony_ci StringType ret; 956d528ed9Sopenharmony_ci ret.reserve(str.size()); 966d528ed9Sopenharmony_ci for (size_t i = 0; i < str.size(); i++) 976d528ed9Sopenharmony_ci ret.push_back(ToLowerASCII(str[i])); 986d528ed9Sopenharmony_ci return ret; 996d528ed9Sopenharmony_ci} 1006d528ed9Sopenharmony_ci 1016d528ed9Sopenharmony_citemplate <typename StringType> 1026d528ed9Sopenharmony_ciStringType ToUpperASCIIImpl( 1036d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> str) { 1046d528ed9Sopenharmony_ci StringType ret; 1056d528ed9Sopenharmony_ci ret.reserve(str.size()); 1066d528ed9Sopenharmony_ci for (size_t i = 0; i < str.size(); i++) 1076d528ed9Sopenharmony_ci ret.push_back(ToUpperASCII(str[i])); 1086d528ed9Sopenharmony_ci return ret; 1096d528ed9Sopenharmony_ci} 1106d528ed9Sopenharmony_ci 1116d528ed9Sopenharmony_ci} // namespace 1126d528ed9Sopenharmony_ci 1136d528ed9Sopenharmony_cistd::string ToLowerASCII(std::string_view str) { 1146d528ed9Sopenharmony_ci return ToLowerASCIIImpl<std::string>(str); 1156d528ed9Sopenharmony_ci} 1166d528ed9Sopenharmony_ci 1176d528ed9Sopenharmony_cistd::u16string ToLowerASCII(std::u16string_view str) { 1186d528ed9Sopenharmony_ci return ToLowerASCIIImpl<std::u16string>(str); 1196d528ed9Sopenharmony_ci} 1206d528ed9Sopenharmony_ci 1216d528ed9Sopenharmony_cistd::string ToUpperASCII(std::string_view str) { 1226d528ed9Sopenharmony_ci return ToUpperASCIIImpl<std::string>(str); 1236d528ed9Sopenharmony_ci} 1246d528ed9Sopenharmony_ci 1256d528ed9Sopenharmony_cistd::u16string ToUpperASCII(std::u16string_view str) { 1266d528ed9Sopenharmony_ci return ToUpperASCIIImpl<std::u16string>(str); 1276d528ed9Sopenharmony_ci} 1286d528ed9Sopenharmony_ci 1296d528ed9Sopenharmony_cibool starts_with(const std::string_view str1, const std::string_view str2) { 1306d528ed9Sopenharmony_ci if (str2.length() > str1.length()) { 1316d528ed9Sopenharmony_ci return false; 1326d528ed9Sopenharmony_ci } 1336d528ed9Sopenharmony_ci return str1.compare(0, str2.length(), str2) == 0; 1346d528ed9Sopenharmony_ci} 1356d528ed9Sopenharmony_ci 1366d528ed9Sopenharmony_cibool ends_with(const std::string_view str1, const std::string_view str2) { 1376d528ed9Sopenharmony_ci if (str2.empty()) { 1386d528ed9Sopenharmony_ci return true; 1396d528ed9Sopenharmony_ci } 1406d528ed9Sopenharmony_ci if (str1.length() < str2.length()) { 1416d528ed9Sopenharmony_ci return false; 1426d528ed9Sopenharmony_ci } 1436d528ed9Sopenharmony_ci return str1.substr(str1.length() - str2.length()) == str2; 1446d528ed9Sopenharmony_ci} 1456d528ed9Sopenharmony_ci 1466d528ed9Sopenharmony_citemplate <class StringType> 1476d528ed9Sopenharmony_ciint CompareCaseInsensitiveASCIIT( 1486d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> a, 1496d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> b) { 1506d528ed9Sopenharmony_ci // Find the first characters that aren't equal and compare them. If the end 1516d528ed9Sopenharmony_ci // of one of the strings is found before a nonequal character, the lengths 1526d528ed9Sopenharmony_ci // of the strings are compared. 1536d528ed9Sopenharmony_ci size_t i = 0; 1546d528ed9Sopenharmony_ci while (i < a.length() && i < b.length()) { 1556d528ed9Sopenharmony_ci typename StringType::value_type lower_a = ToLowerASCII(a[i]); 1566d528ed9Sopenharmony_ci typename StringType::value_type lower_b = ToLowerASCII(b[i]); 1576d528ed9Sopenharmony_ci if (lower_a < lower_b) 1586d528ed9Sopenharmony_ci return -1; 1596d528ed9Sopenharmony_ci if (lower_a > lower_b) 1606d528ed9Sopenharmony_ci return 1; 1616d528ed9Sopenharmony_ci i++; 1626d528ed9Sopenharmony_ci } 1636d528ed9Sopenharmony_ci 1646d528ed9Sopenharmony_ci // End of one string hit before finding a different character. Expect the 1656d528ed9Sopenharmony_ci // common case to be "strings equal" at this point so check that first. 1666d528ed9Sopenharmony_ci if (a.length() == b.length()) 1676d528ed9Sopenharmony_ci return 0; 1686d528ed9Sopenharmony_ci 1696d528ed9Sopenharmony_ci if (a.length() < b.length()) 1706d528ed9Sopenharmony_ci return -1; 1716d528ed9Sopenharmony_ci return 1; 1726d528ed9Sopenharmony_ci} 1736d528ed9Sopenharmony_ci 1746d528ed9Sopenharmony_ciint CompareCaseInsensitiveASCII(std::string_view a, std::string_view b) { 1756d528ed9Sopenharmony_ci return CompareCaseInsensitiveASCIIT<std::string>(a, b); 1766d528ed9Sopenharmony_ci} 1776d528ed9Sopenharmony_ci 1786d528ed9Sopenharmony_ciint CompareCaseInsensitiveASCII(std::u16string_view a, std::u16string_view b) { 1796d528ed9Sopenharmony_ci return CompareCaseInsensitiveASCIIT<std::u16string>(a, b); 1806d528ed9Sopenharmony_ci} 1816d528ed9Sopenharmony_ci 1826d528ed9Sopenharmony_cibool EqualsCaseInsensitiveASCII(std::string_view a, std::string_view b) { 1836d528ed9Sopenharmony_ci if (a.length() != b.length()) 1846d528ed9Sopenharmony_ci return false; 1856d528ed9Sopenharmony_ci return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0; 1866d528ed9Sopenharmony_ci} 1876d528ed9Sopenharmony_ci 1886d528ed9Sopenharmony_cibool EqualsCaseInsensitiveASCII(std::u16string_view a, std::u16string_view b) { 1896d528ed9Sopenharmony_ci if (a.length() != b.length()) 1906d528ed9Sopenharmony_ci return false; 1916d528ed9Sopenharmony_ci return CompareCaseInsensitiveASCIIT<std::u16string>(a, b) == 0; 1926d528ed9Sopenharmony_ci} 1936d528ed9Sopenharmony_ci 1946d528ed9Sopenharmony_citemplate <class StringType> 1956d528ed9Sopenharmony_cibool ReplaceCharsT( 1966d528ed9Sopenharmony_ci const StringType& input, 1976d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> find_any_of_these, 1986d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> replace_with, 1996d528ed9Sopenharmony_ci StringType* output); 2006d528ed9Sopenharmony_ci 2016d528ed9Sopenharmony_cibool ReplaceChars(const std::u16string& input, 2026d528ed9Sopenharmony_ci std::u16string_view replace_chars, 2036d528ed9Sopenharmony_ci const std::u16string& replace_with, 2046d528ed9Sopenharmony_ci std::u16string* output) { 2056d528ed9Sopenharmony_ci return ReplaceCharsT(input, replace_chars, std::u16string_view(replace_with), 2066d528ed9Sopenharmony_ci output); 2076d528ed9Sopenharmony_ci} 2086d528ed9Sopenharmony_ci 2096d528ed9Sopenharmony_cibool ReplaceChars(const std::string& input, 2106d528ed9Sopenharmony_ci std::string_view replace_chars, 2116d528ed9Sopenharmony_ci const std::string& replace_with, 2126d528ed9Sopenharmony_ci std::string* output) { 2136d528ed9Sopenharmony_ci return ReplaceCharsT(input, replace_chars, std::string_view(replace_with), 2146d528ed9Sopenharmony_ci output); 2156d528ed9Sopenharmony_ci} 2166d528ed9Sopenharmony_ci 2176d528ed9Sopenharmony_cibool RemoveChars(const std::u16string& input, 2186d528ed9Sopenharmony_ci std::u16string_view remove_chars, 2196d528ed9Sopenharmony_ci std::u16string* output) { 2206d528ed9Sopenharmony_ci return ReplaceCharsT(input, remove_chars, std::u16string_view(), output); 2216d528ed9Sopenharmony_ci} 2226d528ed9Sopenharmony_ci 2236d528ed9Sopenharmony_cibool RemoveChars(const std::string& input, 2246d528ed9Sopenharmony_ci std::string_view remove_chars, 2256d528ed9Sopenharmony_ci std::string* output) { 2266d528ed9Sopenharmony_ci return ReplaceCharsT(input, remove_chars, std::string_view(), output); 2276d528ed9Sopenharmony_ci} 2286d528ed9Sopenharmony_ci 2296d528ed9Sopenharmony_citemplate <typename Str> 2306d528ed9Sopenharmony_ciTrimPositions TrimStringT( 2316d528ed9Sopenharmony_ci const Str& input, 2326d528ed9Sopenharmony_ci std::basic_string_view<typename Str::value_type> trim_chars, 2336d528ed9Sopenharmony_ci TrimPositions positions, 2346d528ed9Sopenharmony_ci Str* output) { 2356d528ed9Sopenharmony_ci // Find the edges of leading/trailing whitespace as desired. Need to use 2366d528ed9Sopenharmony_ci // a std::string_view version of input to be able to call find* on it with the 2376d528ed9Sopenharmony_ci // std::string_view version of trim_chars (normally the trim_chars will be a 2386d528ed9Sopenharmony_ci // constant so avoid making a copy). 2396d528ed9Sopenharmony_ci std::basic_string_view<typename Str::value_type> input_piece(input); 2406d528ed9Sopenharmony_ci const size_t last_char = input.length() - 1; 2416d528ed9Sopenharmony_ci const size_t first_good_char = (positions & TRIM_LEADING) 2426d528ed9Sopenharmony_ci ? input_piece.find_first_not_of(trim_chars) 2436d528ed9Sopenharmony_ci : 0; 2446d528ed9Sopenharmony_ci const size_t last_good_char = (positions & TRIM_TRAILING) 2456d528ed9Sopenharmony_ci ? input_piece.find_last_not_of(trim_chars) 2466d528ed9Sopenharmony_ci : last_char; 2476d528ed9Sopenharmony_ci 2486d528ed9Sopenharmony_ci // When the string was all trimmed, report that we stripped off characters 2496d528ed9Sopenharmony_ci // from whichever position the caller was interested in. For empty input, we 2506d528ed9Sopenharmony_ci // stripped no characters, but we still need to clear |output|. 2516d528ed9Sopenharmony_ci if (input.empty() || (first_good_char == Str::npos) || 2526d528ed9Sopenharmony_ci (last_good_char == Str::npos)) { 2536d528ed9Sopenharmony_ci bool input_was_empty = input.empty(); // in case output == &input 2546d528ed9Sopenharmony_ci output->clear(); 2556d528ed9Sopenharmony_ci return input_was_empty ? TRIM_NONE : positions; 2566d528ed9Sopenharmony_ci } 2576d528ed9Sopenharmony_ci 2586d528ed9Sopenharmony_ci // Trim. 2596d528ed9Sopenharmony_ci *output = input.substr(first_good_char, last_good_char - first_good_char + 1); 2606d528ed9Sopenharmony_ci 2616d528ed9Sopenharmony_ci // Return where we trimmed from. 2626d528ed9Sopenharmony_ci return static_cast<TrimPositions>( 2636d528ed9Sopenharmony_ci ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 2646d528ed9Sopenharmony_ci ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 2656d528ed9Sopenharmony_ci} 2666d528ed9Sopenharmony_ci 2676d528ed9Sopenharmony_cibool TrimString(const std::u16string& input, 2686d528ed9Sopenharmony_ci std::u16string_view trim_chars, 2696d528ed9Sopenharmony_ci std::u16string* output) { 2706d528ed9Sopenharmony_ci return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 2716d528ed9Sopenharmony_ci} 2726d528ed9Sopenharmony_ci 2736d528ed9Sopenharmony_cibool TrimString(const std::string& input, 2746d528ed9Sopenharmony_ci std::string_view trim_chars, 2756d528ed9Sopenharmony_ci std::string* output) { 2766d528ed9Sopenharmony_ci return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 2776d528ed9Sopenharmony_ci} 2786d528ed9Sopenharmony_ci 2796d528ed9Sopenharmony_citemplate <typename char_type> 2806d528ed9Sopenharmony_cistd::basic_string_view<char_type> TrimStringPieceT( 2816d528ed9Sopenharmony_ci std::basic_string_view<char_type> input, 2826d528ed9Sopenharmony_ci std::basic_string_view<char_type> trim_chars, 2836d528ed9Sopenharmony_ci TrimPositions positions) { 2846d528ed9Sopenharmony_ci size_t begin = 2856d528ed9Sopenharmony_ci (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0; 2866d528ed9Sopenharmony_ci if (begin == std::basic_string_view<char_type>::npos) 2876d528ed9Sopenharmony_ci return std::basic_string_view<char_type>(); // All trimmed. 2886d528ed9Sopenharmony_ci 2896d528ed9Sopenharmony_ci size_t end = (positions & TRIM_TRAILING) 2906d528ed9Sopenharmony_ci ? input.find_last_not_of(trim_chars) + 1 2916d528ed9Sopenharmony_ci : input.size(); 2926d528ed9Sopenharmony_ci return input.substr(begin, end - begin); 2936d528ed9Sopenharmony_ci} 2946d528ed9Sopenharmony_ci 2956d528ed9Sopenharmony_cistd::u16string_view TrimString(std::u16string_view input, 2966d528ed9Sopenharmony_ci std::u16string_view trim_chars, 2976d528ed9Sopenharmony_ci TrimPositions positions) { 2986d528ed9Sopenharmony_ci return TrimStringPieceT(input, trim_chars, positions); 2996d528ed9Sopenharmony_ci} 3006d528ed9Sopenharmony_ci 3016d528ed9Sopenharmony_cistd::string_view TrimString(std::string_view input, 3026d528ed9Sopenharmony_ci std::string_view trim_chars, 3036d528ed9Sopenharmony_ci TrimPositions positions) { 3046d528ed9Sopenharmony_ci return TrimStringPieceT(input, trim_chars, positions); 3056d528ed9Sopenharmony_ci} 3066d528ed9Sopenharmony_ci 3076d528ed9Sopenharmony_civoid TruncateUTF8ToByteSize(const std::string& input, 3086d528ed9Sopenharmony_ci const size_t byte_size, 3096d528ed9Sopenharmony_ci std::string* output) { 3106d528ed9Sopenharmony_ci DCHECK(output); 3116d528ed9Sopenharmony_ci if (byte_size > input.length()) { 3126d528ed9Sopenharmony_ci *output = input; 3136d528ed9Sopenharmony_ci return; 3146d528ed9Sopenharmony_ci } 3156d528ed9Sopenharmony_ci DCHECK_LE(byte_size, 3166d528ed9Sopenharmony_ci static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); 3176d528ed9Sopenharmony_ci // Note: This cast is necessary because CBU8_NEXT uses int32_ts. 3186d528ed9Sopenharmony_ci int32_t truncation_length = static_cast<int32_t>(byte_size); 3196d528ed9Sopenharmony_ci int32_t char_index = truncation_length - 1; 3206d528ed9Sopenharmony_ci const char* data = input.data(); 3216d528ed9Sopenharmony_ci 3226d528ed9Sopenharmony_ci // Using CBU8, we will move backwards from the truncation point 3236d528ed9Sopenharmony_ci // to the beginning of the string looking for a valid UTF8 3246d528ed9Sopenharmony_ci // character. Once a full UTF8 character is found, we will 3256d528ed9Sopenharmony_ci // truncate the string to the end of that character. 3266d528ed9Sopenharmony_ci while (char_index >= 0) { 3276d528ed9Sopenharmony_ci int32_t prev = char_index; 3286d528ed9Sopenharmony_ci base_icu::UChar32 code_point = 0; 3296d528ed9Sopenharmony_ci CBU8_NEXT(data, char_index, truncation_length, code_point); 3306d528ed9Sopenharmony_ci if (!IsValidCharacter(code_point) || !IsValidCodepoint(code_point)) { 3316d528ed9Sopenharmony_ci char_index = prev - 1; 3326d528ed9Sopenharmony_ci } else { 3336d528ed9Sopenharmony_ci break; 3346d528ed9Sopenharmony_ci } 3356d528ed9Sopenharmony_ci } 3366d528ed9Sopenharmony_ci 3376d528ed9Sopenharmony_ci if (char_index >= 0) 3386d528ed9Sopenharmony_ci *output = input.substr(0, char_index); 3396d528ed9Sopenharmony_ci else 3406d528ed9Sopenharmony_ci output->clear(); 3416d528ed9Sopenharmony_ci} 3426d528ed9Sopenharmony_ci 3436d528ed9Sopenharmony_ciTrimPositions TrimWhitespace(const std::u16string& input, 3446d528ed9Sopenharmony_ci TrimPositions positions, 3456d528ed9Sopenharmony_ci std::u16string* output) { 3466d528ed9Sopenharmony_ci return TrimStringT(input, std::u16string_view(kWhitespaceUTF16), positions, 3476d528ed9Sopenharmony_ci output); 3486d528ed9Sopenharmony_ci} 3496d528ed9Sopenharmony_ci 3506d528ed9Sopenharmony_cistd::u16string_view TrimWhitespace(std::u16string_view input, 3516d528ed9Sopenharmony_ci TrimPositions positions) { 3526d528ed9Sopenharmony_ci return TrimStringPieceT(input, std::u16string_view(kWhitespaceUTF16), 3536d528ed9Sopenharmony_ci positions); 3546d528ed9Sopenharmony_ci} 3556d528ed9Sopenharmony_ci 3566d528ed9Sopenharmony_ciTrimPositions TrimWhitespaceASCII(const std::string& input, 3576d528ed9Sopenharmony_ci TrimPositions positions, 3586d528ed9Sopenharmony_ci std::string* output) { 3596d528ed9Sopenharmony_ci return TrimStringT(input, std::string_view(kWhitespaceASCII), positions, 3606d528ed9Sopenharmony_ci output); 3616d528ed9Sopenharmony_ci} 3626d528ed9Sopenharmony_ci 3636d528ed9Sopenharmony_cistd::string_view TrimWhitespaceASCII(std::string_view input, 3646d528ed9Sopenharmony_ci TrimPositions positions) { 3656d528ed9Sopenharmony_ci return TrimStringPieceT(input, std::string_view(kWhitespaceASCII), positions); 3666d528ed9Sopenharmony_ci} 3676d528ed9Sopenharmony_ci 3686d528ed9Sopenharmony_citemplate <typename STR> 3696d528ed9Sopenharmony_ciSTR CollapseWhitespaceT(const STR& text, bool trim_sequences_with_line_breaks) { 3706d528ed9Sopenharmony_ci STR result; 3716d528ed9Sopenharmony_ci result.resize(text.size()); 3726d528ed9Sopenharmony_ci 3736d528ed9Sopenharmony_ci // Set flags to pretend we're already in a trimmed whitespace sequence, so we 3746d528ed9Sopenharmony_ci // will trim any leading whitespace. 3756d528ed9Sopenharmony_ci bool in_whitespace = true; 3766d528ed9Sopenharmony_ci bool already_trimmed = true; 3776d528ed9Sopenharmony_ci 3786d528ed9Sopenharmony_ci int chars_written = 0; 3796d528ed9Sopenharmony_ci for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 3806d528ed9Sopenharmony_ci if (IsUnicodeWhitespace(*i)) { 3816d528ed9Sopenharmony_ci if (!in_whitespace) { 3826d528ed9Sopenharmony_ci // Reduce all whitespace sequences to a single space. 3836d528ed9Sopenharmony_ci in_whitespace = true; 3846d528ed9Sopenharmony_ci result[chars_written++] = L' '; 3856d528ed9Sopenharmony_ci } 3866d528ed9Sopenharmony_ci if (trim_sequences_with_line_breaks && !already_trimmed && 3876d528ed9Sopenharmony_ci ((*i == '\n') || (*i == '\r'))) { 3886d528ed9Sopenharmony_ci // Whitespace sequences containing CR or LF are eliminated entirely. 3896d528ed9Sopenharmony_ci already_trimmed = true; 3906d528ed9Sopenharmony_ci --chars_written; 3916d528ed9Sopenharmony_ci } 3926d528ed9Sopenharmony_ci } else { 3936d528ed9Sopenharmony_ci // Non-whitespace characters are copied straight across. 3946d528ed9Sopenharmony_ci in_whitespace = false; 3956d528ed9Sopenharmony_ci already_trimmed = false; 3966d528ed9Sopenharmony_ci result[chars_written++] = *i; 3976d528ed9Sopenharmony_ci } 3986d528ed9Sopenharmony_ci } 3996d528ed9Sopenharmony_ci 4006d528ed9Sopenharmony_ci if (in_whitespace && !already_trimmed) { 4016d528ed9Sopenharmony_ci // Any trailing whitespace is eliminated. 4026d528ed9Sopenharmony_ci --chars_written; 4036d528ed9Sopenharmony_ci } 4046d528ed9Sopenharmony_ci 4056d528ed9Sopenharmony_ci result.resize(chars_written); 4066d528ed9Sopenharmony_ci return result; 4076d528ed9Sopenharmony_ci} 4086d528ed9Sopenharmony_ci 4096d528ed9Sopenharmony_cistd::u16string CollapseWhitespace(const std::u16string& text, 4106d528ed9Sopenharmony_ci bool trim_sequences_with_line_breaks) { 4116d528ed9Sopenharmony_ci return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 4126d528ed9Sopenharmony_ci} 4136d528ed9Sopenharmony_ci 4146d528ed9Sopenharmony_cistd::string CollapseWhitespaceASCII(const std::string& text, 4156d528ed9Sopenharmony_ci bool trim_sequences_with_line_breaks) { 4166d528ed9Sopenharmony_ci return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 4176d528ed9Sopenharmony_ci} 4186d528ed9Sopenharmony_ci 4196d528ed9Sopenharmony_cibool ContainsOnlyChars(std::string_view input, std::string_view characters) { 4206d528ed9Sopenharmony_ci return input.find_first_not_of(characters) == std::string_view::npos; 4216d528ed9Sopenharmony_ci} 4226d528ed9Sopenharmony_ci 4236d528ed9Sopenharmony_cibool ContainsOnlyChars(std::u16string_view input, 4246d528ed9Sopenharmony_ci std::u16string_view characters) { 4256d528ed9Sopenharmony_ci return input.find_first_not_of(characters) == std::u16string_view::npos; 4266d528ed9Sopenharmony_ci} 4276d528ed9Sopenharmony_ci 4286d528ed9Sopenharmony_citemplate <class Char> 4296d528ed9Sopenharmony_ciinline bool DoIsStringASCII(const Char* characters, size_t length) { 4306d528ed9Sopenharmony_ci MachineWord all_char_bits = 0; 4316d528ed9Sopenharmony_ci const Char* end = characters + length; 4326d528ed9Sopenharmony_ci 4336d528ed9Sopenharmony_ci // Prologue: align the input. 4346d528ed9Sopenharmony_ci while (!IsAlignedToMachineWord(characters) && characters != end) { 4356d528ed9Sopenharmony_ci all_char_bits |= *characters; 4366d528ed9Sopenharmony_ci ++characters; 4376d528ed9Sopenharmony_ci } 4386d528ed9Sopenharmony_ci 4396d528ed9Sopenharmony_ci // Compare the values of CPU word size. 4406d528ed9Sopenharmony_ci const Char* word_end = AlignToMachineWord(end); 4416d528ed9Sopenharmony_ci const size_t loop_increment = sizeof(MachineWord) / sizeof(Char); 4426d528ed9Sopenharmony_ci while (characters < word_end) { 4436d528ed9Sopenharmony_ci all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); 4446d528ed9Sopenharmony_ci characters += loop_increment; 4456d528ed9Sopenharmony_ci } 4466d528ed9Sopenharmony_ci 4476d528ed9Sopenharmony_ci // Process the remaining bytes. 4486d528ed9Sopenharmony_ci while (characters != end) { 4496d528ed9Sopenharmony_ci all_char_bits |= *characters; 4506d528ed9Sopenharmony_ci ++characters; 4516d528ed9Sopenharmony_ci } 4526d528ed9Sopenharmony_ci 4536d528ed9Sopenharmony_ci MachineWord non_ascii_bit_mask = 4546d528ed9Sopenharmony_ci NonASCIIMask<sizeof(MachineWord), Char>::value(); 4556d528ed9Sopenharmony_ci return !(all_char_bits & non_ascii_bit_mask); 4566d528ed9Sopenharmony_ci} 4576d528ed9Sopenharmony_ci 4586d528ed9Sopenharmony_cibool IsStringASCII(std::string_view str) { 4596d528ed9Sopenharmony_ci return DoIsStringASCII(str.data(), str.length()); 4606d528ed9Sopenharmony_ci} 4616d528ed9Sopenharmony_ci 4626d528ed9Sopenharmony_cibool IsStringASCII(std::u16string_view str) { 4636d528ed9Sopenharmony_ci return DoIsStringASCII(str.data(), str.length()); 4646d528ed9Sopenharmony_ci} 4656d528ed9Sopenharmony_ci 4666d528ed9Sopenharmony_cibool IsStringUTF8(std::string_view str) { 4676d528ed9Sopenharmony_ci const char* src = str.data(); 4686d528ed9Sopenharmony_ci int32_t src_len = static_cast<int32_t>(str.length()); 4696d528ed9Sopenharmony_ci int32_t char_index = 0; 4706d528ed9Sopenharmony_ci 4716d528ed9Sopenharmony_ci while (char_index < src_len) { 4726d528ed9Sopenharmony_ci int32_t code_point; 4736d528ed9Sopenharmony_ci CBU8_NEXT(src, char_index, src_len, code_point); 4746d528ed9Sopenharmony_ci if (!IsValidCharacter(code_point)) 4756d528ed9Sopenharmony_ci return false; 4766d528ed9Sopenharmony_ci } 4776d528ed9Sopenharmony_ci return true; 4786d528ed9Sopenharmony_ci} 4796d528ed9Sopenharmony_ci 4806d528ed9Sopenharmony_ci// Implementation note: Normally this function will be called with a hardcoded 4816d528ed9Sopenharmony_ci// constant for the lowercase_ascii parameter. Constructing a std::string_view 4826d528ed9Sopenharmony_ci// from a C constant requires running strlen, so the result will be two passes 4836d528ed9Sopenharmony_ci// through the buffers, one to file the length of lowercase_ascii, and one to 4846d528ed9Sopenharmony_ci// compare each letter. 4856d528ed9Sopenharmony_ci// 4866d528ed9Sopenharmony_ci// This function could have taken a const char* to avoid this and only do one 4876d528ed9Sopenharmony_ci// pass through the string. But the strlen is faster than the case-insensitive 4886d528ed9Sopenharmony_ci// compares and lets us early-exit in the case that the strings are different 4896d528ed9Sopenharmony_ci// lengths (will often be the case for non-matches). So whether one approach or 4906d528ed9Sopenharmony_ci// the other will be faster depends on the case. 4916d528ed9Sopenharmony_ci// 4926d528ed9Sopenharmony_ci// The hardcoded strings are typically very short so it doesn't matter, and the 4936d528ed9Sopenharmony_ci// string piece gives additional flexibility for the caller (doesn't have to be 4946d528ed9Sopenharmony_ci// null terminated) so we choose the std::string_view route. 4956d528ed9Sopenharmony_citemplate <typename Str> 4966d528ed9Sopenharmony_cistatic inline bool DoLowerCaseEqualsASCII( 4976d528ed9Sopenharmony_ci std::basic_string_view<typename Str::value_type> str, 4986d528ed9Sopenharmony_ci std::string_view lowercase_ascii) { 4996d528ed9Sopenharmony_ci if (str.size() != lowercase_ascii.size()) 5006d528ed9Sopenharmony_ci return false; 5016d528ed9Sopenharmony_ci for (size_t i = 0; i < str.size(); i++) { 5026d528ed9Sopenharmony_ci if (ToLowerASCII(str[i]) != lowercase_ascii[i]) 5036d528ed9Sopenharmony_ci return false; 5046d528ed9Sopenharmony_ci } 5056d528ed9Sopenharmony_ci return true; 5066d528ed9Sopenharmony_ci} 5076d528ed9Sopenharmony_ci 5086d528ed9Sopenharmony_cibool LowerCaseEqualsASCII(std::string_view str, 5096d528ed9Sopenharmony_ci std::string_view lowercase_ascii) { 5106d528ed9Sopenharmony_ci return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii); 5116d528ed9Sopenharmony_ci} 5126d528ed9Sopenharmony_ci 5136d528ed9Sopenharmony_cibool LowerCaseEqualsASCII(std::u16string_view str, 5146d528ed9Sopenharmony_ci std::string_view lowercase_ascii) { 5156d528ed9Sopenharmony_ci return DoLowerCaseEqualsASCII<std::u16string>(str, lowercase_ascii); 5166d528ed9Sopenharmony_ci} 5176d528ed9Sopenharmony_ci 5186d528ed9Sopenharmony_cibool EqualsASCII(std::u16string_view str, std::string_view ascii) { 5196d528ed9Sopenharmony_ci if (str.length() != ascii.length()) 5206d528ed9Sopenharmony_ci return false; 5216d528ed9Sopenharmony_ci return std::equal(ascii.begin(), ascii.end(), str.begin()); 5226d528ed9Sopenharmony_ci} 5236d528ed9Sopenharmony_ci 5246d528ed9Sopenharmony_citemplate <typename char_type> 5256d528ed9Sopenharmony_cibool StartsWithCaseInsensitiveASCIIT( 5266d528ed9Sopenharmony_ci std::basic_string_view<char_type> str, 5276d528ed9Sopenharmony_ci std::basic_string_view<char_type> search_for) { 5286d528ed9Sopenharmony_ci if (search_for.size() > str.size()) 5296d528ed9Sopenharmony_ci return false; 5306d528ed9Sopenharmony_ci 5316d528ed9Sopenharmony_ci std::basic_string_view<char_type> source = str.substr(0, search_for.size()); 5326d528ed9Sopenharmony_ci 5336d528ed9Sopenharmony_ci return std::equal(search_for.begin(), search_for.end(), source.begin(), 5346d528ed9Sopenharmony_ci CaseInsensitiveCompareASCII<char_type>()); 5356d528ed9Sopenharmony_ci} 5366d528ed9Sopenharmony_ci 5376d528ed9Sopenharmony_cibool StartsWithCaseInsensitiveASCII(std::string_view str, 5386d528ed9Sopenharmony_ci std::string_view search_for) { 5396d528ed9Sopenharmony_ci return StartsWithCaseInsensitiveASCIIT(str, search_for); 5406d528ed9Sopenharmony_ci} 5416d528ed9Sopenharmony_ci 5426d528ed9Sopenharmony_cibool StartsWithCaseInsensitiveASCII(std::u16string_view str, 5436d528ed9Sopenharmony_ci std::u16string_view search_for) { 5446d528ed9Sopenharmony_ci return StartsWithCaseInsensitiveASCIIT(str, search_for); 5456d528ed9Sopenharmony_ci} 5466d528ed9Sopenharmony_ci 5476d528ed9Sopenharmony_citemplate <typename char_type> 5486d528ed9Sopenharmony_cibool EndsWithCaseInsensitiveASCIIT( 5496d528ed9Sopenharmony_ci std::basic_string_view<char_type> str, 5506d528ed9Sopenharmony_ci std::basic_string_view<char_type> search_for) { 5516d528ed9Sopenharmony_ci if (search_for.size() > str.size()) 5526d528ed9Sopenharmony_ci return false; 5536d528ed9Sopenharmony_ci 5546d528ed9Sopenharmony_ci std::basic_string_view<char_type> source = 5556d528ed9Sopenharmony_ci str.substr(str.size() - search_for.size(), search_for.size()); 5566d528ed9Sopenharmony_ci 5576d528ed9Sopenharmony_ci return std::equal(source.begin(), source.end(), search_for.begin(), 5586d528ed9Sopenharmony_ci CaseInsensitiveCompareASCII<char_type>()); 5596d528ed9Sopenharmony_ci} 5606d528ed9Sopenharmony_ci 5616d528ed9Sopenharmony_cibool EndsWithCaseInsensitiveASCII(std::string_view str, 5626d528ed9Sopenharmony_ci std::string_view search_for) { 5636d528ed9Sopenharmony_ci return EndsWithCaseInsensitiveASCIIT(str, search_for); 5646d528ed9Sopenharmony_ci} 5656d528ed9Sopenharmony_ci 5666d528ed9Sopenharmony_cibool EndsWithCaseInsensitiveASCII(std::u16string_view str, 5676d528ed9Sopenharmony_ci std::u16string_view search_for) { 5686d528ed9Sopenharmony_ci return EndsWithCaseInsensitiveASCIIT(str, search_for); 5696d528ed9Sopenharmony_ci} 5706d528ed9Sopenharmony_ci 5716d528ed9Sopenharmony_cichar HexDigitToInt(char16_t c) { 5726d528ed9Sopenharmony_ci DCHECK(IsHexDigit(c)); 5736d528ed9Sopenharmony_ci if (c >= '0' && c <= '9') 5746d528ed9Sopenharmony_ci return static_cast<char>(c - '0'); 5756d528ed9Sopenharmony_ci if (c >= 'A' && c <= 'F') 5766d528ed9Sopenharmony_ci return static_cast<char>(c - 'A' + 10); 5776d528ed9Sopenharmony_ci if (c >= 'a' && c <= 'f') 5786d528ed9Sopenharmony_ci return static_cast<char>(c - 'a' + 10); 5796d528ed9Sopenharmony_ci return 0; 5806d528ed9Sopenharmony_ci} 5816d528ed9Sopenharmony_ci 5826d528ed9Sopenharmony_cibool IsUnicodeWhitespace(char16_t c) { 5836d528ed9Sopenharmony_ci // kWhitespaceWide is a NULL-terminated string 5846d528ed9Sopenharmony_ci for (const char16_t* cur = kWhitespaceUTF16; *cur; ++cur) { 5856d528ed9Sopenharmony_ci if (*cur == c) 5866d528ed9Sopenharmony_ci return true; 5876d528ed9Sopenharmony_ci } 5886d528ed9Sopenharmony_ci return false; 5896d528ed9Sopenharmony_ci} 5906d528ed9Sopenharmony_ci 5916d528ed9Sopenharmony_cistatic const char* const kByteStringsUnlocalized[] = {" B", " kB", " MB", 5926d528ed9Sopenharmony_ci " GB", " TB", " PB"}; 5936d528ed9Sopenharmony_ci 5946d528ed9Sopenharmony_cistd::u16string FormatBytesUnlocalized(int64_t bytes) { 5956d528ed9Sopenharmony_ci double unit_amount = static_cast<double>(bytes); 5966d528ed9Sopenharmony_ci size_t dimension = 0; 5976d528ed9Sopenharmony_ci const int kKilo = 1024; 5986d528ed9Sopenharmony_ci while (unit_amount >= kKilo && 5996d528ed9Sopenharmony_ci dimension < std::size(kByteStringsUnlocalized) - 1) { 6006d528ed9Sopenharmony_ci unit_amount /= kKilo; 6016d528ed9Sopenharmony_ci dimension++; 6026d528ed9Sopenharmony_ci } 6036d528ed9Sopenharmony_ci 6046d528ed9Sopenharmony_ci char buf[64]; 6056d528ed9Sopenharmony_ci if (bytes != 0 && dimension > 0 && unit_amount < 100) { 6066d528ed9Sopenharmony_ci base::snprintf(buf, std::size(buf), "%.1lf%s", unit_amount, 6076d528ed9Sopenharmony_ci kByteStringsUnlocalized[dimension]); 6086d528ed9Sopenharmony_ci } else { 6096d528ed9Sopenharmony_ci base::snprintf(buf, std::size(buf), "%.0lf%s", unit_amount, 6106d528ed9Sopenharmony_ci kByteStringsUnlocalized[dimension]); 6116d528ed9Sopenharmony_ci } 6126d528ed9Sopenharmony_ci 6136d528ed9Sopenharmony_ci return ASCIIToUTF16(buf); 6146d528ed9Sopenharmony_ci} 6156d528ed9Sopenharmony_ci 6166d528ed9Sopenharmony_ci// A Matcher for DoReplaceMatchesAfterOffset() that matches substrings. 6176d528ed9Sopenharmony_citemplate <class StringType> 6186d528ed9Sopenharmony_cistruct SubstringMatcher { 6196d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> find_this; 6206d528ed9Sopenharmony_ci 6216d528ed9Sopenharmony_ci size_t Find(const StringType& input, size_t pos) { 6226d528ed9Sopenharmony_ci return input.find(find_this.data(), pos, find_this.length()); 6236d528ed9Sopenharmony_ci } 6246d528ed9Sopenharmony_ci size_t MatchSize() { return find_this.length(); } 6256d528ed9Sopenharmony_ci}; 6266d528ed9Sopenharmony_ci 6276d528ed9Sopenharmony_ci// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters. 6286d528ed9Sopenharmony_citemplate <class StringType> 6296d528ed9Sopenharmony_cistruct CharacterMatcher { 6306d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> find_any_of_these; 6316d528ed9Sopenharmony_ci 6326d528ed9Sopenharmony_ci size_t Find(const StringType& input, size_t pos) { 6336d528ed9Sopenharmony_ci return input.find_first_of(find_any_of_these.data(), pos, 6346d528ed9Sopenharmony_ci find_any_of_these.length()); 6356d528ed9Sopenharmony_ci } 6366d528ed9Sopenharmony_ci constexpr size_t MatchSize() { return 1; } 6376d528ed9Sopenharmony_ci}; 6386d528ed9Sopenharmony_ci 6396d528ed9Sopenharmony_cienum class ReplaceType { REPLACE_ALL, REPLACE_FIRST }; 6406d528ed9Sopenharmony_ci 6416d528ed9Sopenharmony_ci// Runs in O(n) time in the length of |str|, and transforms the string without 6426d528ed9Sopenharmony_ci// reallocating when possible. Returns |true| if any matches were found. 6436d528ed9Sopenharmony_ci// 6446d528ed9Sopenharmony_ci// This is parameterized on a |Matcher| traits type, so that it can be the 6456d528ed9Sopenharmony_ci// implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset(). 6466d528ed9Sopenharmony_citemplate <class StringType, class Matcher> 6476d528ed9Sopenharmony_cibool DoReplaceMatchesAfterOffset( 6486d528ed9Sopenharmony_ci StringType* str, 6496d528ed9Sopenharmony_ci size_t initial_offset, 6506d528ed9Sopenharmony_ci Matcher matcher, 6516d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> replace_with, 6526d528ed9Sopenharmony_ci ReplaceType replace_type) { 6536d528ed9Sopenharmony_ci using CharTraits = typename StringType::traits_type; 6546d528ed9Sopenharmony_ci 6556d528ed9Sopenharmony_ci const size_t find_length = matcher.MatchSize(); 6566d528ed9Sopenharmony_ci if (!find_length) 6576d528ed9Sopenharmony_ci return false; 6586d528ed9Sopenharmony_ci 6596d528ed9Sopenharmony_ci // If the find string doesn't appear, there's nothing to do. 6606d528ed9Sopenharmony_ci size_t first_match = matcher.Find(*str, initial_offset); 6616d528ed9Sopenharmony_ci if (first_match == StringType::npos) 6626d528ed9Sopenharmony_ci return false; 6636d528ed9Sopenharmony_ci 6646d528ed9Sopenharmony_ci // If we're only replacing one instance, there's no need to do anything 6656d528ed9Sopenharmony_ci // complicated. 6666d528ed9Sopenharmony_ci const size_t replace_length = replace_with.length(); 6676d528ed9Sopenharmony_ci if (replace_type == ReplaceType::REPLACE_FIRST) { 6686d528ed9Sopenharmony_ci str->replace(first_match, find_length, replace_with.data(), replace_length); 6696d528ed9Sopenharmony_ci return true; 6706d528ed9Sopenharmony_ci } 6716d528ed9Sopenharmony_ci 6726d528ed9Sopenharmony_ci // If the find and replace strings are the same length, we can simply use 6736d528ed9Sopenharmony_ci // replace() on each instance, and finish the entire operation in O(n) time. 6746d528ed9Sopenharmony_ci if (find_length == replace_length) { 6756d528ed9Sopenharmony_ci auto* buffer = &((*str)[0]); 6766d528ed9Sopenharmony_ci for (size_t offset = first_match; offset != StringType::npos; 6776d528ed9Sopenharmony_ci offset = matcher.Find(*str, offset + replace_length)) { 6786d528ed9Sopenharmony_ci CharTraits::copy(buffer + offset, replace_with.data(), replace_length); 6796d528ed9Sopenharmony_ci } 6806d528ed9Sopenharmony_ci return true; 6816d528ed9Sopenharmony_ci } 6826d528ed9Sopenharmony_ci 6836d528ed9Sopenharmony_ci // Since the find and replace strings aren't the same length, a loop like the 6846d528ed9Sopenharmony_ci // one above would be O(n^2) in the worst case, as replace() will shift the 6856d528ed9Sopenharmony_ci // entire remaining string each time. We need to be more clever to keep things 6866d528ed9Sopenharmony_ci // O(n). 6876d528ed9Sopenharmony_ci // 6886d528ed9Sopenharmony_ci // When the string is being shortened, it's possible to just shift the matches 6896d528ed9Sopenharmony_ci // down in one pass while finding, and truncate the length at the end of the 6906d528ed9Sopenharmony_ci // search. 6916d528ed9Sopenharmony_ci // 6926d528ed9Sopenharmony_ci // If the string is being lengthened, more work is required. The strategy used 6936d528ed9Sopenharmony_ci // here is to make two find() passes through the string. The first pass counts 6946d528ed9Sopenharmony_ci // the number of matches to determine the new size. The second pass will 6956d528ed9Sopenharmony_ci // either construct the new string into a new buffer (if the existing buffer 6966d528ed9Sopenharmony_ci // lacked capacity), or else -- if there is room -- create a region of scratch 6976d528ed9Sopenharmony_ci // space after |first_match| by shifting the tail of the string to a higher 6986d528ed9Sopenharmony_ci // index, and doing in-place moves from the tail to lower indices thereafter. 6996d528ed9Sopenharmony_ci size_t str_length = str->length(); 7006d528ed9Sopenharmony_ci size_t expansion = 0; 7016d528ed9Sopenharmony_ci if (replace_length > find_length) { 7026d528ed9Sopenharmony_ci // This operation lengthens the string; determine the new length by counting 7036d528ed9Sopenharmony_ci // matches. 7046d528ed9Sopenharmony_ci const size_t expansion_per_match = (replace_length - find_length); 7056d528ed9Sopenharmony_ci size_t num_matches = 0; 7066d528ed9Sopenharmony_ci for (size_t match = first_match; match != StringType::npos; 7076d528ed9Sopenharmony_ci match = matcher.Find(*str, match + find_length)) { 7086d528ed9Sopenharmony_ci expansion += expansion_per_match; 7096d528ed9Sopenharmony_ci ++num_matches; 7106d528ed9Sopenharmony_ci } 7116d528ed9Sopenharmony_ci const size_t final_length = str_length + expansion; 7126d528ed9Sopenharmony_ci 7136d528ed9Sopenharmony_ci if (str->capacity() < final_length) { 7146d528ed9Sopenharmony_ci // If we'd have to allocate a new buffer to grow the string, build the 7156d528ed9Sopenharmony_ci // result directly into the new allocation via append(). 7166d528ed9Sopenharmony_ci StringType src(str->get_allocator()); 7176d528ed9Sopenharmony_ci str->swap(src); 7186d528ed9Sopenharmony_ci str->reserve(final_length); 7196d528ed9Sopenharmony_ci 7206d528ed9Sopenharmony_ci size_t pos = 0; 7216d528ed9Sopenharmony_ci for (size_t match = first_match;; match = matcher.Find(src, pos)) { 7226d528ed9Sopenharmony_ci str->append(src, pos, match - pos); 7236d528ed9Sopenharmony_ci str->append(replace_with.data(), replace_length); 7246d528ed9Sopenharmony_ci pos = match + find_length; 7256d528ed9Sopenharmony_ci 7266d528ed9Sopenharmony_ci // A mid-loop test/break enables skipping the final Find() call; the 7276d528ed9Sopenharmony_ci // number of matches is known, so don't search past the last one. 7286d528ed9Sopenharmony_ci if (!--num_matches) 7296d528ed9Sopenharmony_ci break; 7306d528ed9Sopenharmony_ci } 7316d528ed9Sopenharmony_ci 7326d528ed9Sopenharmony_ci // Handle substring after the final match. 7336d528ed9Sopenharmony_ci str->append(src, pos, str_length - pos); 7346d528ed9Sopenharmony_ci return true; 7356d528ed9Sopenharmony_ci } 7366d528ed9Sopenharmony_ci 7376d528ed9Sopenharmony_ci // Prepare for the copy/move loop below -- expand the string to its final 7386d528ed9Sopenharmony_ci // size by shifting the data after the first match to the end of the resized 7396d528ed9Sopenharmony_ci // string. 7406d528ed9Sopenharmony_ci size_t shift_src = first_match + find_length; 7416d528ed9Sopenharmony_ci size_t shift_dst = shift_src + expansion; 7426d528ed9Sopenharmony_ci 7436d528ed9Sopenharmony_ci // Big |expansion| factors (relative to |str_length|) require padding up to 7446d528ed9Sopenharmony_ci // |shift_dst|. 7456d528ed9Sopenharmony_ci if (shift_dst > str_length) 7466d528ed9Sopenharmony_ci str->resize(shift_dst); 7476d528ed9Sopenharmony_ci 7486d528ed9Sopenharmony_ci str->replace(shift_dst, str_length - shift_src, *str, shift_src, 7496d528ed9Sopenharmony_ci str_length - shift_src); 7506d528ed9Sopenharmony_ci str_length = final_length; 7516d528ed9Sopenharmony_ci } 7526d528ed9Sopenharmony_ci 7536d528ed9Sopenharmony_ci // We can alternate replacement and move operations. This won't overwrite the 7546d528ed9Sopenharmony_ci // unsearched region of the string so long as |write_offset| <= |read_offset|; 7556d528ed9Sopenharmony_ci // that condition is always satisfied because: 7566d528ed9Sopenharmony_ci // 7576d528ed9Sopenharmony_ci // (a) If the string is being shortened, |expansion| is zero and 7586d528ed9Sopenharmony_ci // |write_offset| grows slower than |read_offset|. 7596d528ed9Sopenharmony_ci // 7606d528ed9Sopenharmony_ci // (b) If the string is being lengthened, |write_offset| grows faster than 7616d528ed9Sopenharmony_ci // |read_offset|, but |expansion| is big enough so that |write_offset| 7626d528ed9Sopenharmony_ci // will only catch up to |read_offset| at the point of the last match. 7636d528ed9Sopenharmony_ci auto* buffer = &((*str)[0]); 7646d528ed9Sopenharmony_ci size_t write_offset = first_match; 7656d528ed9Sopenharmony_ci size_t read_offset = first_match + expansion; 7666d528ed9Sopenharmony_ci do { 7676d528ed9Sopenharmony_ci if (replace_length) { 7686d528ed9Sopenharmony_ci CharTraits::copy(buffer + write_offset, replace_with.data(), 7696d528ed9Sopenharmony_ci replace_length); 7706d528ed9Sopenharmony_ci write_offset += replace_length; 7716d528ed9Sopenharmony_ci } 7726d528ed9Sopenharmony_ci read_offset += find_length; 7736d528ed9Sopenharmony_ci 7746d528ed9Sopenharmony_ci // min() clamps StringType::npos (the largest unsigned value) to str_length. 7756d528ed9Sopenharmony_ci size_t match = std::min(matcher.Find(*str, read_offset), str_length); 7766d528ed9Sopenharmony_ci 7776d528ed9Sopenharmony_ci size_t length = match - read_offset; 7786d528ed9Sopenharmony_ci if (length) { 7796d528ed9Sopenharmony_ci CharTraits::move(buffer + write_offset, buffer + read_offset, length); 7806d528ed9Sopenharmony_ci write_offset += length; 7816d528ed9Sopenharmony_ci read_offset += length; 7826d528ed9Sopenharmony_ci } 7836d528ed9Sopenharmony_ci } while (read_offset < str_length); 7846d528ed9Sopenharmony_ci 7856d528ed9Sopenharmony_ci // If we're shortening the string, truncate it now. 7866d528ed9Sopenharmony_ci str->resize(write_offset); 7876d528ed9Sopenharmony_ci return true; 7886d528ed9Sopenharmony_ci} 7896d528ed9Sopenharmony_ci 7906d528ed9Sopenharmony_citemplate <class StringType> 7916d528ed9Sopenharmony_cibool ReplaceCharsT( 7926d528ed9Sopenharmony_ci const StringType& input, 7936d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> find_any_of_these, 7946d528ed9Sopenharmony_ci std::basic_string_view<typename StringType::value_type> replace_with, 7956d528ed9Sopenharmony_ci StringType* output) { 7966d528ed9Sopenharmony_ci // Commonly, this is called with output and input being the same string; in 7976d528ed9Sopenharmony_ci // that case, this assignment is inexpensive. 7986d528ed9Sopenharmony_ci *output = input; 7996d528ed9Sopenharmony_ci 8006d528ed9Sopenharmony_ci return DoReplaceMatchesAfterOffset( 8016d528ed9Sopenharmony_ci output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with, 8026d528ed9Sopenharmony_ci ReplaceType::REPLACE_ALL); 8036d528ed9Sopenharmony_ci} 8046d528ed9Sopenharmony_ci 8056d528ed9Sopenharmony_civoid ReplaceFirstSubstringAfterOffset(std::u16string* str, 8066d528ed9Sopenharmony_ci size_t start_offset, 8076d528ed9Sopenharmony_ci std::u16string_view find_this, 8086d528ed9Sopenharmony_ci std::u16string_view replace_with) { 8096d528ed9Sopenharmony_ci DoReplaceMatchesAfterOffset(str, start_offset, 8106d528ed9Sopenharmony_ci SubstringMatcher<std::u16string>{find_this}, 8116d528ed9Sopenharmony_ci replace_with, ReplaceType::REPLACE_FIRST); 8126d528ed9Sopenharmony_ci} 8136d528ed9Sopenharmony_ci 8146d528ed9Sopenharmony_civoid ReplaceFirstSubstringAfterOffset(std::string* str, 8156d528ed9Sopenharmony_ci size_t start_offset, 8166d528ed9Sopenharmony_ci std::string_view find_this, 8176d528ed9Sopenharmony_ci std::string_view replace_with) { 8186d528ed9Sopenharmony_ci DoReplaceMatchesAfterOffset(str, start_offset, 8196d528ed9Sopenharmony_ci SubstringMatcher<std::string>{find_this}, 8206d528ed9Sopenharmony_ci replace_with, ReplaceType::REPLACE_FIRST); 8216d528ed9Sopenharmony_ci} 8226d528ed9Sopenharmony_ci 8236d528ed9Sopenharmony_civoid ReplaceSubstringsAfterOffset(std::u16string* str, 8246d528ed9Sopenharmony_ci size_t start_offset, 8256d528ed9Sopenharmony_ci std::u16string_view find_this, 8266d528ed9Sopenharmony_ci std::u16string_view replace_with) { 8276d528ed9Sopenharmony_ci DoReplaceMatchesAfterOffset(str, start_offset, 8286d528ed9Sopenharmony_ci SubstringMatcher<std::u16string>{find_this}, 8296d528ed9Sopenharmony_ci replace_with, ReplaceType::REPLACE_ALL); 8306d528ed9Sopenharmony_ci} 8316d528ed9Sopenharmony_ci 8326d528ed9Sopenharmony_civoid ReplaceSubstringsAfterOffset(std::string* str, 8336d528ed9Sopenharmony_ci size_t start_offset, 8346d528ed9Sopenharmony_ci std::string_view find_this, 8356d528ed9Sopenharmony_ci std::string_view replace_with) { 8366d528ed9Sopenharmony_ci DoReplaceMatchesAfterOffset(str, start_offset, 8376d528ed9Sopenharmony_ci SubstringMatcher<std::string>{find_this}, 8386d528ed9Sopenharmony_ci replace_with, ReplaceType::REPLACE_ALL); 8396d528ed9Sopenharmony_ci} 8406d528ed9Sopenharmony_ci 8416d528ed9Sopenharmony_citemplate <class string_type> 8426d528ed9Sopenharmony_ciinline typename string_type::value_type* WriteIntoT(string_type* str, 8436d528ed9Sopenharmony_ci size_t length_with_null) { 8446d528ed9Sopenharmony_ci DCHECK_GT(length_with_null, 1u); 8456d528ed9Sopenharmony_ci str->reserve(length_with_null); 8466d528ed9Sopenharmony_ci str->resize(length_with_null - 1); 8476d528ed9Sopenharmony_ci return &((*str)[0]); 8486d528ed9Sopenharmony_ci} 8496d528ed9Sopenharmony_ci 8506d528ed9Sopenharmony_cichar* WriteInto(std::string* str, size_t length_with_null) { 8516d528ed9Sopenharmony_ci return WriteIntoT(str, length_with_null); 8526d528ed9Sopenharmony_ci} 8536d528ed9Sopenharmony_ci 8546d528ed9Sopenharmony_cichar16_t* WriteInto(std::u16string* str, size_t length_with_null) { 8556d528ed9Sopenharmony_ci return WriteIntoT(str, length_with_null); 8566d528ed9Sopenharmony_ci} 8576d528ed9Sopenharmony_ci 8586d528ed9Sopenharmony_ci#if defined(_MSC_VER) && !defined(__clang__) 8596d528ed9Sopenharmony_ci// Work around VC++ code-gen bug. https://crbug.com/804884 8606d528ed9Sopenharmony_ci#pragma optimize("", off) 8616d528ed9Sopenharmony_ci#endif 8626d528ed9Sopenharmony_ci 8636d528ed9Sopenharmony_ci// Generic version for all JoinString overloads. |list_type| must be a sequence 8646d528ed9Sopenharmony_ci// (std::vector or std::initializer_list) of strings/string_views of any type. 8656d528ed9Sopenharmony_citemplate <typename char_type, typename list_type> 8666d528ed9Sopenharmony_cistatic std::basic_string<char_type> JoinStringT( 8676d528ed9Sopenharmony_ci const list_type& parts, 8686d528ed9Sopenharmony_ci std::basic_string_view<char_type> sep) { 8696d528ed9Sopenharmony_ci if (parts.size() == 0) 8706d528ed9Sopenharmony_ci return std::basic_string<char_type>(); 8716d528ed9Sopenharmony_ci 8726d528ed9Sopenharmony_ci // Pre-allocate the eventual size of the string. Start with the size of all of 8736d528ed9Sopenharmony_ci // the separators (note that this *assumes* parts.size() > 0). 8746d528ed9Sopenharmony_ci size_t total_size = (parts.size() - 1) * sep.size(); 8756d528ed9Sopenharmony_ci for (const auto& part : parts) 8766d528ed9Sopenharmony_ci total_size += part.size(); 8776d528ed9Sopenharmony_ci std::basic_string<char_type> result; 8786d528ed9Sopenharmony_ci result.reserve(total_size); 8796d528ed9Sopenharmony_ci 8806d528ed9Sopenharmony_ci auto iter = parts.begin(); 8816d528ed9Sopenharmony_ci DCHECK(iter != parts.end()); 8826d528ed9Sopenharmony_ci result.append(*iter); 8836d528ed9Sopenharmony_ci ++iter; 8846d528ed9Sopenharmony_ci 8856d528ed9Sopenharmony_ci for (; iter != parts.end(); ++iter) { 8866d528ed9Sopenharmony_ci result.append(sep); 8876d528ed9Sopenharmony_ci result.append(*iter); 8886d528ed9Sopenharmony_ci } 8896d528ed9Sopenharmony_ci 8906d528ed9Sopenharmony_ci // Sanity-check that we pre-allocated correctly. 8916d528ed9Sopenharmony_ci DCHECK_EQ(total_size, result.size()); 8926d528ed9Sopenharmony_ci 8936d528ed9Sopenharmony_ci return result; 8946d528ed9Sopenharmony_ci} 8956d528ed9Sopenharmony_ci 8966d528ed9Sopenharmony_cistd::string JoinString(const std::vector<std::string>& parts, 8976d528ed9Sopenharmony_ci std::string_view separator) { 8986d528ed9Sopenharmony_ci return JoinStringT(parts, separator); 8996d528ed9Sopenharmony_ci} 9006d528ed9Sopenharmony_ci 9016d528ed9Sopenharmony_cistd::u16string JoinString(const std::vector<std::u16string>& parts, 9026d528ed9Sopenharmony_ci std::u16string_view separator) { 9036d528ed9Sopenharmony_ci return JoinStringT(parts, separator); 9046d528ed9Sopenharmony_ci} 9056d528ed9Sopenharmony_ci 9066d528ed9Sopenharmony_ci#if defined(_MSC_VER) && !defined(__clang__) 9076d528ed9Sopenharmony_ci// Work around VC++ code-gen bug. https://crbug.com/804884 9086d528ed9Sopenharmony_ci#pragma optimize("", on) 9096d528ed9Sopenharmony_ci#endif 9106d528ed9Sopenharmony_ci 9116d528ed9Sopenharmony_cistd::string JoinString(const std::vector<std::string_view>& parts, 9126d528ed9Sopenharmony_ci std::string_view separator) { 9136d528ed9Sopenharmony_ci return JoinStringT(parts, separator); 9146d528ed9Sopenharmony_ci} 9156d528ed9Sopenharmony_ci 9166d528ed9Sopenharmony_cistd::u16string JoinString(const std::vector<std::u16string_view>& parts, 9176d528ed9Sopenharmony_ci std::u16string_view separator) { 9186d528ed9Sopenharmony_ci return JoinStringT(parts, separator); 9196d528ed9Sopenharmony_ci} 9206d528ed9Sopenharmony_ci 9216d528ed9Sopenharmony_cistd::string JoinString(std::initializer_list<std::string_view> parts, 9226d528ed9Sopenharmony_ci std::string_view separator) { 9236d528ed9Sopenharmony_ci return JoinStringT(parts, separator); 9246d528ed9Sopenharmony_ci} 9256d528ed9Sopenharmony_ci 9266d528ed9Sopenharmony_cistd::u16string JoinString(std::initializer_list<std::u16string_view> parts, 9276d528ed9Sopenharmony_ci std::u16string_view separator) { 9286d528ed9Sopenharmony_ci return JoinStringT(parts, separator); 9296d528ed9Sopenharmony_ci} 9306d528ed9Sopenharmony_ci 9316d528ed9Sopenharmony_citemplate <class FormatStringType, class OutStringType> 9326d528ed9Sopenharmony_ciOutStringType DoReplaceStringPlaceholders( 9336d528ed9Sopenharmony_ci const FormatStringType& format_string, 9346d528ed9Sopenharmony_ci const std::vector<OutStringType>& subst, 9356d528ed9Sopenharmony_ci std::vector<size_t>* offsets) { 9366d528ed9Sopenharmony_ci size_t substitutions = subst.size(); 9376d528ed9Sopenharmony_ci DCHECK_LT(substitutions, 10U); 9386d528ed9Sopenharmony_ci 9396d528ed9Sopenharmony_ci size_t sub_length = 0; 9406d528ed9Sopenharmony_ci for (const auto& cur : subst) 9416d528ed9Sopenharmony_ci sub_length += cur.length(); 9426d528ed9Sopenharmony_ci 9436d528ed9Sopenharmony_ci OutStringType formatted; 9446d528ed9Sopenharmony_ci formatted.reserve(format_string.length() + sub_length); 9456d528ed9Sopenharmony_ci 9466d528ed9Sopenharmony_ci std::vector<ReplacementOffset> r_offsets; 9476d528ed9Sopenharmony_ci for (auto i = format_string.begin(); i != format_string.end(); ++i) { 9486d528ed9Sopenharmony_ci if ('$' == *i) { 9496d528ed9Sopenharmony_ci if (i + 1 != format_string.end()) { 9506d528ed9Sopenharmony_ci ++i; 9516d528ed9Sopenharmony_ci if ('$' == *i) { 9526d528ed9Sopenharmony_ci while (i != format_string.end() && '$' == *i) { 9536d528ed9Sopenharmony_ci formatted.push_back('$'); 9546d528ed9Sopenharmony_ci ++i; 9556d528ed9Sopenharmony_ci } 9566d528ed9Sopenharmony_ci --i; 9576d528ed9Sopenharmony_ci } else { 9586d528ed9Sopenharmony_ci if (*i < '1' || *i > '9') { 9596d528ed9Sopenharmony_ci DLOG(ERROR) << "Invalid placeholder: $" << std::to_string(*i); 9606d528ed9Sopenharmony_ci continue; 9616d528ed9Sopenharmony_ci } 9626d528ed9Sopenharmony_ci uintptr_t index = *i - '1'; 9636d528ed9Sopenharmony_ci if (offsets) { 9646d528ed9Sopenharmony_ci ReplacementOffset r_offset(index, 9656d528ed9Sopenharmony_ci static_cast<int>(formatted.size())); 9666d528ed9Sopenharmony_ci r_offsets.insert( 9676d528ed9Sopenharmony_ci std::upper_bound(r_offsets.begin(), r_offsets.end(), r_offset, 9686d528ed9Sopenharmony_ci &CompareParameter), 9696d528ed9Sopenharmony_ci r_offset); 9706d528ed9Sopenharmony_ci } 9716d528ed9Sopenharmony_ci if (index < substitutions) 9726d528ed9Sopenharmony_ci formatted.append(subst.at(index)); 9736d528ed9Sopenharmony_ci } 9746d528ed9Sopenharmony_ci } 9756d528ed9Sopenharmony_ci } else { 9766d528ed9Sopenharmony_ci formatted.push_back(*i); 9776d528ed9Sopenharmony_ci } 9786d528ed9Sopenharmony_ci } 9796d528ed9Sopenharmony_ci if (offsets) { 9806d528ed9Sopenharmony_ci for (const auto& cur : r_offsets) 9816d528ed9Sopenharmony_ci offsets->push_back(cur.offset); 9826d528ed9Sopenharmony_ci } 9836d528ed9Sopenharmony_ci return formatted; 9846d528ed9Sopenharmony_ci} 9856d528ed9Sopenharmony_ci 9866d528ed9Sopenharmony_cistd::u16string ReplaceStringPlaceholders( 9876d528ed9Sopenharmony_ci const std::u16string& format_string, 9886d528ed9Sopenharmony_ci const std::vector<std::u16string>& subst, 9896d528ed9Sopenharmony_ci std::vector<size_t>* offsets) { 9906d528ed9Sopenharmony_ci return DoReplaceStringPlaceholders(format_string, subst, offsets); 9916d528ed9Sopenharmony_ci} 9926d528ed9Sopenharmony_ci 9936d528ed9Sopenharmony_cistd::string ReplaceStringPlaceholders(std::string_view format_string, 9946d528ed9Sopenharmony_ci const std::vector<std::string>& subst, 9956d528ed9Sopenharmony_ci std::vector<size_t>* offsets) { 9966d528ed9Sopenharmony_ci return DoReplaceStringPlaceholders(format_string, subst, offsets); 9976d528ed9Sopenharmony_ci} 9986d528ed9Sopenharmony_ci 9996d528ed9Sopenharmony_cistd::u16string ReplaceStringPlaceholders(const std::u16string& format_string, 10006d528ed9Sopenharmony_ci const std::u16string& a, 10016d528ed9Sopenharmony_ci size_t* offset) { 10026d528ed9Sopenharmony_ci std::vector<size_t> offsets; 10036d528ed9Sopenharmony_ci std::vector<std::u16string> subst; 10046d528ed9Sopenharmony_ci subst.push_back(a); 10056d528ed9Sopenharmony_ci std::u16string result = 10066d528ed9Sopenharmony_ci ReplaceStringPlaceholders(format_string, subst, &offsets); 10076d528ed9Sopenharmony_ci 10086d528ed9Sopenharmony_ci DCHECK_EQ(1U, offsets.size()); 10096d528ed9Sopenharmony_ci if (offset) 10106d528ed9Sopenharmony_ci *offset = offsets[0]; 10116d528ed9Sopenharmony_ci return result; 10126d528ed9Sopenharmony_ci} 10136d528ed9Sopenharmony_ci 10146d528ed9Sopenharmony_ci} // namespace base 1015