// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file defines utility functions for working with strings.

#ifndef BASE_STRINGS_STRING_UTIL_H_
#define BASE_STRINGS_STRING_UTIL_H_

#include <ctype.h>
#include <stdarg.h>  // va_list
#include <stddef.h>
#include <stdint.h>

#include <initializer_list>
#include <string>
#include <string_view>
#include <vector>

#include "base/compiler_specific.h"
#include "util/build_config.h"

namespace base {

// C standard-library functions that aren't cross-platform are provided as
// "base::...", and their prototypes are listed below. These functions are
// then implemented as inline calls to the platform-specific equivalents in the
// platform-specific headers.

// Wrapper for vsnprintf that always null-terminates and always returns the
// number of characters that would be in an untruncated formatted
// string, even when truncation occurs.
336d528ed9Sopenharmony_ciint vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) 346d528ed9Sopenharmony_ci PRINTF_FORMAT(3, 0); 356d528ed9Sopenharmony_ci 366d528ed9Sopenharmony_ci// Some of these implementations need to be inlined. 376d528ed9Sopenharmony_ci 386d528ed9Sopenharmony_ci// We separate the declaration from the implementation of this inline 396d528ed9Sopenharmony_ci// function just so the PRINTF_FORMAT works. 406d528ed9Sopenharmony_ciinline int snprintf(char* buffer, 416d528ed9Sopenharmony_ci size_t size, 426d528ed9Sopenharmony_ci _Printf_format_string_ const char* format, 436d528ed9Sopenharmony_ci ...) PRINTF_FORMAT(3, 4); 446d528ed9Sopenharmony_ciinline int snprintf(char* buffer, 456d528ed9Sopenharmony_ci size_t size, 466d528ed9Sopenharmony_ci _Printf_format_string_ const char* format, 476d528ed9Sopenharmony_ci ...) { 486d528ed9Sopenharmony_ci va_list arguments; 496d528ed9Sopenharmony_ci va_start(arguments, format); 506d528ed9Sopenharmony_ci int result = vsnprintf(buffer, size, format, arguments); 516d528ed9Sopenharmony_ci va_end(arguments); 526d528ed9Sopenharmony_ci return result; 536d528ed9Sopenharmony_ci} 546d528ed9Sopenharmony_ci 556d528ed9Sopenharmony_ci// ASCII-specific tolower. The standard library's tolower is locale sensitive, 566d528ed9Sopenharmony_ci// so we don't want to use it here. 576d528ed9Sopenharmony_ciinline char ToLowerASCII(char c) { 586d528ed9Sopenharmony_ci return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 596d528ed9Sopenharmony_ci} 606d528ed9Sopenharmony_ciinline char16_t ToLowerASCII(char16_t c) { 616d528ed9Sopenharmony_ci return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 626d528ed9Sopenharmony_ci} 636d528ed9Sopenharmony_ci 646d528ed9Sopenharmony_ci// ASCII-specific toupper. The standard library's toupper is locale sensitive, 656d528ed9Sopenharmony_ci// so we don't want to use it here. 
// Characters outside 'a'..'z' are returned unchanged.
inline char ToUpperASCII(char c) {
  // The arithmetic is performed in int; cast back explicitly rather than
  // relying on an implicit narrowing conversion.
  return (c >= 'a' && c <= 'z') ? static_cast<char>(c + ('A' - 'a')) : c;
}
inline char16_t ToUpperASCII(char16_t c) {
  return (c >= 'a' && c <= 'z') ? static_cast<char16_t>(c + ('A' - 'a')) : c;
}

// Converts the given string to its ASCII-lowercase equivalent.
std::string ToLowerASCII(std::string_view str);
std::u16string ToLowerASCII(std::u16string_view str);

// Converts the given string to its ASCII-uppercase equivalent.
std::string ToUpperASCII(std::string_view str);
std::u16string ToUpperASCII(std::u16string_view str);

// Functor for case-insensitive ASCII comparisons for STL algorithms like
// std::search.
//
// Note that a full Unicode version of this functor is not possible to write
// because case mappings might change the number of characters, depend on
// context (combining accents), and require handling UTF-16. If you need
// proper Unicode support, use base::i18n::ToLower/FoldCase and then just
// use a normal operator== on the result.
template <typename Char>
struct CaseInsensitiveCompareASCII {
 public:
  // Returns true when |x| and |y| are equal after ASCII lower-casing both.
  bool operator()(Char x, Char y) const {
    return ToLowerASCII(x) == ToLowerASCII(y);
  }
};

// Like strcasecmp for case-insensitive ASCII characters only.
// Returns:
//   -1  (a < b)
//    0  (a == b)
//    1  (a > b)
// (unlike strcasecmp which can return values greater or less than 1/-1). For
// full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase
// and then just call the normal string operators on the result.
int CompareCaseInsensitiveASCII(std::string_view a, std::string_view b);
int CompareCaseInsensitiveASCII(std::u16string_view a, std::u16string_view b);

// Equality for ASCII case-insensitive comparisons. For full Unicode support,
// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
// == or !=.
bool EqualsCaseInsensitiveASCII(std::string_view a, std::string_view b);
bool EqualsCaseInsensitiveASCII(std::u16string_view a, std::u16string_view b);

// Contains the set of characters representing whitespace in the corresponding
// encoding. Null-terminated. The ASCII versions are the whitespaces as defined
// by HTML5, and don't include control characters.
extern const char16_t kWhitespaceUTF16[];  // Includes Unicode.
extern const char kWhitespaceASCII[];
extern const char16_t kWhitespaceASCIIAs16[];  // No unicode.

// Null-terminated string representing the UTF-8 byte order mark.
extern const char kUtf8ByteOrderMark[];

// Removes characters in |remove_chars| from anywhere in |input|.
// Returns true if any characters were removed. |remove_chars| must be
// null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
bool RemoveChars(const std::u16string& input,
                 std::u16string_view remove_chars,
                 std::u16string* output);
bool RemoveChars(const std::string& input,
                 std::string_view remove_chars,
                 std::string* output);

// Replaces characters in |replace_chars| from anywhere in |input| with
// |replace_with|. Each character in |replace_chars| will be replaced with
// the |replace_with| string. Returns true if any characters were replaced.
// |replace_chars| must be null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
bool ReplaceChars(const std::u16string& input,
                  std::u16string_view replace_chars,
                  const std::u16string& replace_with,
                  std::u16string* output);
bool ReplaceChars(const std::string& input,
                  std::string_view replace_chars,
                  const std::string& replace_with,
                  std::string* output);

// Bit flags naming the end(s) of a string affected by a trim. TRIM_ALL is the
// bitwise OR of TRIM_LEADING and TRIM_TRAILING.
enum TrimPositions {
  TRIM_NONE = 0,
  TRIM_LEADING = 1 << 0,
  TRIM_TRAILING = 1 << 1,
  TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
};

// Removes characters in |trim_chars| from the beginning and end of |input|.
// The 8-bit version only works on 8-bit characters, not UTF-8. Returns true if
// any characters were removed.
//
// It is safe to use the same variable for both |input| and |output| (this is
// the normal usage to trim in-place).
bool TrimString(const std::u16string& input,
                std::u16string_view trim_chars,
                std::u16string* output);
bool TrimString(const std::string& input,
                std::string_view trim_chars,
                std::string* output);

// std::string_view versions of the above. The returned pieces refer to the
// original buffer.
std::u16string_view TrimString(std::u16string_view input,
                               std::u16string_view trim_chars,
                               TrimPositions positions);
std::string_view TrimString(std::string_view input,
                            std::string_view trim_chars,
                            TrimPositions positions);

// Truncates a string to the nearest UTF-8 character that will leave
// the string less than or equal to the specified byte size.
void TruncateUTF8ToByteSize(const std::string& input,
                            const size_t byte_size,
                            std::string* output);

// Trims any whitespace from either end of the input string.
//
// The std::string_view versions return a substring referencing the input
// buffer. The ASCII versions look only for ASCII whitespace.
//
// The std::string versions return where whitespace was found.
// NOTE: Safe to use the same variable for both input and output.
TrimPositions TrimWhitespace(const std::u16string& input,
                             TrimPositions positions,
                             std::u16string* output);
std::u16string_view TrimWhitespace(std::u16string_view input,
                                   TrimPositions positions);
TrimPositions TrimWhitespaceASCII(const std::string& input,
                                  TrimPositions positions,
                                  std::string* output);
std::string_view TrimWhitespaceASCII(std::string_view input,
                                     TrimPositions positions);

// Searches for CR or LF characters. Removes all contiguous whitespace
// strings that contain them. This is useful when trying to deal with text
// copied from terminals.
// Returns |text|, with the following three transformations:
// (1) Leading and trailing whitespace is trimmed.
// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
//     sequences containing a CR or LF are trimmed.
// (3) All other whitespace sequences are converted to single spaces.
std::u16string CollapseWhitespace(const std::u16string& text,
                                  bool trim_sequences_with_line_breaks);
std::string CollapseWhitespaceASCII(const std::string& text,
                                    bool trim_sequences_with_line_breaks);

// Returns true if |input| is empty or contains only characters found in
// |characters|.
bool ContainsOnlyChars(std::string_view input, std::string_view characters);
bool ContainsOnlyChars(std::u16string_view input,
                       std::u16string_view characters);

// Returns true if the specified string matches the criteria. How can a wide
// string be 8-bit or UTF8? It contains only characters that are < 256 (in the
// first case) or characters that use only 8-bits and whose 8-bit
// representation looks like a UTF-8 string (the second case).
//
// Note that IsStringUTF8 checks not only if the input is structurally
// valid but also if it doesn't contain any non-character codepoint
// (e.g. U+FFFE). It's done on purpose because all the existing callers want
// to have the maximum 'discriminating' power from other encodings. If
// there's a use case for just checking the structural validity, we have to
// add a new function for that.
//
// IsStringASCII assumes the input is likely all ASCII, and does not leave early
// if it is not the case.
bool IsStringUTF8(std::string_view str);
bool IsStringASCII(std::string_view str);
bool IsStringASCII(std::u16string_view str);

// Compare the lower-case form of the given string against the given
// previously-lower-cased ASCII string (typically a constant).
bool LowerCaseEqualsASCII(std::string_view str,
                          std::string_view lowecase_ascii);
bool LowerCaseEqualsASCII(std::u16string_view str,
                          std::string_view lowecase_ascii);

// Performs a case-sensitive string compare of the given 16-bit string against
// the given 8-bit ASCII string (typically a constant). The behavior is
// undefined if the |ascii| string is not ASCII.
bool EqualsASCII(std::u16string_view str, std::string_view ascii);

// starts_with/ends_with for ASCII case-insensitive comparisons.
// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's
// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see
// base/i18n/case_conversion.h for usage advice) on the arguments, and then use
// the results to a case-sensitive comparison.
bool StartsWithCaseInsensitiveASCII(std::string_view str,
                                    std::string_view search_for);
bool StartsWithCaseInsensitiveASCII(std::u16string_view str,
                                    std::u16string_view search_for);
bool EndsWithCaseInsensitiveASCII(std::string_view str,
                                  std::string_view search_for);
bool EndsWithCaseInsensitiveASCII(std::u16string_view str,
                                  std::u16string_view search_for);
// NOTE(review): presumably the plain case-sensitive prefix/suffix checks; the
// definitions are not part of this header, so confirm against them.
bool starts_with(std::string_view str1, std::string_view str2);
bool ends_with(std::string_view str1, std::string_view str2);

// Determines the type of ASCII character, independent of locale (the C
// library versions will change based on locale).

// True only for space, carriage return, line feed and horizontal tab.
template <typename Char>
inline bool IsAsciiWhitespace(Char c) {
  return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}
// True for 'A'..'Z' and 'a'..'z'.
template <typename Char>
inline bool IsAsciiAlpha(Char c) {
  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
// True for 'A'..'Z'.
template <typename Char>
inline bool IsAsciiUpper(Char c) {
  return c >= 'A' && c <= 'Z';
}
// True for 'a'..'z'.
template <typename Char>
inline bool IsAsciiLower(Char c) {
  return c >= 'a' && c <= 'z';
}
// True for '0'..'9'.
template <typename Char>
inline bool IsAsciiDigit(Char c) {
  return c >= '0' && c <= '9';
}
// True for '0'..'9', 'A'..'F' and 'a'..'f'.
template <typename Char>
inline bool IsHexDigit(Char c) {
  return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
         (c >= 'a' && c <= 'f');
}

// Returns the integer corresponding to the given hex character. For example:
//    '4' -> 4
//    'a' -> 10
//    'B' -> 11
// Assumes the input is a valid hex character. DCHECKs in debug builds if not.
char HexDigitToInt(char16_t c);

// Returns true if it's a Unicode whitespace character.
bool IsUnicodeWhitespace(char16_t c);

// Return a byte string in human-readable format with a unit suffix. Not
// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
// highly recommended instead. TODO(avi): Figure out how to get callers to use
// FormatBytes instead; remove this.
std::u16string FormatBytesUnlocalized(int64_t bytes);

// Starting at |start_offset| (usually 0), replace the first instance of
// |find_this| with |replace_with|.
void ReplaceFirstSubstringAfterOffset(std::u16string* str,
                                      size_t start_offset,
                                      std::u16string_view find_this,
                                      std::u16string_view replace_with);
void ReplaceFirstSubstringAfterOffset(std::string* str,
                                      size_t start_offset,
                                      std::string_view find_this,
                                      std::string_view replace_with);

// Starting at |start_offset| (usually 0), look through |str| and replace all
// instances of |find_this| with |replace_with|.
//
// This does entire substrings; use std::replace in <algorithm> for single
// characters, for example:
//   std::replace(str.begin(), str.end(), 'a', 'b');
void ReplaceSubstringsAfterOffset(std::u16string* str,
                                  size_t start_offset,
                                  std::u16string_view find_this,
                                  std::u16string_view replace_with);
void ReplaceSubstringsAfterOffset(std::string* str,
                                  size_t start_offset,
                                  std::string_view find_this,
                                  std::string_view replace_with);

// Reserves enough memory in |str| to accommodate |length_with_null| characters,
// sets the size of |str| to |length_with_null - 1| characters, and returns a
// pointer to the underlying contiguous array of characters. This is typically
// used when calling a function that writes results into a character array, but
// the caller wants the data to be managed by a string-like object.
// It is convenient in that it can be used inline in the call, and fast in that
// it avoids copying the results of the call from a char* into a string.
//
// |length_with_null| must be at least 2, since otherwise the underlying string
// would have size 0, and trying to access &((*str)[0]) in that case can result
// in a number of problems.
//
// Internally, this takes linear time because the resize() call 0-fills the
// underlying array for potentially all
// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we
// could avoid this aspect of the resize() call, as we expect the caller to
// immediately write over this memory, but there is no other way to set the size
// of the string, and not doing that will mean people who access |str| rather
// than str.c_str() will get back a string of whatever size |str| had on entry
// to this function (probably 0).
char* WriteInto(std::string* str, size_t length_with_null);
char16_t* WriteInto(std::u16string* str, size_t length_with_null);

// Does the opposite of SplitString()/SplitStringPiece(). Joins a vector or list
// of strings into a single string, inserting |separator| (which may be empty)
// in between all elements.
//
// If possible, callers should build a vector of std::string_views and use the
// std::string_view variant, so that they do not create unnecessary copies of
// strings. For example, instead of using SplitString, modifying the vector,
// then using JoinString, use SplitStringPiece followed by JoinString so that no
// copies of those strings are created until the final join operation.
//
// Use StrCat (in base/strings/strcat.h) if you don't need a separator.
std::string JoinString(const std::vector<std::string>& parts,
                       std::string_view separator);
std::u16string JoinString(const std::vector<std::u16string>& parts,
                          std::u16string_view separator);
std::string JoinString(const std::vector<std::string_view>& parts,
                       std::string_view separator);
std::u16string JoinString(const std::vector<std::u16string_view>& parts,
                          std::u16string_view separator);
// Explicit initializer_list overloads are required to break ambiguity when used
// with a literal initializer list (otherwise the compiler would not be able to
// decide between the string and std::string_view overloads).
std::string JoinString(std::initializer_list<std::string_view> parts,
                       std::string_view separator);
std::u16string JoinString(std::initializer_list<std::u16string_view> parts,
                          std::u16string_view separator);

// Replace $1-$2-$3..$9 in the format string with values from |subst|.
// Additionally, any number of consecutive '$' characters is replaced by that
// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
// NULL. This only allows you to use up to nine replacements.
std::u16string ReplaceStringPlaceholders(
    const std::u16string& format_string,
    const std::vector<std::u16string>& subst,
    std::vector<size_t>* offsets);

std::string ReplaceStringPlaceholders(std::string_view format_string,
                                      const std::vector<std::string>& subst,
                                      std::vector<size_t>* offsets);

// Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.
4006d528ed9Sopenharmony_cistd::u16string ReplaceStringPlaceholders(const std::u16string& format_string, 4016d528ed9Sopenharmony_ci const std::u16string& a, 4026d528ed9Sopenharmony_ci size_t* offset); 4036d528ed9Sopenharmony_ci 4046d528ed9Sopenharmony_ci} // namespace base 4056d528ed9Sopenharmony_ci 4066d528ed9Sopenharmony_ci#if defined(OS_WIN) 4076d528ed9Sopenharmony_ci#include "base/strings/string_util_win.h" 4086d528ed9Sopenharmony_ci#elif defined(OS_POSIX) || defined(OS_FUCHSIA) 4096d528ed9Sopenharmony_ci#include "base/strings/string_util_posix.h" 4106d528ed9Sopenharmony_ci#else 4116d528ed9Sopenharmony_ci#error Define string operations appropriately for your platform 4126d528ed9Sopenharmony_ci#endif 4136d528ed9Sopenharmony_ci 4146d528ed9Sopenharmony_ci#endif // BASE_STRINGS_STRING_UTIL_H_ 415