// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file defines utility functions for working with strings.
66d528ed9Sopenharmony_ci
76d528ed9Sopenharmony_ci#ifndef BASE_STRINGS_STRING_UTIL_H_
86d528ed9Sopenharmony_ci#define BASE_STRINGS_STRING_UTIL_H_
96d528ed9Sopenharmony_ci
106d528ed9Sopenharmony_ci#include <ctype.h>
116d528ed9Sopenharmony_ci#include <stdarg.h>  // va_list
126d528ed9Sopenharmony_ci#include <stddef.h>
136d528ed9Sopenharmony_ci#include <stdint.h>
146d528ed9Sopenharmony_ci
156d528ed9Sopenharmony_ci#include <initializer_list>
166d528ed9Sopenharmony_ci#include <string>
176d528ed9Sopenharmony_ci#include <string_view>
186d528ed9Sopenharmony_ci#include <vector>
196d528ed9Sopenharmony_ci
206d528ed9Sopenharmony_ci#include "base/compiler_specific.h"
216d528ed9Sopenharmony_ci#include "util/build_config.h"
226d528ed9Sopenharmony_ci
236d528ed9Sopenharmony_cinamespace base {
246d528ed9Sopenharmony_ci
256d528ed9Sopenharmony_ci// C standard-library functions that aren't cross-platform are provided as
266d528ed9Sopenharmony_ci// "base::...", and their prototypes are listed below. These functions are
276d528ed9Sopenharmony_ci// then implemented as inline calls to the platform-specific equivalents in the
286d528ed9Sopenharmony_ci// platform-specific headers.
296d528ed9Sopenharmony_ci
306d528ed9Sopenharmony_ci// Wrapper for vsnprintf that always null-terminates and always returns the
316d528ed9Sopenharmony_ci// number of characters that would be in an untruncated formatted
326d528ed9Sopenharmony_ci// string, even when truncation occurs.
336d528ed9Sopenharmony_ciint vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
346d528ed9Sopenharmony_ci    PRINTF_FORMAT(3, 0);
356d528ed9Sopenharmony_ci
366d528ed9Sopenharmony_ci// Some of these implementations need to be inlined.
376d528ed9Sopenharmony_ci
386d528ed9Sopenharmony_ci// We separate the declaration from the implementation of this inline
396d528ed9Sopenharmony_ci// function just so the PRINTF_FORMAT works.
406d528ed9Sopenharmony_ciinline int snprintf(char* buffer,
416d528ed9Sopenharmony_ci                    size_t size,
426d528ed9Sopenharmony_ci                    _Printf_format_string_ const char* format,
436d528ed9Sopenharmony_ci                    ...) PRINTF_FORMAT(3, 4);
446d528ed9Sopenharmony_ciinline int snprintf(char* buffer,
456d528ed9Sopenharmony_ci                    size_t size,
466d528ed9Sopenharmony_ci                    _Printf_format_string_ const char* format,
476d528ed9Sopenharmony_ci                    ...) {
486d528ed9Sopenharmony_ci  va_list arguments;
496d528ed9Sopenharmony_ci  va_start(arguments, format);
506d528ed9Sopenharmony_ci  int result = vsnprintf(buffer, size, format, arguments);
516d528ed9Sopenharmony_ci  va_end(arguments);
526d528ed9Sopenharmony_ci  return result;
536d528ed9Sopenharmony_ci}
546d528ed9Sopenharmony_ci
// ASCII-specific tolower.  The standard library's tolower is locale sensitive,
// so we don't want to use it here.
//
// Maps 'A'..'Z' onto 'a'..'z' and returns every other value unchanged.
constexpr char ToLowerASCII(char c) {
  // The arithmetic is done in int; cast back explicitly rather than relying on
  // an implicit narrowing conversion.
  return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + ('a' - 'A')) : c;
}
// 16-bit overload of the ASCII-only lowercase conversion: u'A'..u'Z' become
// u'a'..u'z'; every other code unit (including non-ASCII) is returned as is.
constexpr char16_t ToLowerASCII(char16_t c) {
  // The arithmetic is done in int; cast back explicitly rather than relying on
  // an implicit narrowing conversion.
  return (c >= 'A' && c <= 'Z') ? static_cast<char16_t>(c + ('a' - 'A')) : c;
}
636d528ed9Sopenharmony_ci
// ASCII-specific toupper.  The standard library's toupper is locale sensitive,
// so we don't want to use it here.
//
// Maps 'a'..'z' onto 'A'..'Z' and returns every other value unchanged.
constexpr char ToUpperASCII(char c) {
  // The arithmetic is done in int; cast back explicitly rather than relying on
  // an implicit narrowing conversion.
  return (c >= 'a' && c <= 'z') ? static_cast<char>(c + ('A' - 'a')) : c;
}
// 16-bit overload of the ASCII-only uppercase conversion: u'a'..u'z' become
// u'A'..u'Z'; every other code unit (including non-ASCII) is returned as is.
constexpr char16_t ToUpperASCII(char16_t c) {
  // The arithmetic is done in int; cast back explicitly rather than relying on
  // an implicit narrowing conversion.
  return (c >= 'a' && c <= 'z') ? static_cast<char16_t>(c + ('A' - 'a')) : c;
}
726d528ed9Sopenharmony_ci
// Converts the given string to its ASCII-lowercase equivalent.
std::string ToLowerASCII(std::string_view str);
std::u16string ToLowerASCII(std::u16string_view str);

// Converts the given string to its ASCII-uppercase equivalent.
std::string ToUpperASCII(std::string_view str);
std::u16string ToUpperASCII(std::u16string_view str);
806d528ed9Sopenharmony_ci
// Functor for case-insensitive ASCII comparisons for STL algorithms like
// std::search.
//
// Two characters compare equal when their ToLowerASCII() forms are equal, so
// only the ASCII letters are folded.
//
// Note that a full Unicode version of this functor is not possible to write
// because case mappings might change the number of characters, depend on
// context (combining accents), and require handling UTF-16. If you need
// proper Unicode support, use base::i18n::ToLower/FoldCase and then just
// use a normal operator== on the result.
template <typename Char>
struct CaseInsensitiveCompareASCII {
  bool operator()(Char x, Char y) const {
    return ToLowerASCII(x) == ToLowerASCII(y);
  }
};
966d528ed9Sopenharmony_ci
// Like strcasecmp for case-insensitive ASCII characters only. Returns:
//   -1  (a < b)
//    0  (a == b)
//    1  (a > b)
// (unlike strcasecmp which can return values greater or less than 1/-1). For
// full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase
// and then just call the normal string operators on the result.
int CompareCaseInsensitiveASCII(std::string_view a, std::string_view b);
int CompareCaseInsensitiveASCII(std::u16string_view a, std::u16string_view b);
1066d528ed9Sopenharmony_ci
// Equality for ASCII case-insensitive comparisons. For full Unicode support,
// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
// == or !=.
bool EqualsCaseInsensitiveASCII(std::string_view a, std::string_view b);
bool EqualsCaseInsensitiveASCII(std::u16string_view a, std::u16string_view b);
1126d528ed9Sopenharmony_ci
// Contains the set of characters representing whitespace in the corresponding
// encoding. Null-terminated. The ASCII versions are the whitespaces as defined
// by HTML5, and don't include control characters.
//
// These are declarations only; the arrays are defined in another translation
// unit.
extern const char16_t kWhitespaceUTF16[];  // Includes Unicode.
extern const char kWhitespaceASCII[];
extern const char16_t kWhitespaceASCIIAs16[];  // No unicode.

// Null-terminated string representing the UTF-8 byte order mark.
extern const char kUtf8ByteOrderMark[];
1226d528ed9Sopenharmony_ci
// Removes characters in |remove_chars| from anywhere in |input|.  Returns true
// if any characters were removed.  |remove_chars| must be null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
bool RemoveChars(const std::u16string& input,
                 std::u16string_view remove_chars,
                 std::u16string* output);
bool RemoveChars(const std::string& input,
                 std::string_view remove_chars,
                 std::string* output);
1326d528ed9Sopenharmony_ci
// Replaces characters in |replace_chars| from anywhere in |input| with
// |replace_with|.  Each character in |replace_chars| will be replaced with
// the |replace_with| string.  Returns true if any characters were replaced.
// |replace_chars| must be null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
bool ReplaceChars(const std::u16string& input,
                  std::u16string_view replace_chars,
                  const std::u16string& replace_with,
                  std::u16string* output);
bool ReplaceChars(const std::string& input,
                  std::string_view replace_chars,
                  const std::string& replace_with,
                  std::string* output);
1466d528ed9Sopenharmony_ci
// Bit flags naming the end(s) of a string, used by the Trim* functions both as
// an input (where to trim) and as a result (where something was trimmed).
// TRIM_LEADING and TRIM_TRAILING occupy distinct bits and TRIM_ALL is their
// union, so values can be combined and tested with bitwise operators.
enum TrimPositions {
  TRIM_NONE = 0,
  TRIM_LEADING = 1 << 0,
  TRIM_TRAILING = 1 << 1,
  TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
};
1536d528ed9Sopenharmony_ci
// Removes characters in |trim_chars| from the beginning and end of |input|.
// The 8-bit version only works on 8-bit characters, not UTF-8. Returns true if
// any characters were removed.
//
// It is safe to use the same variable for both |input| and |output| (this is
// the normal usage to trim in-place).
bool TrimString(const std::u16string& input,
                std::u16string_view trim_chars,
                std::u16string* output);
bool TrimString(const std::string& input,
                std::string_view trim_chars,
                std::string* output);
1666d528ed9Sopenharmony_ci
1676d528ed9Sopenharmony_ci// std::string_view versions of the above. The returned pieces refer to the
1686d528ed9Sopenharmony_ci// original buffer.
1696d528ed9Sopenharmony_cistd::u16string_view TrimString(std::u16string_view input,
1706d528ed9Sopenharmony_ci                               std::u16string_view trim_chars,
1716d528ed9Sopenharmony_ci                               TrimPositions positions);
1726d528ed9Sopenharmony_cistd::string_view TrimString(std::string_view input,
1736d528ed9Sopenharmony_ci                            std::string_view trim_chars,
1746d528ed9Sopenharmony_ci                            TrimPositions positions);
1756d528ed9Sopenharmony_ci
// Truncates a string to the nearest UTF-8 character that will leave
// the string less than or equal to the specified byte size. The result is
// written to |output|.
void TruncateUTF8ToByteSize(const std::string& input,
                            const size_t byte_size,
                            std::string* output);
1816d528ed9Sopenharmony_ci
1826d528ed9Sopenharmony_ci// Trims any whitespace from either end of the input string.
1836d528ed9Sopenharmony_ci//
1846d528ed9Sopenharmony_ci// The std::string_view versions return a substring referencing the input
1856d528ed9Sopenharmony_ci// buffer. The ASCII versions look only for ASCII whitespace.
1866d528ed9Sopenharmony_ci//
1876d528ed9Sopenharmony_ci// The std::string versions return where whitespace was found.
1886d528ed9Sopenharmony_ci// NOTE: Safe to use the same variable for both input and output.
1896d528ed9Sopenharmony_ciTrimPositions TrimWhitespace(const std::u16string& input,
1906d528ed9Sopenharmony_ci                             TrimPositions positions,
1916d528ed9Sopenharmony_ci                             std::u16string* output);
1926d528ed9Sopenharmony_cistd::u16string_view TrimWhitespace(std::u16string_view input,
1936d528ed9Sopenharmony_ci                                   TrimPositions positions);
1946d528ed9Sopenharmony_ciTrimPositions TrimWhitespaceASCII(const std::string& input,
1956d528ed9Sopenharmony_ci                                  TrimPositions positions,
1966d528ed9Sopenharmony_ci                                  std::string* output);
1976d528ed9Sopenharmony_cistd::string_view TrimWhitespaceASCII(std::string_view input,
1986d528ed9Sopenharmony_ci                                     TrimPositions positions);
1996d528ed9Sopenharmony_ci
// Searches for CR or LF characters.  Removes all contiguous whitespace
// strings that contain them.  This is useful when trying to deal with text
// copied from terminals.
// Returns |text|, with the following three transformations:
// (1) Leading and trailing whitespace is trimmed.
// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
//     sequences containing a CR or LF are trimmed.
// (3) All other whitespace sequences are converted to single spaces.
std::u16string CollapseWhitespace(const std::u16string& text,
                                  bool trim_sequences_with_line_breaks);
std::string CollapseWhitespaceASCII(const std::string& text,
                                    bool trim_sequences_with_line_breaks);
2126d528ed9Sopenharmony_ci
// Returns true if |input| is empty or contains only characters found in
// |characters|.
bool ContainsOnlyChars(std::string_view input, std::string_view characters);
bool ContainsOnlyChars(std::u16string_view input,
                       std::u16string_view characters);
2186d528ed9Sopenharmony_ci
// Returns true if the specified string matches the criteria. How can a wide
// string be 8-bit or UTF8? It contains only characters that are < 256 (in the
// first case) or characters that use only 8-bits and whose 8-bit
// representation looks like a UTF-8 string (the second case).
//
// Note that IsStringUTF8 checks not only if the input is structurally
// valid but also if it doesn't contain any non-character codepoint
// (e.g. U+FFFE). It's done on purpose because all the existing callers want
// to have the maximum 'discriminating' power from other encodings. If
// there's a use case for just checking the structural validity, we have to
// add a new function for that.
//
// IsStringASCII assumes the input is likely all ASCII, and does not leave early
// if it is not the case.
bool IsStringUTF8(std::string_view str);
bool IsStringASCII(std::string_view str);
bool IsStringASCII(std::u16string_view str);
2366d528ed9Sopenharmony_ci
// Compare the lower-case form of the given string against the given
// previously-lower-cased ASCII string (typically a constant).
bool LowerCaseEqualsASCII(std::string_view str,
                          std::string_view lowercase_ascii);
bool LowerCaseEqualsASCII(std::u16string_view str,
                          std::string_view lowercase_ascii);
2436d528ed9Sopenharmony_ci
// Performs a case-sensitive string compare of the given 16-bit string against
// the given 8-bit ASCII string (typically a constant). The behavior is
// undefined if the |ascii| string is not ASCII.
bool EqualsASCII(std::u16string_view str, std::string_view ascii);
2486d528ed9Sopenharmony_ci
// starts_with/ends_with for ASCII case-insensitive comparisons.
// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's
// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see
// base/i18n/case_conversion.h for usage advice) on the arguments, and then use
// the results to a case-sensitive comparison.
bool StartsWithCaseInsensitiveASCII(std::string_view str,
                                    std::string_view search_for);
bool StartsWithCaseInsensitiveASCII(std::u16string_view str,
                                    std::u16string_view search_for);
bool EndsWithCaseInsensitiveASCII(std::string_view str,
                                  std::string_view search_for);
bool EndsWithCaseInsensitiveASCII(std::u16string_view str,
                                  std::u16string_view search_for);
// NOTE(review): these two carry no documentation of their own; presumably
// they are the plain case-sensitive tests of whether |str1| begins / ends with
// |str2| -- confirm against the implementation.
bool starts_with(std::string_view str1, std::string_view str2);
bool ends_with(std::string_view str1, std::string_view str2);
2646d528ed9Sopenharmony_ci
// Determines the type of ASCII character, independent of locale (the C
// library versions will change based on locale).

// True only for space, carriage return, line feed and horizontal tab. Note
// that form feed and vertical tab are NOT treated as whitespace here.
template <typename Char>
constexpr bool IsAsciiWhitespace(Char c) {
  return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}
// True when |c| is an ASCII letter, i.e. in 'A'..'Z' or 'a'..'z'.
template <typename Char>
constexpr bool IsAsciiAlpha(Char c) {
  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
// True when |c| is an ASCII uppercase letter, i.e. in 'A'..'Z'.
template <typename Char>
constexpr bool IsAsciiUpper(Char c) {
  return c >= 'A' && c <= 'Z';
}
// True when |c| is an ASCII lowercase letter, i.e. in 'a'..'z'.
template <typename Char>
constexpr bool IsAsciiLower(Char c) {
  return c >= 'a' && c <= 'z';
}
// True when |c| is an ASCII decimal digit, i.e. in '0'..'9'.
template <typename Char>
constexpr bool IsAsciiDigit(Char c) {
  return c >= '0' && c <= '9';
}
2876d528ed9Sopenharmony_ci
// True when |c| is an ASCII hexadecimal digit: '0'..'9', 'A'..'F' or
// 'a'..'f'.
template <typename Char>
constexpr bool IsHexDigit(Char c) {
  return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
         (c >= 'a' && c <= 'f');
}
2936d528ed9Sopenharmony_ci
// Returns the integer corresponding to the given hex character. For example:
//    '4' -> 4
//    'a' -> 10
//    'B' -> 11
// Assumes the input is a valid hex character. DCHECKs in debug builds if not.
char HexDigitToInt(char16_t c);
3006d528ed9Sopenharmony_ci
// Returns true if it's a Unicode whitespace character.
bool IsUnicodeWhitespace(char16_t c);
3036d528ed9Sopenharmony_ci
// Return a byte string in human-readable format with a unit suffix. Not
// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
// highly recommended instead. TODO(avi): Figure out how to get callers to use
// FormatBytes instead; remove this.
std::u16string FormatBytesUnlocalized(int64_t bytes);
3096d528ed9Sopenharmony_ci
// Starting at |start_offset| (usually 0), replace the first instance of
// |find_this| with |replace_with|. |str| is modified in place.
void ReplaceFirstSubstringAfterOffset(std::u16string* str,
                                      size_t start_offset,
                                      std::u16string_view find_this,
                                      std::u16string_view replace_with);
void ReplaceFirstSubstringAfterOffset(std::string* str,
                                      size_t start_offset,
                                      std::string_view find_this,
                                      std::string_view replace_with);
3206d528ed9Sopenharmony_ci
// Starting at |start_offset| (usually 0), look through |str| and replace all
// instances of |find_this| with |replace_with|. |str| is modified in place.
//
// This does entire substrings; use std::replace in <algorithm> for single
// characters, for example:
//   std::replace(str.begin(), str.end(), 'a', 'b');
void ReplaceSubstringsAfterOffset(std::u16string* str,
                                  size_t start_offset,
                                  std::u16string_view find_this,
                                  std::u16string_view replace_with);
void ReplaceSubstringsAfterOffset(std::string* str,
                                  size_t start_offset,
                                  std::string_view find_this,
                                  std::string_view replace_with);
3356d528ed9Sopenharmony_ci
// Reserves enough memory in |str| to accommodate |length_with_null| characters,
// sets the size of |str| to |length_with_null - 1| characters, and returns a
// pointer to the underlying contiguous array of characters.  This is typically
// used when calling a function that writes results into a character array, but
// the caller wants the data to be managed by a string-like object.  It is
// convenient in that it can be used inline in the call, and fast in that it
// avoids copying the results of the call from a char* into a string.
//
// |length_with_null| must be at least 2, since otherwise the underlying string
// would have size 0, and trying to access &((*str)[0]) in that case can result
// in a number of problems.
//
// Internally, this takes linear time because the resize() call 0-fills the
// underlying array for potentially all
// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes.  Ideally we
// could avoid this aspect of the resize() call, as we expect the caller to
// immediately write over this memory, but there is no other way to set the size
// of the string, and not doing that will mean people who access |str| rather
// than str.c_str() will get back a string of whatever size |str| had on entry
// to this function (probably 0).
char* WriteInto(std::string* str, size_t length_with_null);
char16_t* WriteInto(std::u16string* str, size_t length_with_null);
3586d528ed9Sopenharmony_ci
// Does the opposite of SplitString()/SplitStringPiece(). Joins a vector or list
// of strings into a single string, inserting |separator| (which may be empty)
// in between all elements.
//
// If possible, callers should build a vector of std::string_views and use the
// std::string_view variant, so that they do not create unnecessary copies of
// strings. For example, instead of using SplitString, modifying the vector,
// then using JoinString, use SplitStringPiece followed by JoinString so that no
// copies of those strings are created until the final join operation.
//
// Use StrCat (in base/strings/strcat.h) if you don't need a separator.
std::string JoinString(const std::vector<std::string>& parts,
                       std::string_view separator);
std::u16string JoinString(const std::vector<std::u16string>& parts,
                          std::u16string_view separator);
std::string JoinString(const std::vector<std::string_view>& parts,
                       std::string_view separator);
std::u16string JoinString(const std::vector<std::u16string_view>& parts,
                          std::u16string_view separator);
// Explicit initializer_list overloads are required to break ambiguity when used
// with a literal initializer list (otherwise the compiler would not be able to
// decide between the string and std::string_view overloads).
std::string JoinString(std::initializer_list<std::string_view> parts,
                       std::string_view separator);
std::u16string JoinString(std::initializer_list<std::u16string_view> parts,
                          std::u16string_view separator);
3856d528ed9Sopenharmony_ci
// Replace $1-$2-$3..$9 in the format string with values from |subst|.
// Additionally, any number of consecutive '$' characters is replaced by that
// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
// null. This only allows you to use up to nine replacements.
std::u16string ReplaceStringPlaceholders(
    const std::u16string& format_string,
    const std::vector<std::u16string>& subst,
    std::vector<size_t>* offsets);

std::string ReplaceStringPlaceholders(std::string_view format_string,
                                      const std::vector<std::string>& subst,
                                      std::vector<size_t>* offsets);

// Single-string shortcut for ReplaceStringPlaceholders. |offset| may be null.
std::u16string ReplaceStringPlaceholders(const std::u16string& format_string,
                                         const std::u16string& a,
                                         size_t* offset);
4036d528ed9Sopenharmony_ci
4046d528ed9Sopenharmony_ci}  // namespace base
4056d528ed9Sopenharmony_ci
4066d528ed9Sopenharmony_ci#if defined(OS_WIN)
4076d528ed9Sopenharmony_ci#include "base/strings/string_util_win.h"
4086d528ed9Sopenharmony_ci#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
4096d528ed9Sopenharmony_ci#include "base/strings/string_util_posix.h"
4106d528ed9Sopenharmony_ci#else
4116d528ed9Sopenharmony_ci#error Define string operations appropriately for your platform
4126d528ed9Sopenharmony_ci#endif
4136d528ed9Sopenharmony_ci
4146d528ed9Sopenharmony_ci#endif  // BASE_STRINGS_STRING_UTIL_H_
415