base/strings/string_util.h

6d528ed9Sopenharmony_ci// Copyright 2013 The Chromium Authors. All rights reserved.
6d528ed9Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
6d528ed9Sopenharmony_ci// found in the LICENSE file.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// This file defines utility functions for working with strings.
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci#ifndef BASE_STRINGS_STRING_UTIL_H_
6d528ed9Sopenharmony_ci#define BASE_STRINGS_STRING_UTIL_H_
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci#include <ctype.h>
6d528ed9Sopenharmony_ci#include <stdarg.h>  // va_list
6d528ed9Sopenharmony_ci#include <stddef.h>
6d528ed9Sopenharmony_ci#include <stdint.h>
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci#include <initializer_list>
6d528ed9Sopenharmony_ci#include <string>
6d528ed9Sopenharmony_ci#include <string_view>
6d528ed9Sopenharmony_ci#include <vector>
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci#include "base/compiler_specific.h"
6d528ed9Sopenharmony_ci#include "util/build_config.h"
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_cinamespace base {
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// C standard-library functions that aren't cross-platform are provided as
6d528ed9Sopenharmony_ci// "base::...", and their prototypes are listed below. These functions are
6d528ed9Sopenharmony_ci// then implemented as inline calls to the platform-specific equivalents in the
6d528ed9Sopenharmony_ci// platform-specific headers.
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Wrapper for vsnprintf that always null-terminates and always returns the
6d528ed9Sopenharmony_ci// number of characters that would be in an untruncated formatted
6d528ed9Sopenharmony_ci// string, even when truncation occurs.
6d528ed9Sopenharmony_ciint vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
6d528ed9Sopenharmony_ci    PRINTF_FORMAT(3, 0);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Some of these implementations need to be inlined.
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// We separate the declaration from the implementation of this inline
6d528ed9Sopenharmony_ci// function just so the PRINTF_FORMAT works.
6d528ed9Sopenharmony_ciinline int snprintf(char* buffer,
6d528ed9Sopenharmony_ci                    size_t size,
6d528ed9Sopenharmony_ci                    _Printf_format_string_ const char* format,
6d528ed9Sopenharmony_ci                    ...) PRINTF_FORMAT(3, 4);
6d528ed9Sopenharmony_ciinline int snprintf(char* buffer,
6d528ed9Sopenharmony_ci                    size_t size,
6d528ed9Sopenharmony_ci                    _Printf_format_string_ const char* format,
6d528ed9Sopenharmony_ci                    ...) {
6d528ed9Sopenharmony_ci  va_list arguments;
6d528ed9Sopenharmony_ci  va_start(arguments, format);
6d528ed9Sopenharmony_ci  int result = vsnprintf(buffer, size, format, arguments);
6d528ed9Sopenharmony_ci  va_end(arguments);
6d528ed9Sopenharmony_ci  return result;
6d528ed9Sopenharmony_ci}
6d528ed9Sopenharmony_ci
// ASCII-only lowercase conversion. The standard library's tolower is locale
// sensitive, so it is deliberately not used here. 'A'..'Z' map to 'a'..'z';
// every other value is returned unchanged.
constexpr char ToLowerASCII(char c) {
  // The arithmetic is performed in int, so convert back explicitly.
  return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + ('a' - 'A')) : c;
}
// 16-bit overload: only code units in 'A'..'Z' are changed (to 'a'..'z');
// everything else, including all non-ASCII values, is returned unchanged.
constexpr char16_t ToLowerASCII(char16_t c) {
  // The arithmetic is performed in int, so convert back explicitly.
  return (c >= 'A' && c <= 'Z') ? static_cast<char16_t>(c + ('a' - 'A')) : c;
}
6d528ed9Sopenharmony_ci
// ASCII-only uppercase conversion. The standard library's toupper is locale
// sensitive, so it is deliberately not used here. 'a'..'z' map to 'A'..'Z';
// every other value is returned unchanged.
constexpr char ToUpperASCII(char c) {
  // The arithmetic is performed in int, so convert back explicitly.
  return (c >= 'a' && c <= 'z') ? static_cast<char>(c + ('A' - 'a')) : c;
}
// 16-bit overload: only code units in 'a'..'z' are changed (to 'A'..'Z');
// everything else, including all non-ASCII values, is returned unchanged.
constexpr char16_t ToUpperASCII(char16_t c) {
  // The arithmetic is performed in int, so convert back explicitly.
  return (c >= 'a' && c <= 'z') ? static_cast<char16_t>(c + ('A' - 'a')) : c;
}
6d528ed9Sopenharmony_ci
// Converts the given string to its ASCII-lowercase equivalent.
6d528ed9Sopenharmony_cistd::string ToLowerASCII(std::string_view str);
6d528ed9Sopenharmony_cistd::u16string ToLowerASCII(std::u16string_view str);
6d528ed9Sopenharmony_ci
// Converts the given string to its ASCII-uppercase equivalent.
6d528ed9Sopenharmony_cistd::string ToUpperASCII(std::string_view str);
6d528ed9Sopenharmony_cistd::u16string ToUpperASCII(std::u16string_view str);
6d528ed9Sopenharmony_ci
// Equality predicate for ASCII case-insensitive comparisons, intended for STL
// algorithms such as std::search. Two characters are considered equal when
// their ToLowerASCII() results are equal.
//
// A full Unicode version of this functor cannot be written: case mappings may
// change the number of characters, depend on context (combining accents), and
// require handling UTF-16. If you need proper Unicode support, use
// base::i18n::ToLower/FoldCase and then compare the results with a normal
// operator==.
template <typename Char>
struct CaseInsensitiveCompareASCII {
  // Returns true if |x| and |y| are equal after ASCII lowercasing.
  bool operator()(Char x, Char y) const {
    return ToLowerASCII(x) == ToLowerASCII(y);
  }
};
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Like strcasecmp for case-insensitive ASCII characters only. Returns:
6d528ed9Sopenharmony_ci//   -1  (a < b)
6d528ed9Sopenharmony_ci//    0  (a == b)
6d528ed9Sopenharmony_ci//    1  (a > b)
6d528ed9Sopenharmony_ci// (unlike strcasecmp which can return values greater or less than 1/-1). For
// full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase
6d528ed9Sopenharmony_ci// and then just call the normal string operators on the result.
6d528ed9Sopenharmony_ciint CompareCaseInsensitiveASCII(std::string_view a, std::string_view b);
6d528ed9Sopenharmony_ciint CompareCaseInsensitiveASCII(std::u16string_view a, std::u16string_view b);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Equality for ASCII case-insensitive comparisons. For full Unicode support,
// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
6d528ed9Sopenharmony_ci// == or !=.
6d528ed9Sopenharmony_cibool EqualsCaseInsensitiveASCII(std::string_view a, std::string_view b);
6d528ed9Sopenharmony_cibool EqualsCaseInsensitiveASCII(std::u16string_view a, std::u16string_view b);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Contains the set of characters representing whitespace in the corresponding
6d528ed9Sopenharmony_ci// encoding. Null-terminated. The ASCII versions are the whitespaces as defined
6d528ed9Sopenharmony_ci// by HTML5, and don't include control characters.
6d528ed9Sopenharmony_ciextern const char16_t kWhitespaceUTF16[];  // Includes Unicode.
6d528ed9Sopenharmony_ciextern const char kWhitespaceASCII[];
6d528ed9Sopenharmony_ciextern const char16_t kWhitespaceASCIIAs16[];  // No unicode.
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Null-terminated string representing the UTF-8 byte order mark.
6d528ed9Sopenharmony_ciextern const char kUtf8ByteOrderMark[];
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Removes characters in |remove_chars| from anywhere in |input|.  Returns true
6d528ed9Sopenharmony_ci// if any characters were removed.  |remove_chars| must be null-terminated.
6d528ed9Sopenharmony_ci// NOTE: Safe to use the same variable for both |input| and |output|.
6d528ed9Sopenharmony_cibool RemoveChars(const std::u16string& input,
6d528ed9Sopenharmony_ci                 std::u16string_view remove_chars,
6d528ed9Sopenharmony_ci                 std::u16string* output);
6d528ed9Sopenharmony_cibool RemoveChars(const std::string& input,
6d528ed9Sopenharmony_ci                 std::string_view remove_chars,
6d528ed9Sopenharmony_ci                 std::string* output);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Replaces characters in |replace_chars| from anywhere in |input| with
6d528ed9Sopenharmony_ci// |replace_with|.  Each character in |replace_chars| will be replaced with
6d528ed9Sopenharmony_ci// the |replace_with| string.  Returns true if any characters were replaced.
6d528ed9Sopenharmony_ci// |replace_chars| must be null-terminated.
6d528ed9Sopenharmony_ci// NOTE: Safe to use the same variable for both |input| and |output|.
6d528ed9Sopenharmony_cibool ReplaceChars(const std::u16string& input,
6d528ed9Sopenharmony_ci                  std::u16string_view replace_chars,
6d528ed9Sopenharmony_ci                  const std::u16string& replace_with,
6d528ed9Sopenharmony_ci                  std::u16string* output);
6d528ed9Sopenharmony_cibool ReplaceChars(const std::string& input,
6d528ed9Sopenharmony_ci                  std::string_view replace_chars,
6d528ed9Sopenharmony_ci                  const std::string& replace_with,
6d528ed9Sopenharmony_ci                  std::string* output);
6d528ed9Sopenharmony_ci
// Bit flags identifying the end(s) of a string. In this header the type is
// used both as the |positions| argument of the trimming functions and as the
// return value of TrimWhitespace()/TrimWhitespaceASCII().
enum TrimPositions {
  TRIM_NONE = 0,                            // Neither end.
  TRIM_LEADING = 1 << 0,                    // The beginning of the string.
  TRIM_TRAILING = 1 << 1,                   // The end of the string.
  TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,  // Both ends.
};
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Removes characters in |trim_chars| from the beginning and end of |input|.
6d528ed9Sopenharmony_ci// The 8-bit version only works on 8-bit characters, not UTF-8. Returns true if
6d528ed9Sopenharmony_ci// any characters were removed.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// It is safe to use the same variable for both |input| and |output| (this is
6d528ed9Sopenharmony_ci// the normal usage to trim in-place).
6d528ed9Sopenharmony_cibool TrimString(const std::u16string& input,
6d528ed9Sopenharmony_ci                std::u16string_view trim_chars,
6d528ed9Sopenharmony_ci                std::u16string* output);
6d528ed9Sopenharmony_cibool TrimString(const std::string& input,
6d528ed9Sopenharmony_ci                std::string_view trim_chars,
6d528ed9Sopenharmony_ci                std::string* output);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// std::string_view versions of the above. The returned pieces refer to the
6d528ed9Sopenharmony_ci// original buffer.
6d528ed9Sopenharmony_cistd::u16string_view TrimString(std::u16string_view input,
6d528ed9Sopenharmony_ci                               std::u16string_view trim_chars,
6d528ed9Sopenharmony_ci                               TrimPositions positions);
6d528ed9Sopenharmony_cistd::string_view TrimString(std::string_view input,
6d528ed9Sopenharmony_ci                            std::string_view trim_chars,
6d528ed9Sopenharmony_ci                            TrimPositions positions);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Truncates a string to the nearest UTF-8 character that will leave
6d528ed9Sopenharmony_ci// the string less than or equal to the specified byte size.
6d528ed9Sopenharmony_civoid TruncateUTF8ToByteSize(const std::string& input,
6d528ed9Sopenharmony_ci                            const size_t byte_size,
6d528ed9Sopenharmony_ci                            std::string* output);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Trims any whitespace from either end of the input string.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// The std::string_view versions return a substring referencing the input
6d528ed9Sopenharmony_ci// buffer. The ASCII versions look only for ASCII whitespace.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// The std::string versions return where whitespace was found.
6d528ed9Sopenharmony_ci// NOTE: Safe to use the same variable for both input and output.
6d528ed9Sopenharmony_ciTrimPositions TrimWhitespace(const std::u16string& input,
6d528ed9Sopenharmony_ci                             TrimPositions positions,
6d528ed9Sopenharmony_ci                             std::u16string* output);
6d528ed9Sopenharmony_cistd::u16string_view TrimWhitespace(std::u16string_view input,
6d528ed9Sopenharmony_ci                                   TrimPositions positions);
6d528ed9Sopenharmony_ciTrimPositions TrimWhitespaceASCII(const std::string& input,
6d528ed9Sopenharmony_ci                                  TrimPositions positions,
6d528ed9Sopenharmony_ci                                  std::string* output);
6d528ed9Sopenharmony_cistd::string_view TrimWhitespaceASCII(std::string_view input,
6d528ed9Sopenharmony_ci                                     TrimPositions positions);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Searches for CR or LF characters.  Removes all contiguous whitespace
6d528ed9Sopenharmony_ci// strings that contain them.  This is useful when trying to deal with text
6d528ed9Sopenharmony_ci// copied from terminals.
6d528ed9Sopenharmony_ci// Returns |text|, with the following three transformations:
6d528ed9Sopenharmony_ci// (1) Leading and trailing whitespace is trimmed.
6d528ed9Sopenharmony_ci// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
6d528ed9Sopenharmony_ci//     sequences containing a CR or LF are trimmed.
6d528ed9Sopenharmony_ci// (3) All other whitespace sequences are converted to single spaces.
6d528ed9Sopenharmony_cistd::u16string CollapseWhitespace(const std::u16string& text,
6d528ed9Sopenharmony_ci                                  bool trim_sequences_with_line_breaks);
6d528ed9Sopenharmony_cistd::string CollapseWhitespaceASCII(const std::string& text,
6d528ed9Sopenharmony_ci                                    bool trim_sequences_with_line_breaks);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Returns true if |input| is empty or contains only characters found in
6d528ed9Sopenharmony_ci// |characters|.
6d528ed9Sopenharmony_cibool ContainsOnlyChars(std::string_view input, std::string_view characters);
6d528ed9Sopenharmony_cibool ContainsOnlyChars(std::u16string_view input,
6d528ed9Sopenharmony_ci                       std::u16string_view characters);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Returns true if the specified string matches the criteria. How can a wide
6d528ed9Sopenharmony_ci// string be 8-bit or UTF8? It contains only characters that are < 256 (in the
6d528ed9Sopenharmony_ci// first case) or characters that use only 8-bits and whose 8-bit
6d528ed9Sopenharmony_ci// representation looks like a UTF-8 string (the second case).
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// Note that IsStringUTF8 checks not only if the input is structurally
6d528ed9Sopenharmony_ci// valid but also if it doesn't contain any non-character codepoint
6d528ed9Sopenharmony_ci// (e.g. U+FFFE). It's done on purpose because all the existing callers want
6d528ed9Sopenharmony_ci// to have the maximum 'discriminating' power from other encodings. If
6d528ed9Sopenharmony_ci// there's a use case for just checking the structural validity, we have to
6d528ed9Sopenharmony_ci// add a new function for that.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// IsStringASCII assumes the input is likely all ASCII, and does not leave early
6d528ed9Sopenharmony_ci// if it is not the case.
6d528ed9Sopenharmony_cibool IsStringUTF8(std::string_view str);
6d528ed9Sopenharmony_cibool IsStringASCII(std::string_view str);
6d528ed9Sopenharmony_cibool IsStringASCII(std::u16string_view str);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Compare the lower-case form of the given string against the given
6d528ed9Sopenharmony_ci// previously-lower-cased ASCII string (typically a constant).
6d528ed9Sopenharmony_cibool LowerCaseEqualsASCII(std::string_view str,
6d528ed9Sopenharmony_ci                          std::string_view lowecase_ascii);
6d528ed9Sopenharmony_cibool LowerCaseEqualsASCII(std::u16string_view str,
6d528ed9Sopenharmony_ci                          std::string_view lowecase_ascii);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Performs a case-sensitive string compare of the given 16-bit string against
6d528ed9Sopenharmony_ci// the given 8-bit ASCII string (typically a constant). The behavior is
6d528ed9Sopenharmony_ci// undefined if the |ascii| string is not ASCII.
6d528ed9Sopenharmony_cibool EqualsASCII(std::u16string_view str, std::string_view ascii);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// starts_with/ends_with for ASCII case-insensitive comparisons.
6d528ed9Sopenharmony_ci// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's
6d528ed9Sopenharmony_ci// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see
6d528ed9Sopenharmony_ci// base/i18n/case_conversion.h for usage advice) on the arguments, and then use
// the results in a case-sensitive comparison.
6d528ed9Sopenharmony_cibool StartsWithCaseInsensitiveASCII(std::string_view str,
6d528ed9Sopenharmony_ci                                    std::string_view search_for);
6d528ed9Sopenharmony_cibool StartsWithCaseInsensitiveASCII(std::u16string_view str,
6d528ed9Sopenharmony_ci                                    std::u16string_view search_for);
6d528ed9Sopenharmony_cibool EndsWithCaseInsensitiveASCII(std::string_view str,
6d528ed9Sopenharmony_ci                                  std::string_view search_for);
6d528ed9Sopenharmony_cibool EndsWithCaseInsensitiveASCII(std::u16string_view str,
6d528ed9Sopenharmony_ci                                  std::u16string_view search_for);
6d528ed9Sopenharmony_cibool starts_with(std::string_view str1, std::string_view str2);
6d528ed9Sopenharmony_cibool ends_with(std::string_view str1, std::string_view str2);
6d528ed9Sopenharmony_ci
// Determines the type of ASCII character, independent of locale (the C
// library versions will change based on locale).

// Returns true only for space, carriage return, line feed and horizontal tab.
// Form feed and vertical tab are not treated as whitespace by this function.
template <typename Char>
constexpr bool IsAsciiWhitespace(Char c) {
  return c == ' ' || c == '\r' || c == '\n' || c == '\t';
}
// Returns true if |c| is an ASCII letter ('A'..'Z' or 'a'..'z'), independent
// of locale.
template <typename Char>
constexpr bool IsAsciiAlpha(Char c) {
  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
// Returns true if |c| is an ASCII uppercase letter ('A'..'Z'), independent of
// locale.
template <typename Char>
constexpr bool IsAsciiUpper(Char c) {
  return c >= 'A' && c <= 'Z';
}
// Returns true if |c| is an ASCII lowercase letter ('a'..'z'), independent of
// locale.
template <typename Char>
constexpr bool IsAsciiLower(Char c) {
  return c >= 'a' && c <= 'z';
}
// Returns true if |c| is an ASCII decimal digit ('0'..'9'), independent of
// locale.
template <typename Char>
constexpr bool IsAsciiDigit(Char c) {
  return c >= '0' && c <= '9';
}
6d528ed9Sopenharmony_ci
// Returns true if |c| is an ASCII hexadecimal digit: '0'..'9', 'A'..'F' or
// 'a'..'f'.
template <typename Char>
constexpr bool IsHexDigit(Char c) {
  return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
         (c >= 'a' && c <= 'f');
}
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Returns the integer corresponding to the given hex character. For example:
6d528ed9Sopenharmony_ci//    '4' -> 4
6d528ed9Sopenharmony_ci//    'a' -> 10
6d528ed9Sopenharmony_ci//    'B' -> 11
6d528ed9Sopenharmony_ci// Assumes the input is a valid hex character. DCHECKs in debug builds if not.
6d528ed9Sopenharmony_cichar HexDigitToInt(char16_t c);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Returns true if it's a Unicode whitespace character.
6d528ed9Sopenharmony_cibool IsUnicodeWhitespace(char16_t c);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Return a byte string in human-readable format with a unit suffix. Not
6d528ed9Sopenharmony_ci// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
6d528ed9Sopenharmony_ci// highly recommended instead. TODO(avi): Figure out how to get callers to use
6d528ed9Sopenharmony_ci// FormatBytes instead; remove this.
6d528ed9Sopenharmony_cistd::u16string FormatBytesUnlocalized(int64_t bytes);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Starting at |start_offset| (usually 0), replace the first instance of
6d528ed9Sopenharmony_ci// |find_this| with |replace_with|.
6d528ed9Sopenharmony_civoid ReplaceFirstSubstringAfterOffset(std::u16string* str,
6d528ed9Sopenharmony_ci                                      size_t start_offset,
6d528ed9Sopenharmony_ci                                      std::u16string_view find_this,
6d528ed9Sopenharmony_ci                                      std::u16string_view replace_with);
6d528ed9Sopenharmony_civoid ReplaceFirstSubstringAfterOffset(std::string* str,
6d528ed9Sopenharmony_ci                                      size_t start_offset,
6d528ed9Sopenharmony_ci                                      std::string_view find_this,
6d528ed9Sopenharmony_ci                                      std::string_view replace_with);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Starting at |start_offset| (usually 0), look through |str| and replace all
6d528ed9Sopenharmony_ci// instances of |find_this| with |replace_with|.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// This does entire substrings; use std::replace in <algorithm> for single
6d528ed9Sopenharmony_ci// characters, for example:
6d528ed9Sopenharmony_ci//   std::replace(str.begin(), str.end(), 'a', 'b');
6d528ed9Sopenharmony_civoid ReplaceSubstringsAfterOffset(std::u16string* str,
6d528ed9Sopenharmony_ci                                  size_t start_offset,
6d528ed9Sopenharmony_ci                                  std::u16string_view find_this,
6d528ed9Sopenharmony_ci                                  std::u16string_view replace_with);
6d528ed9Sopenharmony_civoid ReplaceSubstringsAfterOffset(std::string* str,
6d528ed9Sopenharmony_ci                                  size_t start_offset,
6d528ed9Sopenharmony_ci                                  std::string_view find_this,
6d528ed9Sopenharmony_ci                                  std::string_view replace_with);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Reserves enough memory in |str| to accommodate |length_with_null| characters,
6d528ed9Sopenharmony_ci// sets the size of |str| to |length_with_null - 1| characters, and returns a
6d528ed9Sopenharmony_ci// pointer to the underlying contiguous array of characters.  This is typically
6d528ed9Sopenharmony_ci// used when calling a function that writes results into a character array, but
6d528ed9Sopenharmony_ci// the caller wants the data to be managed by a string-like object.  It is
// convenient in that it can be used inline in the call, and fast in that it
6d528ed9Sopenharmony_ci// avoids copying the results of the call from a char* into a string.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// |length_with_null| must be at least 2, since otherwise the underlying string
6d528ed9Sopenharmony_ci// would have size 0, and trying to access &((*str)[0]) in that case can result
6d528ed9Sopenharmony_ci// in a number of problems.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// Internally, this takes linear time because the resize() call 0-fills the
6d528ed9Sopenharmony_ci// underlying array for potentially all
6d528ed9Sopenharmony_ci// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes.  Ideally we
6d528ed9Sopenharmony_ci// could avoid this aspect of the resize() call, as we expect the caller to
6d528ed9Sopenharmony_ci// immediately write over this memory, but there is no other way to set the size
6d528ed9Sopenharmony_ci// of the string, and not doing that will mean people who access |str| rather
6d528ed9Sopenharmony_ci// than str.c_str() will get back a string of whatever size |str| had on entry
6d528ed9Sopenharmony_ci// to this function (probably 0).
6d528ed9Sopenharmony_cichar* WriteInto(std::string* str, size_t length_with_null);
6d528ed9Sopenharmony_cichar16_t* WriteInto(std::u16string* str, size_t length_with_null);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Does the opposite of SplitString()/SplitStringPiece(). Joins a vector or list
6d528ed9Sopenharmony_ci// of strings into a single string, inserting |separator| (which may be empty)
6d528ed9Sopenharmony_ci// in between all elements.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// If possible, callers should build a vector of std::string_views and use the
6d528ed9Sopenharmony_ci// std::string_view variant, so that they do not create unnecessary copies of
6d528ed9Sopenharmony_ci// strings. For example, instead of using SplitString, modifying the vector,
6d528ed9Sopenharmony_ci// then using JoinString, use SplitStringPiece followed by JoinString so that no
6d528ed9Sopenharmony_ci// copies of those strings are created until the final join operation.
6d528ed9Sopenharmony_ci//
6d528ed9Sopenharmony_ci// Use StrCat (in base/strings/strcat.h) if you don't need a separator.
6d528ed9Sopenharmony_cistd::string JoinString(const std::vector<std::string>& parts,
6d528ed9Sopenharmony_ci                       std::string_view separator);
6d528ed9Sopenharmony_cistd::u16string JoinString(const std::vector<std::u16string>& parts,
6d528ed9Sopenharmony_ci                          std::u16string_view separator);
6d528ed9Sopenharmony_cistd::string JoinString(const std::vector<std::string_view>& parts,
6d528ed9Sopenharmony_ci                       std::string_view separator);
6d528ed9Sopenharmony_cistd::u16string JoinString(const std::vector<std::u16string_view>& parts,
6d528ed9Sopenharmony_ci                          std::u16string_view separator);
6d528ed9Sopenharmony_ci// Explicit initializer_list overloads are required to break ambiguity when used
6d528ed9Sopenharmony_ci// with a literal initializer list (otherwise the compiler would not be able to
6d528ed9Sopenharmony_ci// decide between the string and std::string_view overloads).
6d528ed9Sopenharmony_cistd::string JoinString(std::initializer_list<std::string_view> parts,
6d528ed9Sopenharmony_ci                       std::string_view separator);
6d528ed9Sopenharmony_cistd::u16string JoinString(std::initializer_list<std::u16string_view> parts,
6d528ed9Sopenharmony_ci                          std::u16string_view separator);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci// Replace $1-$2-$3..$9 in the format string with values from |subst|.
6d528ed9Sopenharmony_ci// Additionally, any number of consecutive '$' characters is replaced by that
6d528ed9Sopenharmony_ci// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
6d528ed9Sopenharmony_ci// NULL. This only allows you to use up to nine replacements.
6d528ed9Sopenharmony_cistd::u16string ReplaceStringPlaceholders(
6d528ed9Sopenharmony_ci    const std::u16string& format_string,
6d528ed9Sopenharmony_ci    const std::vector<std::u16string>& subst,
6d528ed9Sopenharmony_ci    std::vector<size_t>* offsets);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_cistd::string ReplaceStringPlaceholders(std::string_view format_string,
6d528ed9Sopenharmony_ci                                      const std::vector<std::string>& subst,
6d528ed9Sopenharmony_ci                                      std::vector<size_t>* offsets);
6d528ed9Sopenharmony_ci
// Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.
6d528ed9Sopenharmony_cistd::u16string ReplaceStringPlaceholders(const std::u16string& format_string,
6d528ed9Sopenharmony_ci                                         const std::u16string& a,
6d528ed9Sopenharmony_ci                                         size_t* offset);
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci}  // namespace base
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci#if defined(OS_WIN)
6d528ed9Sopenharmony_ci#include "base/strings/string_util_win.h"
6d528ed9Sopenharmony_ci#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
6d528ed9Sopenharmony_ci#include "base/strings/string_util_posix.h"
6d528ed9Sopenharmony_ci#else
6d528ed9Sopenharmony_ci#error Define string operations appropriately for your platform
6d528ed9Sopenharmony_ci#endif
6d528ed9Sopenharmony_ci
6d528ed9Sopenharmony_ci#endif  // BASE_STRINGS_STRING_UTIL_H_