1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
6#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
7
8#include <stddef.h>
9
10#include <string>
11#include <string_view>
12#include <vector>
13
14namespace base {
15
// A helper class and associated data structures to adjust offsets into a
// string in response to various adjustments one might do to that string
// (e.g., eliminating a range).  For details on offsets, see the comments by
// the AdjustOffsets() function below.
//
// All member functions are static; the class only groups the Adjustment
// record type with the operations that consume it.
class OffsetAdjuster {
 public:
  // A single recorded alteration of a string.
  //
  // NOTE(review): judging by the field names, |original_offset| and
  // |original_length| identify a range in the original string and
  // |output_length| is the length of whatever replaced that range in the
  // output string -- confirm against the implementation file.
  struct Adjustment {
    Adjustment(size_t original_offset,
               size_t original_length,
               size_t output_length);

    size_t original_offset;
    size_t original_length;
    size_t output_length;
  };

  // An ordered collection of adjustments.  MergeSequentialAdjustments()
  // below requires its inputs to be sorted by increasing offset.
  using Adjustments = std::vector<Adjustment>;

  // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
  // recorded in |adjustments|.  Adjusted offsets greater than |limit| will be
  // set to std::u16string::npos.
  //
  // Offsets represent insertion/selection points between characters: if the
  // original string is "abcd", then 0 is before 'a', 2 is between 'b' and 'c',
  // and 4 is at the end of the string.  Valid input offsets range from 0 to
  // the length of the original string.  On exit, each offset will have been
  // modified to point at the same logical position in the output string.  If
  // an offset cannot be successfully adjusted (e.g., because it points into
  // the middle of a multibyte sequence), it will be set to
  // std::u16string::npos.
  static void AdjustOffsets(const Adjustments& adjustments,
                            std::vector<size_t>* offsets_for_adjustment,
                            size_t limit = std::u16string::npos);

  // Adjusts the single |offset| to reflect the adjustments recorded in
  // |adjustments|.  |limit| has the same meaning as in AdjustOffsets().
  static void AdjustOffset(const Adjustments& adjustments,
                           size_t* offset,
                           size_t limit = std::u16string::npos);

  // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
  // of the adjustments recorded in |adjustments|.  In other words, the offsets
  // provided represent offsets into an adjusted string and the caller wants
  // to know the offsets they correspond to in the original string.  If an
  // offset cannot be successfully unadjusted (e.g., because it points into
  // the middle of a multibyte sequence), it will be set to
  // std::u16string::npos.
  static void UnadjustOffsets(const Adjustments& adjustments,
                              std::vector<size_t>* offsets_for_unadjustment);

  // Adjusts the single |offset| to reflect the reverse of the adjustments
  // recorded in |adjustments|.
  static void UnadjustOffset(const Adjustments& adjustments, size_t* offset);

  // Combines two sequential sets of adjustments, storing the combined revised
  // adjustments in |adjustments_on_adjusted_string|.  That is, suppose a
  // string was altered in some way, with the alterations recorded as
  // adjustments in |first_adjustments|.  Then suppose the resulting string is
  // further altered, with the alterations recorded as adjustments stored in
  // |adjustments_on_adjusted_string|, with the offsets recorded in these
  // adjustments being with respect to the intermediate string.  This function
  // combines the two sets of adjustments into one, storing the result in
  // |adjustments_on_adjusted_string|, whose offsets are correct with respect
  // to the original string.
  //
  // Assumes both parameters are sorted by increasing offset.
  //
  // WARNING: Only supports |first_adjustments| that involve collapsing ranges
  // of text, not expanding ranges.
  static void MergeSequentialAdjustments(
      const Adjustments& first_adjustments,
      Adjustments* adjustments_on_adjusted_string);
};
87
88// Like the conversions in utf_string_conversions.h, but also fills in an
89// |adjustments| parameter that reflects the alterations done to the string.
90// It may be NULL.
91bool UTF8ToUTF16WithAdjustments(const char* src,
92                                size_t src_len,
93                                std::u16string* output,
94                                base::OffsetAdjuster::Adjustments* adjustments);
95std::u16string UTF8ToUTF16WithAdjustments(
96    std::string_view utf8,
97    base::OffsetAdjuster::Adjustments* adjustments);
// Conversions that, instead of handing a set of adjustments back to the
// caller, internally examine the adjustments produced by the conversion and
// apply them to |offsets_for_adjustment|.  Input offsets greater than the
// length of the input string will be set to std::u16string::npos.  See the
// comments by OffsetAdjuster::AdjustOffsets() for what an offset means and
// when it cannot be adjusted.
//
// Converts |utf8| to UTF-16 and returns the converted string.
std::u16string UTF8ToUTF16AndAdjustOffsets(
    std::string_view utf8,
    std::vector<size_t>* offsets_for_adjustment);

// Converts |utf16| to UTF-8 and returns the converted string, adjusting
// |offsets_for_adjustment| in the same manner as the function above.
std::string UTF16ToUTF8AndAdjustOffsets(
    std::u16string_view utf16,
    std::vector<size_t>* offsets_for_adjustment);
108
109}  // namespace base
110
111#endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
112