16d528ed9Sopenharmony_ci// Copyright (c) 2011 The Chromium Authors. All rights reserved.
26d528ed9Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
36d528ed9Sopenharmony_ci// found in the LICENSE file.
46d528ed9Sopenharmony_ci
56d528ed9Sopenharmony_ci#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
66d528ed9Sopenharmony_ci#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
76d528ed9Sopenharmony_ci
86d528ed9Sopenharmony_ci#include <stddef.h>
96d528ed9Sopenharmony_ci
106d528ed9Sopenharmony_ci#include <string>
116d528ed9Sopenharmony_ci#include <string_view>
126d528ed9Sopenharmony_ci#include <vector>
136d528ed9Sopenharmony_ci
146d528ed9Sopenharmony_cinamespace base {
156d528ed9Sopenharmony_ci
// Utility for translating offsets into a string so that they remain correct
// after the string has been altered (e.g., after a range has been removed).
// The meaning of an offset is described in the comments on AdjustOffsets().
class OffsetAdjuster {
 public:
  // Record of a single alteration made to a string.
  // NOTE(review): judging by the field names, |original_length| units starting
  // at |original_offset| in the original string correspond to |output_length|
  // units in the altered string -- confirm against the implementation.
  struct Adjustment {
    // Declared here, defined out of line.
    Adjustment(size_t original_offset,
               size_t original_length,
               size_t output_length);

    size_t original_offset;
    size_t original_length;
    size_t output_length;
  };
  // A sequence of adjustments.  MergeSequentialAdjustments() expects such
  // sequences to be sorted by increasing offset.
  using Adjustments = std::vector<Adjustment>;

  // Rewrites every offset in |offsets_for_adjustment| so that it reflects the
  // adjustments recorded in |adjustments|.  Adjusted offsets greater than
  // |limit| will be set to std::u16string::npos.
  //
  // Offsets represent insertion/selection points between characters: if the
  // string is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is
  // at the end of the string.  Valid input offsets range from 0 to the length
  // of the string.  On exit, each offset will have been modified to point at
  // the same logical position in the output string.  If an offset cannot be
  // successfully adjusted (e.g., because it points into the middle of a
  // multibyte sequence), it will be set to std::u16string::npos.
  static void AdjustOffsets(const Adjustments& adjustments,
                            std::vector<size_t>* offsets_for_adjustment,
                            size_t limit = std::u16string::npos);

  // Single-offset form of AdjustOffsets(): adjusts |offset| to reflect the
  // adjustments recorded in |adjustments|.
  static void AdjustOffset(const Adjustments& adjustments,
                           size_t* offset,
                           size_t limit = std::u16string::npos);

  // Rewrites every offset in |offsets_for_unadjustment| so that it reflects
  // the reverse of the adjustments recorded in |adjustments|.  In other words,
  // the offsets provided are offsets into an adjusted string and the caller
  // wants to know which offsets they correspond to in the original string.
  // If an offset cannot be successfully unadjusted (e.g., because it points
  // into the middle of a multibyte sequence), it will be set to
  // std::u16string::npos.
  static void UnadjustOffsets(const Adjustments& adjustments,
                              std::vector<size_t>* offsets_for_unadjustment);

  // Single-offset form of UnadjustOffsets(): adjusts |offset| to reflect the
  // reverse of the adjustments recorded in |adjustments|.
  static void UnadjustOffset(const Adjustments& adjustments, size_t* offset);

  // Combines two sequential sets of adjustments, storing the combined revised
  // adjustments in |adjustments_on_adjusted_string|.  That is, suppose a
  // string was altered in some way, with the alterations recorded as
  // adjustments in |first_adjustments|.  Then suppose the resulting string is
  // further altered, with the alterations recorded as adjustments stored in
  // |adjustments_on_adjusted_string|, the offsets in these adjustments being
  // with respect to the intermediate string.  This function merges the two
  // sets into one and stores the result in |adjustments_on_adjusted_string|,
  // whose offsets are then correct with respect to the original string.
  //
  // Assumes both parameters are sorted by increasing offset.
  //
  // WARNING: Only supports |first_adjustments| that involve collapsing ranges
  // of text, not expanding ranges.
  static void MergeSequentialAdjustments(
      const Adjustments& first_adjustments,
      Adjustments* adjustments_on_adjusted_string);
};
876d528ed9Sopenharmony_ci
886d528ed9Sopenharmony_ci// Like the conversions in utf_string_conversions.h, but also fills in an
896d528ed9Sopenharmony_ci// |adjustments| parameter that reflects the alterations done to the string.
906d528ed9Sopenharmony_ci// It may be NULL.
916d528ed9Sopenharmony_cibool UTF8ToUTF16WithAdjustments(const char* src,
926d528ed9Sopenharmony_ci                                size_t src_len,
936d528ed9Sopenharmony_ci                                std::u16string* output,
946d528ed9Sopenharmony_ci                                base::OffsetAdjuster::Adjustments* adjustments);
956d528ed9Sopenharmony_cistd::u16string UTF8ToUTF16WithAdjustments(
966d528ed9Sopenharmony_ci    std::string_view utf8,
976d528ed9Sopenharmony_ci    base::OffsetAdjuster::Adjustments* adjustments);
// Like UTF8ToUTF16WithAdjustments(), except that rather than handing the
// adjustments back to the caller, it examines them internally and applies
// them to |offsets_for_adjustment|.  Input offsets greater than the length of
// the input string will be set to std::u16string::npos.  See the comments on
// OffsetAdjuster::AdjustOffsets().
std::u16string UTF8ToUTF16AndAdjustOffsets(
    std::string_view utf8, std::vector<size_t>* offsets_for_adjustment);
// Counterpart of UTF8ToUTF16AndAdjustOffsets() that takes a
// std::u16string_view and returns a std::string: the adjustments are examined
// internally and applied to |offsets_for_adjustment|.  Input offsets greater
// than the length of the input string will be set to std::u16string::npos.
// See the comments on OffsetAdjuster::AdjustOffsets().
std::string UTF16ToUTF8AndAdjustOffsets(
    std::u16string_view utf16, std::vector<size_t>* offsets_for_adjustment);
1086d528ed9Sopenharmony_ci
1096d528ed9Sopenharmony_ci}  // namespace base
1106d528ed9Sopenharmony_ci
1116d528ed9Sopenharmony_ci#endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
112