// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_

#include <stddef.h>

#include <string>
#include <string_view>
#include <vector>

namespace base {

// A helper class and associated data structures to adjust offsets into a
// string in response to various adjustments one might do to that string
// (e.g., eliminating a range).  For details on offsets, see the comments by
// the AdjustOffsets() function below.
class OffsetAdjuster {
 public:
  // One recorded alteration to a string.
  // NOTE(review): judging by the field names, this presumably says that
  // |original_length| units starting at |original_offset| in the original
  // string became |output_length| units in the output -- confirm against the
  // implementation file.
  struct Adjustment {
    Adjustment(size_t original_offset,
               size_t original_length,
               size_t output_length);

    size_t original_offset;
    size_t original_length;
    size_t output_length;
  };
  using Adjustments = std::vector<Adjustment>;

  // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
  // recorded in |adjustments|.  Adjusted offsets greater than |limit| will be
  // set to std::u16string::npos.
  //
  // Offsets represent insertion/selection points between characters: if |src|
  // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
  // end of the string.  Valid input offsets range from 0 to |src_len|.  On
  // exit, each offset will have been modified to point at the same logical
  // position in the output string.  If an offset cannot be successfully
  // adjusted (e.g., because it points into the middle of a multibyte sequence),
  // it will be set to std::u16string::npos.
  static void AdjustOffsets(const Adjustments& adjustments,
                            std::vector<size_t>* offsets_for_adjustment,
                            size_t limit = std::u16string::npos);

  // Adjusts the single |offset| to reflect the adjustments recorded in
  // |adjustments|.
  static void AdjustOffset(const Adjustments& adjustments,
                           size_t* offset,
                           size_t limit = std::u16string::npos);

  // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
  // of the adjustments recorded in |adjustments|.  In other words, the offsets
  // provided represent offsets into an adjusted string and the caller wants
  // to know the offsets they correspond to in the original string.  If an
  // offset cannot be successfully unadjusted (e.g., because it points into
  // the middle of a multibyte sequence), it will be set to
  // std::u16string::npos.
  static void UnadjustOffsets(const Adjustments& adjustments,
                              std::vector<size_t>* offsets_for_unadjustment);

  // Adjusts the single |offset| to reflect the reverse of the adjustments
  // recorded in |adjustments|.
  static void UnadjustOffset(const Adjustments& adjustments, size_t* offset);

  // Combines two sequential sets of adjustments, storing the combined revised
  // adjustments in |adjustments_on_adjusted_string|.  That is, suppose a
  // string was altered in some way, with the alterations recorded as
  // adjustments in |first_adjustments|.  Then suppose the resulting string is
  // further altered, with the alterations recorded as adjustments stored in
  // |adjustments_on_adjusted_string|, with the offsets recorded in these
  // adjustments being with respect to the intermediate string.  This function
  // combines the two sets of adjustments into one, storing the result in
  // |adjustments_on_adjusted_string|, whose offsets are correct with respect
  // to the original string.
  //
  // Assumes both parameters are sorted by increasing offset.
  //
  // WARNING: Only supports |first_adjustments| that involve collapsing ranges
  // of text, not expanding ranges.
  static void MergeSequentialAdjustments(
      const Adjustments& first_adjustments,
      Adjustments* adjustments_on_adjusted_string);
};

// Like the conversions in utf_string_conversions.h, but also fills in an
// |adjustments| parameter that reflects the alterations done to the string.
// It may be nullptr.
bool UTF8ToUTF16WithAdjustments(const char* src,
                                size_t src_len,
                                std::u16string* output,
                                base::OffsetAdjuster::Adjustments* adjustments);
std::u16string UTF8ToUTF16WithAdjustments(
    std::string_view utf8,
    base::OffsetAdjuster::Adjustments* adjustments);

// As above, but instead internally examines the adjustments and applies them
// to |offsets_for_adjustment|.  Input offsets greater than the length of the
// input string will be set to std::u16string::npos.  See comments by
// AdjustOffsets().
std::u16string UTF8ToUTF16AndAdjustOffsets(
    std::string_view utf8,
    std::vector<size_t>* offsets_for_adjustment);

// Counterpart of UTF8ToUTF16AndAdjustOffsets() for the opposite direction:
// takes UTF-16 input, returns a UTF-8 string, and adjusts
// |offsets_for_adjustment| accordingly.
std::string UTF16ToUTF8AndAdjustOffsets(
    std::u16string_view utf16,
    std::vector<size_t>* offsets_for_adjustment);

}  // namespace base

#endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_