1// Copyright (C) 2011 The Libphonenumber Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Author: Philippe Liard 16 17#ifndef I18N_PHONENUMBERS_UNICODESTRING_H_ 18#define I18N_PHONENUMBERS_UNICODESTRING_H_ 19 20#include "phonenumbers/utf/unicodetext.h" 21 22#include <cstring> 23#include <limits> 24 25namespace i18n { 26namespace phonenumbers { 27 28// This class supports the minimal subset of icu::UnicodeString needed by 29// AsYouTypeFormatter in order to let the libphonenumber not depend on ICU 30// which is not available by default on some systems, such as iOS. 31class UnicodeString { 32 public: 33 UnicodeString() : cached_index_(-1) {} 34 35 // Constructs a new unicode string copying the provided C string. 36 explicit UnicodeString(const char* utf8) 37 : text_(UTF8ToUnicodeText(utf8, static_cast<int>(std::strlen(utf8)))), 38 cached_index_(-1) {} 39 40 // Constructs a new unicode string containing the provided codepoint. 41 explicit UnicodeString(char32 codepoint) : cached_index_(-1) { 42 append(codepoint); 43 } 44 45 UnicodeString(const UnicodeString& src) 46 : text_(src.text_), cached_index_(-1) {} 47 48 UnicodeString& operator=(const UnicodeString& src); 49 50 bool operator==(const UnicodeString& rhs) const; 51 52 void append(const UnicodeString& unicode_string); 53 54 inline void append(char32 codepoint) { 55 invalidateCachedIndex(); 56 text_.push_back(codepoint); 57 } 58 59 typedef UnicodeText::const_iterator const_iterator; 60 61 inline const_iterator begin() const { 62 return text_.begin(); 63 } 64 65 inline const_iterator end() const { 66 return text_.end(); 67 } 68 69 // Returns the index of the provided codepoint or -1 if not found. 70 int indexOf(char32 codepoint) const; 71 72 // Returns the number of codepoints contained in the unicode string. 73 inline int length() const { 74 return text_.size(); 75 } 76 77 // Clears the unicode string. 78 inline void remove() { 79 invalidateCachedIndex(); 80 text_.clear(); 81 } 82 83 // Replaces the substring located at [ start, start + length - 1 ] with the 84 // provided unicode string. 85 void replace(int start, int length, const UnicodeString& src); 86 87 void setCharAt(int pos, char32 c); 88 89 // Copies the provided C string. 90 inline void setTo(const char* s, size_t len) { 91 invalidateCachedIndex(); 92 text_.CopyUTF8(s, static_cast<int>(len)); 93 } 94 95 // Was this UnicodeString created from valid UTF-8? 96 bool UTF8WasValid() const { return text_.UTF8WasValid(); } 97 98 // Returns the substring located at [ start, start + length - 1 ] without 99 // copying the underlying C string. If one of the provided parameters is out 100 // of range, the function returns an empty unicode string. 101 UnicodeString tempSubString( 102 int start, 103 int length = std::numeric_limits<int>::max()) const; 104 105 inline void toUTF8String(string& out) const { 106 out = UnicodeTextToUTF8(text_); 107 } 108 109 char32 operator[](int index) const; 110 111 private: 112 UnicodeText text_; 113 114 // As UnicodeText doesn't provide random access, an operator[] implementation 115 // would naively iterate from the beginning of the string to the supplied 116 // index which would be inefficient. 117 // As operator[] is very likely to be called in a loop with consecutive 118 // indexes, we save the corresponding iterator so we can reuse it the next 119 // time it is called. 120 121 // The following function which invalidates the cached index corresponding to 122 // the iterator position must be called every time the unicode string is 123 // modified (i.e. in all the non-const methods). 124 inline void invalidateCachedIndex() { 125 cached_index_ = -1; 126 } 127 128 // Iterator corresponding to the cached index below, used by operator[]. 129 mutable UnicodeText::const_iterator cached_it_; 130 mutable int cached_index_; 131}; 132 133} // namespace phonenumbers 134} // namespace i18n 135 136#endif // I18N_PHONENUMBERS_UNICODESTRING_H_ 137