1// Copyright (C) 2011 The Libphonenumber Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Author: Philippe Liard
16
17#ifndef I18N_PHONENUMBERS_UNICODESTRING_H_
18#define I18N_PHONENUMBERS_UNICODESTRING_H_
19
20#include "phonenumbers/utf/unicodetext.h"
21
22#include <cstring>
23#include <limits>
24
25namespace i18n {
26namespace phonenumbers {
27
28// This class supports the minimal subset of icu::UnicodeString needed by
29// AsYouTypeFormatter in order to let the libphonenumber not depend on ICU
30// which is not available by default on some systems, such as iOS.
31class UnicodeString {
32 public:
33  UnicodeString() : cached_index_(-1) {}
34
35  // Constructs a new unicode string copying the provided C string.
36  explicit UnicodeString(const char* utf8)
37      : text_(UTF8ToUnicodeText(utf8, static_cast<int>(std::strlen(utf8)))),
38        cached_index_(-1) {}
39
40  // Constructs a new unicode string containing the provided codepoint.
41  explicit UnicodeString(char32 codepoint) : cached_index_(-1) {
42    append(codepoint);
43  }
44
45  UnicodeString(const UnicodeString& src)
46      : text_(src.text_), cached_index_(-1) {}
47
48  UnicodeString& operator=(const UnicodeString& src);
49
50  bool operator==(const UnicodeString& rhs) const;
51
52  void append(const UnicodeString& unicode_string);
53
54  inline void append(char32 codepoint) {
55    invalidateCachedIndex();
56    text_.push_back(codepoint);
57  }
58
59  typedef UnicodeText::const_iterator const_iterator;
60
61  inline const_iterator begin() const {
62    return text_.begin();
63  }
64
65  inline const_iterator end() const {
66    return text_.end();
67  }
68
69  // Returns the index of the provided codepoint or -1 if not found.
70  int indexOf(char32 codepoint) const;
71
72  // Returns the number of codepoints contained in the unicode string.
73  inline int length() const {
74    return text_.size();
75  }
76
77  // Clears the unicode string.
78  inline void remove() {
79    invalidateCachedIndex();
80    text_.clear();
81  }
82
83  // Replaces the substring located at [ start, start + length - 1 ] with the
84  // provided unicode string.
85  void replace(int start, int length, const UnicodeString& src);
86
87  void setCharAt(int pos, char32 c);
88
89  // Copies the provided C string.
90  inline void setTo(const char* s, size_t len) {
91    invalidateCachedIndex();
92    text_.CopyUTF8(s, static_cast<int>(len));
93  }
94
95  // Was this UnicodeString created from valid UTF-8?
96  bool UTF8WasValid() const { return text_.UTF8WasValid(); }
97
98  // Returns the substring located at [ start, start + length - 1 ] without
99  // copying the underlying C string. If one of the provided parameters is out
100  // of range, the function returns an empty unicode string.
101  UnicodeString tempSubString(
102      int start,
103      int length = std::numeric_limits<int>::max()) const;
104
105  inline void toUTF8String(string& out) const {
106    out = UnicodeTextToUTF8(text_);
107  }
108
109  char32 operator[](int index) const;
110
111 private:
112  UnicodeText text_;
113
114  // As UnicodeText doesn't provide random access, an operator[] implementation
115  // would naively iterate from the beginning of the string to the supplied
116  // index which would be inefficient.
117  // As operator[] is very likely to be called in a loop with consecutive
118  // indexes, we save the corresponding iterator so we can reuse it the next
119  // time it is called.
120
121  // The following function which invalidates the cached index corresponding to
122  // the iterator position must be called every time the unicode string is
123  // modified (i.e. in all the non-const methods).
124  inline void invalidateCachedIndex() {
125    cached_index_ = -1;
126  }
127
128  // Iterator corresponding to the cached index below, used by operator[].
129  mutable UnicodeText::const_iterator cached_it_;
130  mutable int cached_index_;
131};
132
133}  // namespace phonenumbers
134}  // namespace i18n
135
136#endif  // I18N_PHONENUMBERS_UNICODESTRING_H_
137