// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: George Yakovlev
//         Philippe Liard
//
// RegExp adapter to allow a pluggable regexp engine. It has been introduced
// during the integration of the open-source version of this library into
// Chromium to be able to use the ICU Regex engine instead of RE2, which is not
// officially supported on Windows.
// Since RE2 was initially used in this library, the interface of this adapter
// is very close to the subset of the RE2 API used in phonenumberutil.cc.

#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
#define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_

#include <cstddef>
#include <string>

namespace i18n {
namespace phonenumbers {

using std::string;

// RegExpInput is the interface that abstracts the input that feeds the
// Consume() method of RegExp, which may differ depending on its various
// implementations (StringPiece for RE2, UnicodeString for ICU Regex).
class RegExpInput {
 public:
  virtual ~RegExpInput() {}

  // Converts to a C++ string.
  virtual string ToString() const = 0;
};

// The regular expression abstract class. It supports only the functions used
// in phonenumberutil.cc. Implementations must provide the pure virtual
// Consume(), Match() and Replace() methods; every other member is a thin
// inline helper that forwards to one of those three.
class RegExp {
 public:
  virtual ~RegExp() {}

  // Matches string to regular expression, returns true if expression was
  // matched, false otherwise, advances position in the match.
  // input_string - string to be searched.
  // anchor_at_start - if true, match would be successful only if it appears at
  // the beginning of the tested region of the string.
  // matched_string1..6 - string extracted from the match in sequential order.
  // Can be NULL.
  virtual bool Consume(RegExpInput* input_string,
                       bool anchor_at_start,
                       string* matched_string1,
                       string* matched_string2,
                       string* matched_string3,
                       string* matched_string4,
                       string* matched_string5,
                       string* matched_string6) const = 0;

  // Helper methods calling the Consume method that assume the match must start
  // at the beginning (anchor_at_start is true). Each overload accepts between
  // zero and six output strings and passes NULL for the ones it does not take.
  inline bool Consume(RegExpInput* input_string, string* matched_string1,
                      string* matched_string2,
                      string* matched_string3,
                      string* matched_string4,
                      string* matched_string5,
                      string* matched_string6) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, matched_string4, matched_string5,
                   matched_string6);
  }

  inline bool Consume(RegExpInput* input_string, string* matched_string1,
                      string* matched_string2,
                      string* matched_string3,
                      string* matched_string4,
                      string* matched_string5) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, matched_string4, matched_string5, NULL);
  }

  inline bool Consume(RegExpInput* input_string, string* matched_string1,
                      string* matched_string2,
                      string* matched_string3,
                      string* matched_string4) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, matched_string4, NULL, NULL);
  }

  inline bool Consume(RegExpInput* input_string,
                      string* matched_string1,
                      string* matched_string2,
                      string* matched_string3) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, NULL, NULL, NULL);
  }

  inline bool Consume(RegExpInput* input_string,
                      string* matched_string1,
                      string* matched_string2) const {
    return Consume(input_string, true, matched_string1, matched_string2, NULL,
                   NULL, NULL, NULL);
  }

  inline bool Consume(RegExpInput* input_string, string* matched_string) const {
    return Consume(input_string, true, matched_string, NULL, NULL, NULL, NULL,
                   NULL);
  }

  inline bool Consume(RegExpInput* input_string) const {
    return Consume(input_string, true, NULL, NULL, NULL, NULL, NULL, NULL);
  }

  // Helper method calling the Consume method that assumes the match can start
  // at any place in the string (anchor_at_start is false).
  inline bool FindAndConsume(RegExpInput* input_string,
                             string* matched_string) const {
    return Consume(input_string, false, matched_string, NULL, NULL, NULL, NULL,
                   NULL);
  }

  // Matches string to regular expression, returns true if the expression was
  // matched, false otherwise.
  // input_string - string to be searched.
  // full_match - if true, match would be successful only if it matches the
  // complete string.
  // matched_string - the string extracted from the match. Can be NULL.
  virtual bool Match(const string& input_string,
                     bool full_match,
                     string* matched_string) const = 0;

  // Helper methods calling the Match method with the right arguments:
  // PartialMatch passes full_match = false, FullMatch passes full_match = true.
  // The single-argument overloads pass NULL as matched_string.
  inline bool PartialMatch(const string& input_string,
                           string* matched_string) const {
    return Match(input_string, false, matched_string);
  }

  inline bool PartialMatch(const string& input_string) const {
    return Match(input_string, false, NULL);
  }

  inline bool FullMatch(const string& input_string,
                        string* matched_string) const {
    return Match(input_string, true, matched_string);
  }

  inline bool FullMatch(const string& input_string) const {
    return Match(input_string, true, NULL);
  }

  // Replaces match(es) in 'string_to_process'. If 'global' is true,
  // replaces all the matches, otherwise only the first match.
  // replacement_string - text the matches are replaced with. The groups in the
  // replacement string are referenced with the $[0-9] notation.
  // Returns true if the pattern matches and a replacement occurs, false
  // otherwise.
  virtual bool Replace(string* string_to_process,
                       bool global,
                       const string& replacement_string) const = 0;

  // Helper methods calling the Replace method with the right arguments:
  // Replace passes global = false, GlobalReplace passes global = true.
  inline bool Replace(string* string_to_process,
                      const string& replacement_string) const {
    return Replace(string_to_process, false, replacement_string);
  }

  inline bool GlobalReplace(string* string_to_process,
                            const string& replacement_string) const {
    return Replace(string_to_process, true, replacement_string);
  }
};

// Abstract factory class that lets its subclasses instantiate the classes
// implementing RegExp and RegExpInput.
class AbstractRegExpFactory {
 public:
  virtual ~AbstractRegExpFactory() {}

  // Creates a new instance of RegExpInput. The deletion of the returned
  // instance is under the responsibility of the caller.
  virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;

  // Creates a new instance of RegExp. The deletion of the returned instance is
  // under the responsibility of the caller.
  virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
};

}  // namespace phonenumbers
}  // namespace i18n

#endif  // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_