1// Copyright (C) 2011 The Libphonenumber Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Author: George Yakovlev
16//         Philippe Liard
17//
18// RegExp adapter to allow a pluggable regexp engine. It has been introduced
19// during the integration of the open-source version of this library into
20// Chromium to be able to use the ICU Regex engine instead of RE2, which is not
21// officially supported on Windows.
22// Since RE2 was initially used in this library, the interface of this adapter
23// is very close to the subset of the RE2 API used in phonenumberutil.cc.
24
25#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
26#define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
27
28#include <cstddef>
29#include <string>
30
31namespace i18n {
32namespace phonenumbers {
33
34using std::string;
35
// Abstraction of the input handed to RegExp::Consume(). Each regexp engine
// works on its own input representation (StringPiece for RE2, UnicodeString
// for ICU Regex), so the concrete type is hidden behind this interface.
class RegExpInput {
 public:
  // Virtual so that a concrete input can be deleted through a RegExpInput*.
  virtual ~RegExpInput() {}

  // Returns the input converted to a C++ string.
  virtual string ToString() const = 0;
};
46
47// The regular expression abstract class. It supports only functions used in
48// phonenumberutil.cc. Consume(), Match() and Replace() methods must be
49// implemented.
50class RegExp {
51 public:
52  virtual ~RegExp() {}
53
54  // Matches string to regular expression, returns true if expression was
55  // matched, false otherwise, advances position in the match.
56  // input_string - string to be searched.
57  // anchor_at_start - if true, match would be successful only if it appears at
58  // the beginning of the tested region of the string.
59  // matched_string1..6 - string extracted from the match in sequential order.
60  // Can be NULL.
61  virtual bool Consume(RegExpInput* input_string,
62                       bool anchor_at_start,
63                       string* matched_string1,
64                       string* matched_string2,
65                       string* matched_string3,
66                       string* matched_string4,
67                       string* matched_string5,
68                       string* matched_string6) const = 0;
69
70  // Helper methods calling the Consume method that assume the match must start
71  // at the beginning.
72  inline bool Consume(RegExpInput* input_string, string* matched_string1,
73                      string* matched_string2,
74                      string* matched_string3,
75                      string* matched_string4,
76                      string* matched_string5,
77                      string* matched_string6) const {
78    return Consume(input_string, true, matched_string1, matched_string2,
79                   matched_string3, matched_string4, matched_string5,
80                   matched_string6);
81  }
82
83  inline bool Consume(RegExpInput* input_string, string* matched_string1,
84                      string* matched_string2,
85                      string* matched_string3,
86                      string* matched_string4,
87                      string* matched_string5) const {
88    return Consume(input_string, true, matched_string1, matched_string2,
89                   matched_string3, matched_string4, matched_string5, NULL);
90  }
91
92  inline bool Consume(RegExpInput* input_string, string* matched_string1,
93                      string* matched_string2,
94                      string* matched_string3,
95                      string* matched_string4) const {
96    return Consume(input_string, true, matched_string1, matched_string2,
97                   matched_string3, matched_string4, NULL, NULL);
98  }
99
100
101  // Helper methods calling the Consume method that assume the match must start
102  // at the beginning.
103  inline bool Consume(RegExpInput* input_string,
104                      string* matched_string1,
105                      string* matched_string2,
106                      string* matched_string3) const {
107    return Consume(input_string, true, matched_string1, matched_string2,
108                   matched_string3, NULL, NULL, NULL);
109  }
110
111  inline bool Consume(RegExpInput* input_string,
112                      string* matched_string1,
113                      string* matched_string2) const {
114    return Consume(input_string, true, matched_string1, matched_string2, NULL,
115    		   NULL, NULL, NULL);
116  }
117
118  inline bool Consume(RegExpInput* input_string, string* matched_string) const {
119    return Consume(input_string, true, matched_string, NULL, NULL, NULL, NULL,
120    	   	   NULL);
121  }
122
123  inline bool Consume(RegExpInput* input_string) const {
124    return Consume(input_string, true, NULL, NULL, NULL, NULL, NULL, NULL);
125  }
126
127  // Helper method calling the Consume method that assumes the match can start
128  // at any place in the string.
129  inline bool FindAndConsume(RegExpInput* input_string,
130                             string* matched_string) const {
131    return Consume(input_string, false, matched_string, NULL, NULL, NULL, NULL,
132    	           NULL);
133  }
134
135  // Matches string to regular expression, returns true if the expression was
136  // matched, false otherwise.
137  // input_string - string to be searched.
138  // full_match - if true, match would be successful only if it matches the
139  // complete string.
140  // matched_string - the string extracted from the match. Can be NULL.
141  virtual bool Match(const string& input_string,
142                     bool full_match,
143                     string* matched_string) const = 0;
144
145  // Helper methods calling the Match method with the right arguments.
146  inline bool PartialMatch(const string& input_string,
147                           string* matched_string) const {
148    return Match(input_string, false, matched_string);
149  }
150
151  inline bool PartialMatch(const string& input_string) const {
152    return Match(input_string, false, NULL);
153  }
154
155  inline bool FullMatch(const string& input_string,
156                        string* matched_string) const {
157    return Match(input_string, true, matched_string);
158  }
159
160  inline bool FullMatch(const string& input_string) const {
161    return Match(input_string, true, NULL);
162  }
163
164  // Replaces match(es) in 'string_to_process'. If 'global' is true,
165  // replaces all the matches, otherwise only the first match.
166  // replacement_string - text the matches are replaced with. The groups in the
167  // replacement string are referenced with the $[0-9] notation.
168  // Returns true if the pattern matches and a replacement occurs, false
169  // otherwise.
170  virtual bool Replace(string* string_to_process,
171                       bool global,
172                       const string& replacement_string) const = 0;
173
174  // Helper methods calling the Replace method with the right arguments.
175  inline bool Replace(string* string_to_process,
176                      const string& replacement_string) const {
177    return Replace(string_to_process, false, replacement_string);
178  }
179
180  inline bool GlobalReplace(string* string_to_process,
181                            const string& replacement_string) const {
182    return Replace(string_to_process, true, replacement_string);
183  }
184};
185
186// Abstract factory class that lets its subclasses instantiate the classes
187// implementing RegExp and RegExpInput.
188class AbstractRegExpFactory {
189 public:
190  virtual ~AbstractRegExpFactory() {}
191
192  // Creates a new instance of RegExpInput. The deletion of the returned
193  // instance is under the responsibility of the caller.
194  virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;
195
196  // Creates a new instance of RegExp. The deletion of the returned instance is
197  // under the responsibility of the caller.
198  virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
199};
200
201}  // namespace phonenumbers
202}  // namespace i18n
203
204#endif  // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
205