11767c5feSopenharmony_ci// Copyright (C) 2011 The Libphonenumber Authors
21767c5feSopenharmony_ci//
31767c5feSopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License");
41767c5feSopenharmony_ci// you may not use this file except in compliance with the License.
51767c5feSopenharmony_ci// You may obtain a copy of the License at
61767c5feSopenharmony_ci//
71767c5feSopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0
81767c5feSopenharmony_ci//
91767c5feSopenharmony_ci// Unless required by applicable law or agreed to in writing, software
101767c5feSopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS,
111767c5feSopenharmony_ci// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
121767c5feSopenharmony_ci// See the License for the specific language governing permissions and
131767c5feSopenharmony_ci// limitations under the License.
141767c5feSopenharmony_ci
151767c5feSopenharmony_ci// Author: George Yakovlev
161767c5feSopenharmony_ci//         Philippe Liard
171767c5feSopenharmony_ci//
181767c5feSopenharmony_ci// RegExp adapter to allow a pluggable regexp engine. It has been introduced
191767c5feSopenharmony_ci// during the integration of the open-source version of this library into
201767c5feSopenharmony_ci// Chromium to be able to use the ICU Regex engine instead of RE2, which is not
211767c5feSopenharmony_ci// officially supported on Windows.
221767c5feSopenharmony_ci// Since RE2 was initially used in this library, the interface of this adapter
231767c5feSopenharmony_ci// is very close to the subset of the RE2 API used in phonenumberutil.cc.
241767c5feSopenharmony_ci
251767c5feSopenharmony_ci#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
261767c5feSopenharmony_ci#define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
271767c5feSopenharmony_ci
281767c5feSopenharmony_ci#include <cstddef>
291767c5feSopenharmony_ci#include <string>
301767c5feSopenharmony_ci
311767c5feSopenharmony_cinamespace i18n {
321767c5feSopenharmony_cinamespace phonenumbers {
331767c5feSopenharmony_ci
341767c5feSopenharmony_ciusing std::string;
351767c5feSopenharmony_ci
// Abstraction over the input fed to RegExp::Consume(). The concrete
// representation differs between regexp implementations (StringPiece for RE2,
// UnicodeString for ICU Regex), so callers only deal with this interface.
class RegExpInput {
 public:
  virtual ~RegExpInput() {}

  // Returns this input converted to a C++ string.
  virtual string ToString() const = 0;
};
461767c5feSopenharmony_ci
// Abstract regular expression. Only the operations used in phonenumberutil.cc
// are exposed. Implementations must provide the three pure virtual methods
// Consume(), Match() and Replace(); every other method is a non-virtual
// convenience wrapper that forwards to one of them.
class RegExp {
 public:
  virtual ~RegExp() {}

  // Matches the input against this regular expression. Returns true if the
  // expression was matched, false otherwise, and advances the position in the
  // match.
  //   input_string      - the input to be searched.
  //   anchor_at_start   - when true, the match only succeeds if it appears at
  //                       the beginning of the tested region of the input.
  //   matched_string1-6 - receive the strings extracted from the match, in
  //                       sequential order. Each of them can be NULL.
  virtual bool Consume(RegExpInput* input_string,
                       bool anchor_at_start,
                       string* matched_string1,
                       string* matched_string2,
                       string* matched_string3,
                       string* matched_string4,
                       string* matched_string5,
                       string* matched_string6) const = 0;

  // Anchored convenience overloads. Each one forwards to the virtual Consume()
  // with anchor_at_start set to true, passing NULL for every extraction slot
  // the caller did not supply.

  // Six extracted strings.
  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2,
               string* matched_string3,
               string* matched_string4,
               string* matched_string5,
               string* matched_string6) const {
    const bool anchored = true;
    return Consume(input_string, anchored,
                   matched_string1, matched_string2, matched_string3,
                   matched_string4, matched_string5, matched_string6);
  }

  // Five extracted strings.
  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2,
               string* matched_string3,
               string* matched_string4,
               string* matched_string5) const {
    const bool anchored = true;
    string* const unused = NULL;
    return Consume(input_string, anchored,
                   matched_string1, matched_string2, matched_string3,
                   matched_string4, matched_string5, unused);
  }

  // Four extracted strings.
  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2,
               string* matched_string3,
               string* matched_string4) const {
    const bool anchored = true;
    string* const unused = NULL;
    return Consume(input_string, anchored,
                   matched_string1, matched_string2, matched_string3,
                   matched_string4, unused, unused);
  }

  // Three extracted strings.
  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2,
               string* matched_string3) const {
    const bool anchored = true;
    string* const unused = NULL;
    return Consume(input_string, anchored,
                   matched_string1, matched_string2, matched_string3,
                   unused, unused, unused);
  }

  // Two extracted strings.
  bool Consume(RegExpInput* input_string,
               string* matched_string1,
               string* matched_string2) const {
    const bool anchored = true;
    string* const unused = NULL;
    return Consume(input_string, anchored,
                   matched_string1, matched_string2, unused,
                   unused, unused, unused);
  }

  // One extracted string.
  bool Consume(RegExpInput* input_string, string* matched_string) const {
    const bool anchored = true;
    string* const unused = NULL;
    return Consume(input_string, anchored,
                   matched_string, unused, unused,
                   unused, unused, unused);
  }

  // No extracted string; only reports whether the expression matched.
  bool Consume(RegExpInput* input_string) const {
    const bool anchored = true;
    string* const unused = NULL;
    return Consume(input_string, anchored,
                   unused, unused, unused,
                   unused, unused, unused);
  }

  // Same as the one-string Consume() overload, except that anchor_at_start is
  // false: the match can start at any place in the input.
  bool FindAndConsume(RegExpInput* input_string,
                      string* matched_string) const {
    const bool anchored = false;
    string* const unused = NULL;
    return Consume(input_string, anchored,
                   matched_string, unused, unused,
                   unused, unused, unused);
  }

  // Matches a string against this regular expression. Returns true if the
  // expression was matched, false otherwise.
  //   input_string   - the string to be searched.
  //   full_match     - when true, the match only succeeds if it covers the
  //                    complete string.
  //   matched_string - receives the string extracted from the match. Can be
  //                    NULL.
  virtual bool Match(const string& input_string,
                     bool full_match,
                     string* matched_string) const = 0;

  // Convenience wrappers around Match(). The PartialMatch() overloads pass
  // full_match = false and the FullMatch() overloads pass full_match = true;
  // the single-argument forms pass NULL as matched_string.
  bool PartialMatch(const string& input_string,
                    string* matched_string) const {
    const bool whole_string = false;
    return Match(input_string, whole_string, matched_string);
  }

  bool PartialMatch(const string& input_string) const {
    const bool whole_string = false;
    string* const unused = NULL;
    return Match(input_string, whole_string, unused);
  }

  bool FullMatch(const string& input_string,
                 string* matched_string) const {
    const bool whole_string = true;
    return Match(input_string, whole_string, matched_string);
  }

  bool FullMatch(const string& input_string) const {
    const bool whole_string = true;
    string* const unused = NULL;
    return Match(input_string, whole_string, unused);
  }

  // Replaces match(es) in 'string_to_process'. When 'global' is true all the
  // matches are replaced, otherwise only the first one.
  //   replacement_string - the text the matches are replaced with. Groups are
  //                        referenced in it with the $[0-9] notation.
  // Returns true if the pattern matches and a replacement occurs, false
  // otherwise.
  virtual bool Replace(string* string_to_process,
                       bool global,
                       const string& replacement_string) const = 0;

  // Convenience wrappers around the virtual Replace(): the two-argument
  // Replace() passes global = false, GlobalReplace() passes global = true.
  bool Replace(string* string_to_process,
               const string& replacement_string) const {
    const bool every_match = false;
    return Replace(string_to_process, every_match, replacement_string);
  }

  bool GlobalReplace(string* string_to_process,
                     const string& replacement_string) const {
    const bool every_match = true;
    return Replace(string_to_process, every_match, replacement_string);
  }
};
1851767c5feSopenharmony_ci
// Abstract factory whose subclasses instantiate the classes implementing
// RegExp and RegExpInput.
class AbstractRegExpFactory {
 public:
  virtual ~AbstractRegExpFactory() {}

  // Returns a newly created RegExpInput for utf8_input. The caller is
  // responsible for deleting the returned instance.
  virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;

  // Returns a newly created RegExp for utf8_regexp. The caller is responsible
  // for deleting the returned instance.
  virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
};
2001767c5feSopenharmony_ci
2011767c5feSopenharmony_ci}  // namespace phonenumbers
2021767c5feSopenharmony_ci}  // namespace i18n
2031767c5feSopenharmony_ci
2041767c5feSopenharmony_ci#endif  // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
205