// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: George Yakovlev
//         Philippe Liard
//
// RegExp adapter to allow a pluggable regexp engine. It has been introduced
// during the integration of the open-source version of this library into
// Chromium to be able to use the ICU Regex engine instead of RE2, which is not
// officially supported on Windows.
// Since RE2 was initially used in this library, the interface of this adapter
// is very close to the subset of the RE2 API used in phonenumberutil.cc.
#ifndef I18N_PHONENUMBERS_REGEXP_ADAPTER_H_
#define I18N_PHONENUMBERS_REGEXP_ADAPTER_H_

#include <cstddef>
#include <string>

namespace i18n {
namespace phonenumbers {

using std::string;

// RegExpInput is the interface that abstracts the input that feeds the
// Consume() method of RegExp which may differ depending on its various
// implementations (StringPiece for RE2, UnicodeString for ICU Regex).
class RegExpInput {
 public:
  virtual ~RegExpInput() {}

  // Converts to a C++ string.
  virtual string ToString() const = 0;
};

// The regular expression abstract class. It supports only functions used in
// phonenumberutil.cc. Consume(), Match() and Replace() methods must be
// implemented; every other member is a non-virtual convenience wrapper that
// forwards to one of those three with fixed arguments.
class RegExp {
 public:
  virtual ~RegExp() {}

  // Matches string to regular expression, returns true if expression was
  // matched, false otherwise, advances position in the match.
  // input_string - string to be searched.
  // anchor_at_start - if true, match would be successful only if it appears at
  // the beginning of the tested region of the string.
  // matched_string1..6 - string extracted from the match in sequential order.
  // Can be NULL.
  virtual bool Consume(RegExpInput* input_string,
                       bool anchor_at_start,
                       string* matched_string1,
                       string* matched_string2,
                       string* matched_string3,
                       string* matched_string4,
                       string* matched_string5,
                       string* matched_string6) const = 0;

  // Helper methods calling the Consume method that assume the match must start
  // at the beginning (anchor_at_start is always passed as true). Each overload
  // forwards the output strings it receives and passes NULL for the remaining
  // ones, from six output strings down to none.
  inline bool Consume(RegExpInput* input_string, string* matched_string1,
                      string* matched_string2,
                      string* matched_string3,
                      string* matched_string4,
                      string* matched_string5,
                      string* matched_string6) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, matched_string4, matched_string5,
                   matched_string6);
  }

  inline bool Consume(RegExpInput* input_string, string* matched_string1,
                      string* matched_string2,
                      string* matched_string3,
                      string* matched_string4,
                      string* matched_string5) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, matched_string4, matched_string5, NULL);
  }

  inline bool Consume(RegExpInput* input_string, string* matched_string1,
                      string* matched_string2,
                      string* matched_string3,
                      string* matched_string4) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, matched_string4, NULL, NULL);
  }

  inline bool Consume(RegExpInput* input_string,
                      string* matched_string1,
                      string* matched_string2,
                      string* matched_string3) const {
    return Consume(input_string, true, matched_string1, matched_string2,
                   matched_string3, NULL, NULL, NULL);
  }

  inline bool Consume(RegExpInput* input_string,
                      string* matched_string1,
                      string* matched_string2) const {
    return Consume(input_string, true, matched_string1, matched_string2, NULL,
                   NULL, NULL, NULL);
  }

  inline bool Consume(RegExpInput* input_string, string* matched_string) const {
    return Consume(input_string, true, matched_string, NULL, NULL, NULL, NULL,
                   NULL);
  }

  inline bool Consume(RegExpInput* input_string) const {
    return Consume(input_string, true, NULL, NULL, NULL, NULL, NULL, NULL);
  }

  // Helper method calling the Consume method that assumes the match can start
  // at any place in the string (anchor_at_start is passed as false). Only the
  // first output string is forwarded.
  inline bool FindAndConsume(RegExpInput* input_string,
                             string* matched_string) const {
    return Consume(input_string, false, matched_string, NULL, NULL, NULL, NULL,
                   NULL);
  }

  // Matches string to regular expression, returns true if the expression was
  // matched, false otherwise.
  // input_string - string to be searched.
  // full_match - if true, match would be successful only if it matches the
  // complete string.
  // matched_string - the string extracted from the match. Can be NULL.
  virtual bool Match(const string& input_string,
                     bool full_match,
                     string* matched_string) const = 0;

  // Helper methods calling the Match method with the right arguments:
  // PartialMatch passes full_match as false, FullMatch passes it as true. The
  // overloads without an output parameter pass NULL for matched_string.
  inline bool PartialMatch(const string& input_string,
                           string* matched_string) const {
    return Match(input_string, false, matched_string);
  }

  inline bool PartialMatch(const string& input_string) const {
    return Match(input_string, false, NULL);
  }

  inline bool FullMatch(const string& input_string,
                        string* matched_string) const {
    return Match(input_string, true, matched_string);
  }

  inline bool FullMatch(const string& input_string) const {
    return Match(input_string, true, NULL);
  }

  // Replaces match(es) in 'string_to_process'. If 'global' is true,
  // replaces all the matches, otherwise only the first match.
  // replacement_string - text the matches are replaced with. The groups in the
  // replacement string are referenced with the $[0-9] notation.
  // Returns true if the pattern matches and a replacement occurs, false
  // otherwise.
  virtual bool Replace(string* string_to_process,
                       bool global,
                       const string& replacement_string) const = 0;

  // Helper methods calling the Replace method with the right arguments: the
  // two-argument Replace passes global as false, GlobalReplace passes it as
  // true.
  inline bool Replace(string* string_to_process,
                      const string& replacement_string) const {
    return Replace(string_to_process, false, replacement_string);
  }

  inline bool GlobalReplace(string* string_to_process,
                            const string& replacement_string) const {
    return Replace(string_to_process, true, replacement_string);
  }
};

// Abstract factory class that lets its subclasses instantiate the classes
// implementing RegExp and RegExpInput.
class AbstractRegExpFactory {
 public:
  virtual ~AbstractRegExpFactory() {}

  // Creates a new instance of RegExpInput. The deletion of the returned
  // instance is under the responsibility of the caller.
  virtual RegExpInput* CreateInput(const string& utf8_input) const = 0;

  // Creates a new instance of RegExp. The deletion of the returned instance is
  // under the responsibility of the caller.
  virtual RegExp* CreateRegExp(const string& utf8_regexp) const = 0;
};

}  // namespace phonenumbers
}  // namespace i18n

#endif  // I18N_PHONENUMBERS_REGEXP_ADAPTER_H_