// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: George Yakovlev
//         Philippe Liard

#include "phonenumbers/regexp_adapter_re2.h"

#include <cstddef>
#include <string>

#include <re2/re2.h>
#include <re2/stringpiece.h>

#include "phonenumbers/base/basictypes.h"
#include "phonenumbers/base/logging.h"
#include "phonenumbers/stringutil.h"

#include "absl/strings/string_view.h"
namespace i18n {
namespace phonenumbers {

// Implementation of RegExpInput abstract class.
351767c5feSopenharmony_ciclass RE2RegExpInput : public RegExpInput { 361767c5feSopenharmony_ci public: 371767c5feSopenharmony_ci explicit RE2RegExpInput(const string& utf8_input) 381767c5feSopenharmony_ci : string_(utf8_input), 391767c5feSopenharmony_ci utf8_input_(string_) {} 401767c5feSopenharmony_ci 411767c5feSopenharmony_ci virtual string ToString() const { 421767c5feSopenharmony_ci return utf8_input_.ToString(); 431767c5feSopenharmony_ci } 441767c5feSopenharmony_ci 451767c5feSopenharmony_ci StringPiece* Data() { 461767c5feSopenharmony_ci return &utf8_input_; 471767c5feSopenharmony_ci } 481767c5feSopenharmony_ci 491767c5feSopenharmony_ci private: 501767c5feSopenharmony_ci // string_ holds the string referenced by utf8_input_ as StringPiece doesn't 511767c5feSopenharmony_ci // copy the string passed in. 521767c5feSopenharmony_ci const string string_; 531767c5feSopenharmony_ci StringPiece utf8_input_; 541767c5feSopenharmony_ci}; 551767c5feSopenharmony_ci 561767c5feSopenharmony_cinamespace { 571767c5feSopenharmony_ci 581767c5feSopenharmony_citemplate <typename Function, typename Input> 591767c5feSopenharmony_cibool DispatchRE2Call(Function regex_function, 601767c5feSopenharmony_ci Input input, 611767c5feSopenharmony_ci const RE2& regexp, 621767c5feSopenharmony_ci string* out1, 631767c5feSopenharmony_ci string* out2, 641767c5feSopenharmony_ci string* out3, 651767c5feSopenharmony_ci string* out4, 661767c5feSopenharmony_ci string* out5, 671767c5feSopenharmony_ci string* out6) { 681767c5feSopenharmony_ci const RE2::Arg outs[] = { out1, out2, out3, out4, out5, out6}; 691767c5feSopenharmony_ci const RE2::Arg* const args[] = {&outs[0], &outs[1], &outs[2], 701767c5feSopenharmony_ci &outs[3], &outs[4], &outs[5]}; 711767c5feSopenharmony_ci const int argc = 721767c5feSopenharmony_ci out6 ? 6 : out5 ? 5 : out4 ? 4 : out3 ? 3 : out2 ? 2 : out1 ? 
1 : 0; 731767c5feSopenharmony_ci return regex_function(input, regexp, args, argc); 741767c5feSopenharmony_ci} 751767c5feSopenharmony_ci 761767c5feSopenharmony_ci// Replaces unescaped dollar-signs with backslashes. Backslashes are deleted 771767c5feSopenharmony_ci// when they escape dollar-signs. 781767c5feSopenharmony_cistring TransformRegularExpressionToRE2Syntax(const string& regex) { 791767c5feSopenharmony_ci string re2_regex(regex); 801767c5feSopenharmony_ci if (GlobalReplaceSubstring("$", "\\", &re2_regex) == 0) { 811767c5feSopenharmony_ci return regex; 821767c5feSopenharmony_ci } 831767c5feSopenharmony_ci // If we replaced a dollar sign with a backslash and there are now two 841767c5feSopenharmony_ci // backslashes in the string, we assume that the dollar-sign was previously 851767c5feSopenharmony_ci // escaped and that we need to retain it. To do this, we replace pairs of 861767c5feSopenharmony_ci // backslashes with a dollar sign. 871767c5feSopenharmony_ci GlobalReplaceSubstring("\\\\", "$", &re2_regex); 881767c5feSopenharmony_ci return re2_regex; 891767c5feSopenharmony_ci} 901767c5feSopenharmony_ci 911767c5feSopenharmony_ci} // namespace 921767c5feSopenharmony_ci 931767c5feSopenharmony_ci// Implementation of RegExp abstract class. 
941767c5feSopenharmony_ciclass RE2RegExp : public RegExp { 951767c5feSopenharmony_ci public: 961767c5feSopenharmony_ci explicit RE2RegExp(const string& utf8_regexp) 971767c5feSopenharmony_ci : utf8_regexp_(utf8_regexp) {} 981767c5feSopenharmony_ci 991767c5feSopenharmony_ci virtual bool Consume(RegExpInput* input_string, 1001767c5feSopenharmony_ci bool anchor_at_start, 1011767c5feSopenharmony_ci string* matched_string1, 1021767c5feSopenharmony_ci string* matched_string2, 1031767c5feSopenharmony_ci string* matched_string3, 1041767c5feSopenharmony_ci string* matched_string4, 1051767c5feSopenharmony_ci string* matched_string5, 1061767c5feSopenharmony_ci string* matched_string6) const { 1071767c5feSopenharmony_ci DCHECK(input_string); 1081767c5feSopenharmony_ci StringPiece* utf8_input = 1091767c5feSopenharmony_ci static_cast<RE2RegExpInput*>(input_string)->Data(); 1101767c5feSopenharmony_ci 1111767c5feSopenharmony_ci if (anchor_at_start) { 1121767c5feSopenharmony_ci return DispatchRE2Call(RE2::ConsumeN, utf8_input, utf8_regexp_, 1131767c5feSopenharmony_ci matched_string1, matched_string2, 1141767c5feSopenharmony_ci matched_string3, matched_string4, 1151767c5feSopenharmony_ci matched_string5, matched_string6); 1161767c5feSopenharmony_ci } else { 1171767c5feSopenharmony_ci return DispatchRE2Call(RE2::FindAndConsumeN, utf8_input, utf8_regexp_, 1181767c5feSopenharmony_ci matched_string1, matched_string2, 1191767c5feSopenharmony_ci matched_string3, matched_string4, 1201767c5feSopenharmony_ci matched_string5, matched_string6); 1211767c5feSopenharmony_ci } 1221767c5feSopenharmony_ci } 1231767c5feSopenharmony_ci 1241767c5feSopenharmony_ci virtual bool Match(const string& input_string, 1251767c5feSopenharmony_ci bool full_match, 1261767c5feSopenharmony_ci string* matched_string) const { 1271767c5feSopenharmony_ci if (full_match) { 1281767c5feSopenharmony_ci return DispatchRE2Call(RE2::FullMatchN, input_string, utf8_regexp_, 1291767c5feSopenharmony_ci matched_string, NULL, NULL, 
NULL, NULL, NULL); 1301767c5feSopenharmony_ci } else { 1311767c5feSopenharmony_ci return DispatchRE2Call(RE2::PartialMatchN, input_string, utf8_regexp_, 1321767c5feSopenharmony_ci matched_string, NULL, NULL, NULL, NULL, NULL); 1331767c5feSopenharmony_ci } 1341767c5feSopenharmony_ci } 1351767c5feSopenharmony_ci 1361767c5feSopenharmony_ci virtual bool Replace(string* string_to_process, 1371767c5feSopenharmony_ci bool global, 1381767c5feSopenharmony_ci const string& replacement_string) const { 1391767c5feSopenharmony_ci DCHECK(string_to_process); 1401767c5feSopenharmony_ci const string re2_replacement_string = 1411767c5feSopenharmony_ci TransformRegularExpressionToRE2Syntax(replacement_string); 1421767c5feSopenharmony_ci if (global) { 1431767c5feSopenharmony_ci return RE2::GlobalReplace(string_to_process, utf8_regexp_, 1441767c5feSopenharmony_ci re2_replacement_string); 1451767c5feSopenharmony_ci } else { 1461767c5feSopenharmony_ci return RE2::Replace(string_to_process, utf8_regexp_, 1471767c5feSopenharmony_ci re2_replacement_string); 1481767c5feSopenharmony_ci } 1491767c5feSopenharmony_ci } 1501767c5feSopenharmony_ci 1511767c5feSopenharmony_ci private: 1521767c5feSopenharmony_ci RE2 utf8_regexp_; 1531767c5feSopenharmony_ci}; 1541767c5feSopenharmony_ci 1551767c5feSopenharmony_ciRegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const { 1561767c5feSopenharmony_ci return new RE2RegExpInput(utf8_input); 1571767c5feSopenharmony_ci} 1581767c5feSopenharmony_ci 1591767c5feSopenharmony_ciRegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const { 1601767c5feSopenharmony_ci return new RE2RegExp(utf8_regexp); 1611767c5feSopenharmony_ci} 1621767c5feSopenharmony_ci 1631767c5feSopenharmony_ci} // namespace phonenumbers 1641767c5feSopenharmony_ci} // namespace i18n 165