11767c5feSopenharmony_ci// Copyright (C) 2011 The Libphonenumber Authors
21767c5feSopenharmony_ci//
31767c5feSopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License");
41767c5feSopenharmony_ci// you may not use this file except in compliance with the License.
51767c5feSopenharmony_ci// You may obtain a copy of the License at
61767c5feSopenharmony_ci//
71767c5feSopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0
81767c5feSopenharmony_ci//
91767c5feSopenharmony_ci// Unless required by applicable law or agreed to in writing, software
101767c5feSopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS,
111767c5feSopenharmony_ci// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
121767c5feSopenharmony_ci// See the License for the specific language governing permissions and
131767c5feSopenharmony_ci// limitations under the License.
141767c5feSopenharmony_ci
151767c5feSopenharmony_ci// Author: George Yakovlev
161767c5feSopenharmony_ci//         Philippe Liard
171767c5feSopenharmony_ci
181767c5feSopenharmony_ci#include "phonenumbers/regexp_adapter_re2.h"
191767c5feSopenharmony_ci
201767c5feSopenharmony_ci#include <cstddef>
211767c5feSopenharmony_ci#include <string>
221767c5feSopenharmony_ci
231767c5feSopenharmony_ci#include <re2/re2.h>
241767c5feSopenharmony_ci#include <re2/stringpiece.h>
251767c5feSopenharmony_ci
261767c5feSopenharmony_ci#include "phonenumbers/base/basictypes.h"
271767c5feSopenharmony_ci#include "phonenumbers/base/logging.h"
281767c5feSopenharmony_ci#include "phonenumbers/stringutil.h"
291767c5feSopenharmony_ci
301767c5feSopenharmony_ci#include "absl/strings/string_view.h"
311767c5feSopenharmony_cinamespace i18n {
321767c5feSopenharmony_cinamespace phonenumbers {
331767c5feSopenharmony_ci
341767c5feSopenharmony_ci// Implementation of RegExpInput abstract class.
351767c5feSopenharmony_ciclass RE2RegExpInput : public RegExpInput {
361767c5feSopenharmony_ci public:
371767c5feSopenharmony_ci  explicit RE2RegExpInput(const string& utf8_input)
381767c5feSopenharmony_ci      : string_(utf8_input),
391767c5feSopenharmony_ci        utf8_input_(string_) {}
401767c5feSopenharmony_ci
411767c5feSopenharmony_ci  virtual string ToString() const {
421767c5feSopenharmony_ci    return utf8_input_.ToString();
431767c5feSopenharmony_ci  }
441767c5feSopenharmony_ci
451767c5feSopenharmony_ci  StringPiece* Data() {
461767c5feSopenharmony_ci    return &utf8_input_;
471767c5feSopenharmony_ci  }
481767c5feSopenharmony_ci
491767c5feSopenharmony_ci private:
501767c5feSopenharmony_ci  // string_ holds the string referenced by utf8_input_ as StringPiece doesn't
511767c5feSopenharmony_ci  // copy the string passed in.
521767c5feSopenharmony_ci  const string string_;
531767c5feSopenharmony_ci  StringPiece utf8_input_;
541767c5feSopenharmony_ci};
551767c5feSopenharmony_ci
561767c5feSopenharmony_cinamespace {
571767c5feSopenharmony_ci
581767c5feSopenharmony_citemplate <typename Function, typename Input>
591767c5feSopenharmony_cibool DispatchRE2Call(Function regex_function,
601767c5feSopenharmony_ci                     Input input,
611767c5feSopenharmony_ci                     const RE2& regexp,
621767c5feSopenharmony_ci                     string* out1,
631767c5feSopenharmony_ci                     string* out2,
641767c5feSopenharmony_ci                     string* out3,
651767c5feSopenharmony_ci                     string* out4,
661767c5feSopenharmony_ci                     string* out5,
671767c5feSopenharmony_ci                     string* out6) {
681767c5feSopenharmony_ci  const RE2::Arg outs[] = { out1, out2, out3, out4, out5, out6};
691767c5feSopenharmony_ci  const RE2::Arg* const args[] = {&outs[0], &outs[1], &outs[2],
701767c5feSopenharmony_ci                                  &outs[3], &outs[4], &outs[5]};
711767c5feSopenharmony_ci  const int argc =
721767c5feSopenharmony_ci      out6 ? 6 : out5 ? 5 : out4 ? 4 : out3 ? 3 : out2 ? 2 : out1 ? 1 : 0;
731767c5feSopenharmony_ci  return regex_function(input, regexp, args, argc);
741767c5feSopenharmony_ci}
751767c5feSopenharmony_ci
// Rewrites a replacement string that marks group references with '$' into one
// that marks them with '\':
//
//   - an unescaped '$' becomes '\';
//   - "\$" (an escaped dollar sign) becomes a plain '$';
//   - any other backslash escape, including "\\", is copied through as is;
//   - every other character is copied through as is.
//
// Strings without any '$' are returned unchanged.
//
// The conversion is a single left-to-right scan rather than two global
// substitutions, so that a "\\" pair already present in the input is never
// confused with a backslash that was produced from a '$'.
std::string TransformRegularExpressionToRE2Syntax(const std::string& regex) {
  // Nothing to translate when there is no dollar sign at all.
  if (regex.find('$') == std::string::npos) {
    return regex;
  }

  const std::string::size_type length = regex.size();
  std::string re2_regex;
  re2_regex.reserve(length);

  for (std::string::size_type i = 0; i < length; ++i) {
    const char current = regex[i];
    if (current == '\\' && i + 1 < length) {
      // Consume the escaped character together with its backslash so that it
      // is not interpreted on its own in the next iteration.
      const char escaped = regex[++i];
      if (escaped == '$') {
        re2_regex += '$';
      } else {
        re2_regex += '\\';
        re2_regex += escaped;
      }
    } else if (current == '$') {
      re2_regex += '\\';
    } else {
      // Ordinary character, or a lone backslash at the very end.
      re2_regex += current;
    }
  }
  return re2_regex;
}
901767c5feSopenharmony_ci
911767c5feSopenharmony_ci}  // namespace
921767c5feSopenharmony_ci
931767c5feSopenharmony_ci// Implementation of RegExp abstract class.
941767c5feSopenharmony_ciclass RE2RegExp : public RegExp {
951767c5feSopenharmony_ci public:
961767c5feSopenharmony_ci  explicit RE2RegExp(const string& utf8_regexp)
971767c5feSopenharmony_ci      : utf8_regexp_(utf8_regexp) {}
981767c5feSopenharmony_ci
991767c5feSopenharmony_ci  virtual bool Consume(RegExpInput* input_string,
1001767c5feSopenharmony_ci                       bool anchor_at_start,
1011767c5feSopenharmony_ci                       string* matched_string1,
1021767c5feSopenharmony_ci                       string* matched_string2,
1031767c5feSopenharmony_ci                       string* matched_string3,
1041767c5feSopenharmony_ci                       string* matched_string4,
1051767c5feSopenharmony_ci                       string* matched_string5,
1061767c5feSopenharmony_ci                       string* matched_string6) const {
1071767c5feSopenharmony_ci    DCHECK(input_string);
1081767c5feSopenharmony_ci    StringPiece* utf8_input =
1091767c5feSopenharmony_ci        static_cast<RE2RegExpInput*>(input_string)->Data();
1101767c5feSopenharmony_ci
1111767c5feSopenharmony_ci    if (anchor_at_start) {
1121767c5feSopenharmony_ci      return DispatchRE2Call(RE2::ConsumeN, utf8_input, utf8_regexp_,
1131767c5feSopenharmony_ci                             matched_string1, matched_string2,
1141767c5feSopenharmony_ci                             matched_string3, matched_string4,
1151767c5feSopenharmony_ci                             matched_string5, matched_string6);
1161767c5feSopenharmony_ci    } else {
1171767c5feSopenharmony_ci      return DispatchRE2Call(RE2::FindAndConsumeN, utf8_input, utf8_regexp_,
1181767c5feSopenharmony_ci                             matched_string1, matched_string2,
1191767c5feSopenharmony_ci                             matched_string3, matched_string4,
1201767c5feSopenharmony_ci                             matched_string5, matched_string6);
1211767c5feSopenharmony_ci    }
1221767c5feSopenharmony_ci  }
1231767c5feSopenharmony_ci
1241767c5feSopenharmony_ci  virtual bool Match(const string& input_string,
1251767c5feSopenharmony_ci                     bool full_match,
1261767c5feSopenharmony_ci                     string* matched_string) const {
1271767c5feSopenharmony_ci    if (full_match) {
1281767c5feSopenharmony_ci      return DispatchRE2Call(RE2::FullMatchN, input_string, utf8_regexp_,
1291767c5feSopenharmony_ci                             matched_string, NULL, NULL, NULL, NULL, NULL);
1301767c5feSopenharmony_ci    } else {
1311767c5feSopenharmony_ci      return DispatchRE2Call(RE2::PartialMatchN, input_string, utf8_regexp_,
1321767c5feSopenharmony_ci                             matched_string, NULL, NULL, NULL, NULL, NULL);
1331767c5feSopenharmony_ci    }
1341767c5feSopenharmony_ci  }
1351767c5feSopenharmony_ci
1361767c5feSopenharmony_ci  virtual bool Replace(string* string_to_process,
1371767c5feSopenharmony_ci                       bool global,
1381767c5feSopenharmony_ci                       const string& replacement_string) const {
1391767c5feSopenharmony_ci    DCHECK(string_to_process);
1401767c5feSopenharmony_ci    const string re2_replacement_string =
1411767c5feSopenharmony_ci        TransformRegularExpressionToRE2Syntax(replacement_string);
1421767c5feSopenharmony_ci    if (global) {
1431767c5feSopenharmony_ci      return RE2::GlobalReplace(string_to_process, utf8_regexp_,
1441767c5feSopenharmony_ci                                re2_replacement_string);
1451767c5feSopenharmony_ci    } else {
1461767c5feSopenharmony_ci      return RE2::Replace(string_to_process, utf8_regexp_,
1471767c5feSopenharmony_ci                          re2_replacement_string);
1481767c5feSopenharmony_ci    }
1491767c5feSopenharmony_ci  }
1501767c5feSopenharmony_ci
1511767c5feSopenharmony_ci private:
1521767c5feSopenharmony_ci  RE2 utf8_regexp_;
1531767c5feSopenharmony_ci};
1541767c5feSopenharmony_ci
1551767c5feSopenharmony_ciRegExpInput* RE2RegExpFactory::CreateInput(const string& utf8_input) const {
1561767c5feSopenharmony_ci  return new RE2RegExpInput(utf8_input);
1571767c5feSopenharmony_ci}
1581767c5feSopenharmony_ci
1591767c5feSopenharmony_ciRegExp* RE2RegExpFactory::CreateRegExp(const string& utf8_regexp) const {
1601767c5feSopenharmony_ci  return new RE2RegExp(utf8_regexp);
1611767c5feSopenharmony_ci}
1621767c5feSopenharmony_ci
1631767c5feSopenharmony_ci}  // namespace phonenumbers
1641767c5feSopenharmony_ci}  // namespace i18n
165