11767c5feSopenharmony_ci// Copyright (C) 2011 The Libphonenumber Authors
21767c5feSopenharmony_ci//
31767c5feSopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License");
41767c5feSopenharmony_ci// you may not use this file except in compliance with the License.
51767c5feSopenharmony_ci// You may obtain a copy of the License at
61767c5feSopenharmony_ci//
71767c5feSopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0
81767c5feSopenharmony_ci//
91767c5feSopenharmony_ci// Unless required by applicable law or agreed to in writing, software
101767c5feSopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS,
111767c5feSopenharmony_ci// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
121767c5feSopenharmony_ci// See the License for the specific language governing permissions and
131767c5feSopenharmony_ci// limitations under the License.
141767c5feSopenharmony_ci//
151767c5feSopenharmony_ci// Author: Lara Rennie
161767c5feSopenharmony_ci// Author: Tao Huang
171767c5feSopenharmony_ci//
181767c5feSopenharmony_ci// This is a direct port from PhoneNumberMatcher.java.
191767c5feSopenharmony_ci// Changes to this class should also happen to the Java version, whenever it
201767c5feSopenharmony_ci// makes sense.
211767c5feSopenharmony_ci
221767c5feSopenharmony_ci#ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
231767c5feSopenharmony_ci#define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
241767c5feSopenharmony_ci
251767c5feSopenharmony_ci#include <string>
261767c5feSopenharmony_ci#include <vector>
271767c5feSopenharmony_ci
281767c5feSopenharmony_ci#include "phonenumbers/base/basictypes.h"
291767c5feSopenharmony_ci#include "phonenumbers/base/memory/scoped_ptr.h"
301767c5feSopenharmony_ci#include "phonenumbers/callback.h"
311767c5feSopenharmony_ci#include "phonenumbers/regexp_adapter.h"
321767c5feSopenharmony_ci
331767c5feSopenharmony_cinamespace i18n {
341767c5feSopenharmony_cinamespace phonenumbers {
351767c5feSopenharmony_ci
// Forward declaration of the ResultCallback4 class template (one result type
// R plus four argument types A1..A4). This header only names it as the
// pointer type of the "checker" parameter of
// PhoneNumberMatcher::CheckNumberGroupingIsValid().
361767c5feSopenharmony_citemplate <class R, class A1, class A2, class A3, class A4>
371767c5feSopenharmony_ci    class ResultCallback4;
381767c5feSopenharmony_ci
// Make the unqualified names "string" and "vector" usable by the declarations
// below. These using-declarations sit at namespace scope inside a header, so
// they are also visible, within i18n::phonenumbers, to every file that
// includes this header.
391767c5feSopenharmony_ciusing std::string;
401767c5feSopenharmony_ciusing std::vector;
411767c5feSopenharmony_ci
// Forward declarations of the types that this header names only by reference,
// by pointer, or as the template argument of scoped_ptr. Their full
// definitions are not needed to declare PhoneNumberMatcher.
421767c5feSopenharmony_ciclass AlternateFormats;
431767c5feSopenharmony_ciclass NumberFormat;
441767c5feSopenharmony_ciclass PhoneNumber;
451767c5feSopenharmony_ciclass PhoneNumberMatch;
461767c5feSopenharmony_ciclass PhoneNumberMatcherRegExps;
471767c5feSopenharmony_ciclass PhoneNumberUtil;
481767c5feSopenharmony_ci
// Finds phone numbers in a piece of text and hands them out one at a time
// through HasNext() / Next(). DISALLOW_COPY_AND_ASSIGN (a macro defined outside
// this file) is applied at the end of the class; going by its name it disables
// copying and assignment.
491767c5feSopenharmony_ciclass PhoneNumberMatcher {
  // Grants PhoneNumberMatcherTest access to the private members below, one of
  // which is documented as being in the interface for testing purposes.
501767c5feSopenharmony_ci  friend class PhoneNumberMatcherTest;
511767c5feSopenharmony_ci public:
521767c5feSopenharmony_ci  // Leniency when finding potential phone numbers in text segments. The levels
531767c5feSopenharmony_ci  // here are ordered in increasing strictness.
541767c5feSopenharmony_ci  enum Leniency {
551767c5feSopenharmony_ci    // Phone numbers accepted are possible, but not necessarily valid.
561767c5feSopenharmony_ci    POSSIBLE,
571767c5feSopenharmony_ci    // Phone numbers accepted are possible and valid.
581767c5feSopenharmony_ci    VALID,
591767c5feSopenharmony_ci    // Phone numbers accepted are valid and are grouped in a possible way for
601767c5feSopenharmony_ci    // this locale. For example, a US number written as "65 02 53 00 00" is not
611767c5feSopenharmony_ci    // accepted at this leniency level, whereas "650 253 0000" or "6502530000"
621767c5feSopenharmony_ci    // are. Numbers with more than one '/' symbol are also dropped at this
631767c5feSopenharmony_ci    // level.
641767c5feSopenharmony_ci    // Warning: This level and EXACT_GROUPING might result in lower coverage,
651767c5feSopenharmony_ci    // especially for regions outside of country code "+1". If you are not sure
661767c5feSopenharmony_ci    // about which level to use, you can send an e-mail to the discussion group
671767c5feSopenharmony_ci    // http://groups.google.com/group/libphonenumber-discuss/
681767c5feSopenharmony_ci    STRICT_GROUPING,
691767c5feSopenharmony_ci    // Phone numbers accepted are valid and are grouped in the same way that we
701767c5feSopenharmony_ci    // would have formatted it, or as a single block. For example, a US number
711767c5feSopenharmony_ci    // written as "650 2530000" is not accepted at this leniency level, whereas
721767c5feSopenharmony_ci    // "650 253 0000" or "6502530000" are.
731767c5feSopenharmony_ci    EXACT_GROUPING,
741767c5feSopenharmony_ci  };
751767c5feSopenharmony_ci
761767c5feSopenharmony_ci  // Constructs a phone number matcher.
  // NOTE(review): the constructor body is not in this header. By name, the
  // arguments appear to correspond to the members phone_util_, text_,
  // preferred_region_, leniency_ and max_tries_ documented at the end of this
  // class - confirm against the implementation. phone_util_ is a reference
  // member, so if "util" is bound to it the caller must keep that object alive
  // for as long as the matcher is in use.
771767c5feSopenharmony_ci  PhoneNumberMatcher(const PhoneNumberUtil& util,
781767c5feSopenharmony_ci                     const string& text,
791767c5feSopenharmony_ci                     const string& region_code,
801767c5feSopenharmony_ci                     Leniency leniency,
811767c5feSopenharmony_ci                     int max_tries);
821767c5feSopenharmony_ci
831767c5feSopenharmony_ci  // Wrapper to construct a phone number matcher, with no limitation on the
841767c5feSopenharmony_ci  // number of retries and VALID Leniency.
  // NOTE(review): this overload takes no PhoneNumberUtil; which instance ends
  // up in phone_util_ is not visible in this header.
851767c5feSopenharmony_ci  PhoneNumberMatcher(const string& text,
861767c5feSopenharmony_ci                     const string& region_code);
871767c5feSopenharmony_ci
  // Declared here without a body; the definition lives outside this header.
881767c5feSopenharmony_ci  ~PhoneNumberMatcher();
891767c5feSopenharmony_ci
901767c5feSopenharmony_ci  // Returns true if the text sequence has another match. Return false if not.
911767c5feSopenharmony_ci  // Always returns false when input contains non UTF-8 characters.
  // Not declared const, so calling it may change the matcher's state.
921767c5feSopenharmony_ci  bool HasNext();
931767c5feSopenharmony_ci
941767c5feSopenharmony_ci  // Gets next match from text sequence.
  // NOTE(review): presumably returns true when a match was written to *match
  // and false when no further match exists - confirm against the
  // implementation.
951767c5feSopenharmony_ci  bool Next(PhoneNumberMatch* match);
961767c5feSopenharmony_ci
971767c5feSopenharmony_ci private:
981767c5feSopenharmony_ci  // The potential states of a PhoneNumberMatcher.
  // What the header itself states about them: last_match_ is NULL unless the
  // state is READY, and search_index_ is undefined once the state is DONE.
  // NOTE(review): presumably NOT_READY means the next match has not been
  // searched for yet and DONE means the text is exhausted - confirm.
991767c5feSopenharmony_ci  enum State {
1001767c5feSopenharmony_ci    NOT_READY,
1011767c5feSopenharmony_ci    READY,
1021767c5feSopenharmony_ci    DONE,
1031767c5feSopenharmony_ci  };
1041767c5feSopenharmony_ci
1051767c5feSopenharmony_ci  // Checks whether the provided text_ is in UTF-8 or not.
1061767c5feSopenharmony_ci  bool IsInputUtf8();
1071767c5feSopenharmony_ci
1081767c5feSopenharmony_ci  // Attempts to find a match. Returns true if a match is found (presumably
1091767c5feSopenharmony_ci  // stored in *match), otherwise returns false.
1101767c5feSopenharmony_ci  // NOTE(review): "index" is presumably the position within text_ at which
  // the search starts - confirm against the implementation.
1111767c5feSopenharmony_ci  bool Find(int index, PhoneNumberMatch* match);
1121767c5feSopenharmony_ci
1131767c5feSopenharmony_ci  // Checks a number was formatted with a national prefix, if the number was
1141767c5feSopenharmony_ci  // found in national format, and a national prefix is required for that
1151767c5feSopenharmony_ci  // number. Returns false if the number needed to have a national prefix and
1161767c5feSopenharmony_ci  // none was found.
1171767c5feSopenharmony_ci  bool IsNationalPrefixPresentIfRequired(const PhoneNumber& number) const;
1181767c5feSopenharmony_ci
1191767c5feSopenharmony_ci  // Attempts to extract a match from candidate. Returns true if the match was
1201767c5feSopenharmony_ci  // found, otherwise returns false.
  // NOTE(review): "offset" is presumably the start index of the candidate
  // string within the overall text - confirm against the implementation.
1211767c5feSopenharmony_ci  bool ExtractMatch(const string& candidate, int offset,
1221767c5feSopenharmony_ci                    PhoneNumberMatch* match);
1231767c5feSopenharmony_ci
1241767c5feSopenharmony_ci  // Attempts to extract a match from a candidate string if the whole candidate
1251767c5feSopenharmony_ci  // does not qualify as a match. Returns true if a match is found, otherwise
1261767c5feSopenharmony_ci  // returns false.
  // NOTE(review): "offset" presumably has the same meaning as in
  // ExtractMatch() - confirm.
1271767c5feSopenharmony_ci  bool ExtractInnerMatch(const string& candidate, int offset,
1281767c5feSopenharmony_ci                         PhoneNumberMatch* match);
1291767c5feSopenharmony_ci
1301767c5feSopenharmony_ci  // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and
1311767c5feSopenharmony_ci  // verifies it matches the requested leniency. If parsing and verification
1321767c5feSopenharmony_ci  // succeed, returns true, otherwise this method returns false.
1331767c5feSopenharmony_ci  bool ParseAndVerify(const string& candidate, int offset,
1341767c5feSopenharmony_ci                      PhoneNumberMatch* match);
1351767c5feSopenharmony_ci
  // Takes a "checker" callback whose signature is
  //   bool(const PhoneNumberUtil&, const PhoneNumber&, const string&,
  //        const vector<string>&)
  // which is the same parameter list as AllNumberGroupsAreExactlyPresent()
  // below.
  // NOTE(review): presumably applies "checker" to phone_number and candidate
  // to decide whether the digit grouping is acceptable; the signature does not
  // say who owns "checker" - confirm both against the implementation.
1361767c5feSopenharmony_ci  bool CheckNumberGroupingIsValid(
1371767c5feSopenharmony_ci    const PhoneNumber& phone_number,
1381767c5feSopenharmony_ci    const string& candidate,
1391767c5feSopenharmony_ci    ResultCallback4<bool, const PhoneNumberUtil&, const PhoneNumber&,
1401767c5feSopenharmony_ci                    const string&, const vector<string>&>* checker) const;
1411767c5feSopenharmony_ci
1421767c5feSopenharmony_ci  // Helper method to get the national-number part of a number, formatted
1431767c5feSopenharmony_ci  // without any national prefix, and return it as a set of digit blocks that
1441767c5feSopenharmony_ci  // would be formatted together following standard formatting rules.
  // The blocks are returned through the "digit_blocks" output parameter.
1451767c5feSopenharmony_ci  void GetNationalNumberGroups(
1461767c5feSopenharmony_ci      const PhoneNumber& number,
1471767c5feSopenharmony_ci      vector<string>* digit_blocks) const;
1481767c5feSopenharmony_ci
1491767c5feSopenharmony_ci  // Helper method to get the national-number part of a number, formatted
1501767c5feSopenharmony_ci  // without any national prefix, and return it as a set of digit blocks that
1511767c5feSopenharmony_ci  // should be formatted together according to the formatting pattern passed in.
  // The blocks are returned through the "digit_blocks" output parameter.
1521767c5feSopenharmony_ci  void GetNationalNumberGroupsForPattern(
1531767c5feSopenharmony_ci      const PhoneNumber& number,
1541767c5feSopenharmony_ci      const NumberFormat* formatting_pattern,
1551767c5feSopenharmony_ci      vector<string>* digit_blocks) const;
1561767c5feSopenharmony_ci
  // Has the same parameter list as the "checker" callback type accepted by
  // CheckNumberGroupingIsValid().
  // NOTE(review): presumably returns true when every entry of
  // formatted_number_groups appears as a group in normalized_candidate -
  // confirm against the implementation.
1571767c5feSopenharmony_ci  bool AllNumberGroupsAreExactlyPresent(
1581767c5feSopenharmony_ci      const PhoneNumberUtil& util,
1591767c5feSopenharmony_ci      const PhoneNumber& phone_number,
1601767c5feSopenharmony_ci      const string& normalized_candidate,
1611767c5feSopenharmony_ci      const vector<string>& formatted_number_groups) const;
1621767c5feSopenharmony_ci
  // NOTE(review): presumably applies the acceptance rule documented for the
  // given Leniency level to "number" as it was written in "candidate" -
  // confirm against the implementation.
1631767c5feSopenharmony_ci  bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number,
1641767c5feSopenharmony_ci                                 const string& candidate) const;
1651767c5feSopenharmony_ci
1661767c5feSopenharmony_ci  // In interface for testing purposes.
  // Its name ties it to the STRICT_GROUPING rule that numbers with more than
  // one '/' symbol are dropped; the exact check is not visible in this header.
1671767c5feSopenharmony_ci  static bool ContainsMoreThanOneSlashInNationalNumber(
1681767c5feSopenharmony_ci      const PhoneNumber& number,
1691767c5feSopenharmony_ci      const string& candidate,
1701767c5feSopenharmony_ci      const PhoneNumberUtil& util);
1711767c5feSopenharmony_ci
1721767c5feSopenharmony_ci  // Helper method to determine if a character is a Latin-script letter or not.
1731767c5feSopenharmony_ci  // For our purposes, combining marks should also return true since we assume
1741767c5feSopenharmony_ci  // they have been added to a preceding Latin character.
1751767c5feSopenharmony_ci  static bool IsLatinLetter(char32 letter);
1761767c5feSopenharmony_ci
1771767c5feSopenharmony_ci  // Helper class holding useful regular expressions.
  // NOTE(review): raw pointer; ownership is not expressed in this header -
  // confirm in the implementation whether the matcher owns the object.
1781767c5feSopenharmony_ci  const PhoneNumberMatcherRegExps* reg_exps_;
1791767c5feSopenharmony_ci
1801767c5feSopenharmony_ci  // Helper class holding loaded data containing alternate ways phone numbers
1811767c5feSopenharmony_ci  // might be formatted for certain regions.
  // NOTE(review): raw pointer; ownership is not expressed in this header -
  // confirm in the implementation whether the matcher owns the object.
1821767c5feSopenharmony_ci  const AlternateFormats* alternate_formats_;
1831767c5feSopenharmony_ci
1841767c5feSopenharmony_ci  // The phone number utility. Held by reference, so the referenced object
  // must outlive this matcher.
1851767c5feSopenharmony_ci  const PhoneNumberUtil& phone_util_;
1861767c5feSopenharmony_ci
1871767c5feSopenharmony_ci  // The text searched for phone numbers. Stored by value.
1881767c5feSopenharmony_ci  const string text_;
1891767c5feSopenharmony_ci
1901767c5feSopenharmony_ci  // The region(country) to assume for phone numbers without an international
1911767c5feSopenharmony_ci  // prefix.
1921767c5feSopenharmony_ci  const string preferred_region_;
1931767c5feSopenharmony_ci
1941767c5feSopenharmony_ci  // The degree of validation requested.
1951767c5feSopenharmony_ci  Leniency leniency_;
1961767c5feSopenharmony_ci
1971767c5feSopenharmony_ci  // The maximum number of retries after matching an invalid number.
1981767c5feSopenharmony_ci  int max_tries_;
1991767c5feSopenharmony_ci
2001767c5feSopenharmony_ci  // The iteration tristate: one of NOT_READY, READY or DONE.
2011767c5feSopenharmony_ci  State state_;
2021767c5feSopenharmony_ci
2031767c5feSopenharmony_ci  // The last successful match, NULL unless in the READY state.
2041767c5feSopenharmony_ci  scoped_ptr<PhoneNumberMatch> last_match_;
2051767c5feSopenharmony_ci
2061767c5feSopenharmony_ci  // The next index to start searching at. Undefined in the DONE state.
2071767c5feSopenharmony_ci  int search_index_;
2081767c5feSopenharmony_ci
2091767c5feSopenharmony_ci  // Flag to set or check if input text is in UTF-8 or not.
2101767c5feSopenharmony_ci  bool is_input_valid_utf8_;
2111767c5feSopenharmony_ci
2121767c5feSopenharmony_ci  DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher);
2131767c5feSopenharmony_ci};
2141767c5feSopenharmony_ci
2151767c5feSopenharmony_ci}  // namespace phonenumbers
2161767c5feSopenharmony_ci}  // namespace i18n
2171767c5feSopenharmony_ci
2181767c5feSopenharmony_ci#endif  // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
219