// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Lara Rennie
// Author: Tao Huang
//
// This is a direct port from PhoneNumberMatcher.java.
// Changes to this class should also happen to the Java version, whenever it
// makes sense.
211767c5feSopenharmony_ci 221767c5feSopenharmony_ci#ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ 231767c5feSopenharmony_ci#define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ 241767c5feSopenharmony_ci 251767c5feSopenharmony_ci#include <string> 261767c5feSopenharmony_ci#include <vector> 271767c5feSopenharmony_ci 281767c5feSopenharmony_ci#include "phonenumbers/base/basictypes.h" 291767c5feSopenharmony_ci#include "phonenumbers/base/memory/scoped_ptr.h" 301767c5feSopenharmony_ci#include "phonenumbers/callback.h" 311767c5feSopenharmony_ci#include "phonenumbers/regexp_adapter.h" 321767c5feSopenharmony_ci 331767c5feSopenharmony_cinamespace i18n { 341767c5feSopenharmony_cinamespace phonenumbers { 351767c5feSopenharmony_ci 361767c5feSopenharmony_citemplate <class R, class A1, class A2, class A3, class A4> 371767c5feSopenharmony_ci class ResultCallback4; 381767c5feSopenharmony_ci 391767c5feSopenharmony_ciusing std::string; 401767c5feSopenharmony_ciusing std::vector; 411767c5feSopenharmony_ci 421767c5feSopenharmony_ciclass AlternateFormats; 431767c5feSopenharmony_ciclass NumberFormat; 441767c5feSopenharmony_ciclass PhoneNumber; 451767c5feSopenharmony_ciclass PhoneNumberMatch; 461767c5feSopenharmony_ciclass PhoneNumberMatcherRegExps; 471767c5feSopenharmony_ciclass PhoneNumberUtil; 481767c5feSopenharmony_ci 491767c5feSopenharmony_ciclass PhoneNumberMatcher { 501767c5feSopenharmony_ci friend class PhoneNumberMatcherTest; 511767c5feSopenharmony_ci public: 521767c5feSopenharmony_ci // Leniency when finding potential phone numbers in text segments. The levels 531767c5feSopenharmony_ci // here are ordered in increasing strictness. 541767c5feSopenharmony_ci enum Leniency { 551767c5feSopenharmony_ci // Phone numbers accepted are possible, but not necessarily valid. 561767c5feSopenharmony_ci POSSIBLE, 571767c5feSopenharmony_ci // Phone numbers accepted are possible and valid. 
581767c5feSopenharmony_ci VALID, 591767c5feSopenharmony_ci // Phone numbers accepted are valid and are grouped in a possible way for 601767c5feSopenharmony_ci // this locale. For example, a US number written as "65 02 53 00 00" is not 611767c5feSopenharmony_ci // accepted at this leniency level, whereas "650 253 0000" or "6502530000" 621767c5feSopenharmony_ci // are. Numbers with more than one '/' symbol are also dropped at this 631767c5feSopenharmony_ci // level. 641767c5feSopenharmony_ci // Warning: The next two levels might result in lower coverage especially 651767c5feSopenharmony_ci // for regions outside of country code "+1". If you are not sure about which 661767c5feSopenharmony_ci // level to use, you can send an e-mail to the discussion group 671767c5feSopenharmony_ci // http://groups.google.com/group/libphonenumber-discuss/ 681767c5feSopenharmony_ci STRICT_GROUPING, 691767c5feSopenharmony_ci // Phone numbers accepted are valid and are grouped in the same way that we 701767c5feSopenharmony_ci // would have formatted it, or as a single block. For example, a US number 711767c5feSopenharmony_ci // written as "650 2530000" is not accepted at this leniency level, whereas 721767c5feSopenharmony_ci // "650 253 0000" or "6502530000" are. 731767c5feSopenharmony_ci EXACT_GROUPING, 741767c5feSopenharmony_ci }; 751767c5feSopenharmony_ci 761767c5feSopenharmony_ci // Constructs a phone number matcher. 771767c5feSopenharmony_ci PhoneNumberMatcher(const PhoneNumberUtil& util, 781767c5feSopenharmony_ci const string& text, 791767c5feSopenharmony_ci const string& region_code, 801767c5feSopenharmony_ci Leniency leniency, 811767c5feSopenharmony_ci int max_tries); 821767c5feSopenharmony_ci 831767c5feSopenharmony_ci // Wrapper to construct a phone number matcher, with no limitation on the 841767c5feSopenharmony_ci // number of retries and VALID Leniency. 
851767c5feSopenharmony_ci PhoneNumberMatcher(const string& text, 861767c5feSopenharmony_ci const string& region_code); 871767c5feSopenharmony_ci 881767c5feSopenharmony_ci ~PhoneNumberMatcher(); 891767c5feSopenharmony_ci 901767c5feSopenharmony_ci // Returns true if the text sequence has another match. Return false if not. 911767c5feSopenharmony_ci // Always returns false when input contains non UTF-8 characters. 921767c5feSopenharmony_ci bool HasNext(); 931767c5feSopenharmony_ci 941767c5feSopenharmony_ci // Gets next match from text sequence. 951767c5feSopenharmony_ci bool Next(PhoneNumberMatch* match); 961767c5feSopenharmony_ci 971767c5feSopenharmony_ci private: 981767c5feSopenharmony_ci // The potential states of a PhoneNumberMatcher. 991767c5feSopenharmony_ci enum State { 1001767c5feSopenharmony_ci NOT_READY, 1011767c5feSopenharmony_ci READY, 1021767c5feSopenharmony_ci DONE, 1031767c5feSopenharmony_ci }; 1041767c5feSopenharmony_ci 1051767c5feSopenharmony_ci // Checks if the to check if the provided text_ is in UTF-8 or not. 1061767c5feSopenharmony_ci bool IsInputUtf8(); 1071767c5feSopenharmony_ci 1081767c5feSopenharmony_ci // Attempts to extract a match from a candidate string. Returns true if a 1091767c5feSopenharmony_ci // match is found, otherwise returns false. The value "offset" refers to the 1101767c5feSopenharmony_ci // start index of the candidate string within the overall text. 1111767c5feSopenharmony_ci bool Find(int index, PhoneNumberMatch* match); 1121767c5feSopenharmony_ci 1131767c5feSopenharmony_ci // Checks a number was formatted with a national prefix, if the number was 1141767c5feSopenharmony_ci // found in national format, and a national prefix is required for that 1151767c5feSopenharmony_ci // number. Returns false if the number needed to have a national prefix and 1161767c5feSopenharmony_ci // none was found. 
1171767c5feSopenharmony_ci bool IsNationalPrefixPresentIfRequired(const PhoneNumber& number) const; 1181767c5feSopenharmony_ci 1191767c5feSopenharmony_ci // Attempts to extract a match from candidate. Returns true if the match was 1201767c5feSopenharmony_ci // found, otherwise returns false. 1211767c5feSopenharmony_ci bool ExtractMatch(const string& candidate, int offset, 1221767c5feSopenharmony_ci PhoneNumberMatch* match); 1231767c5feSopenharmony_ci 1241767c5feSopenharmony_ci // Attempts to extract a match from a candidate string if the whole candidate 1251767c5feSopenharmony_ci // does not qualify as a match. Returns true if a match is found, otherwise 1261767c5feSopenharmony_ci // returns false. 1271767c5feSopenharmony_ci bool ExtractInnerMatch(const string& candidate, int offset, 1281767c5feSopenharmony_ci PhoneNumberMatch* match); 1291767c5feSopenharmony_ci 1301767c5feSopenharmony_ci // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and 1311767c5feSopenharmony_ci // verifies it matches the requested leniency. 
If parsing and verification 1321767c5feSopenharmony_ci // succeed, returns true, otherwise this method returns false; 1331767c5feSopenharmony_ci bool ParseAndVerify(const string& candidate, int offset, 1341767c5feSopenharmony_ci PhoneNumberMatch* match); 1351767c5feSopenharmony_ci 1361767c5feSopenharmony_ci bool CheckNumberGroupingIsValid( 1371767c5feSopenharmony_ci const PhoneNumber& phone_number, 1381767c5feSopenharmony_ci const string& candidate, 1391767c5feSopenharmony_ci ResultCallback4<bool, const PhoneNumberUtil&, const PhoneNumber&, 1401767c5feSopenharmony_ci const string&, const vector<string>&>* checker) const; 1411767c5feSopenharmony_ci 1421767c5feSopenharmony_ci // Helper method to get the national-number part of a number, formatted 1431767c5feSopenharmony_ci // without any national prefix, and return it as a set of digit blocks that 1441767c5feSopenharmony_ci // would be formatted together following standard formatting rules. 1451767c5feSopenharmony_ci void GetNationalNumberGroups( 1461767c5feSopenharmony_ci const PhoneNumber& number, 1471767c5feSopenharmony_ci vector<string>* digit_blocks) const; 1481767c5feSopenharmony_ci 1491767c5feSopenharmony_ci // Helper method to get the national-number part of a number, formatted 1501767c5feSopenharmony_ci // without any national prefix, and return it as a set of digit blocks that 1511767c5feSopenharmony_ci // should be formatted together according to the formatting pattern passed in. 
1521767c5feSopenharmony_ci void GetNationalNumberGroupsForPattern( 1531767c5feSopenharmony_ci const PhoneNumber& number, 1541767c5feSopenharmony_ci const NumberFormat* formatting_pattern, 1551767c5feSopenharmony_ci vector<string>* digit_blocks) const; 1561767c5feSopenharmony_ci 1571767c5feSopenharmony_ci bool AllNumberGroupsAreExactlyPresent( 1581767c5feSopenharmony_ci const PhoneNumberUtil& util, 1591767c5feSopenharmony_ci const PhoneNumber& phone_number, 1601767c5feSopenharmony_ci const string& normalized_candidate, 1611767c5feSopenharmony_ci const vector<string>& formatted_number_groups) const; 1621767c5feSopenharmony_ci 1631767c5feSopenharmony_ci bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number, 1641767c5feSopenharmony_ci const string& candidate) const; 1651767c5feSopenharmony_ci 1661767c5feSopenharmony_ci // In interface for testing purposes. 1671767c5feSopenharmony_ci static bool ContainsMoreThanOneSlashInNationalNumber( 1681767c5feSopenharmony_ci const PhoneNumber& number, 1691767c5feSopenharmony_ci const string& candidate, 1701767c5feSopenharmony_ci const PhoneNumberUtil& util); 1711767c5feSopenharmony_ci 1721767c5feSopenharmony_ci // Helper method to determine if a character is a Latin-script letter or not. 1731767c5feSopenharmony_ci // For our purposes, combining marks should also return true since we assume 1741767c5feSopenharmony_ci // they have been added to a preceding Latin character. 1751767c5feSopenharmony_ci static bool IsLatinLetter(char32 letter); 1761767c5feSopenharmony_ci 1771767c5feSopenharmony_ci // Helper class holding useful regular expressions. 1781767c5feSopenharmony_ci const PhoneNumberMatcherRegExps* reg_exps_; 1791767c5feSopenharmony_ci 1801767c5feSopenharmony_ci // Helper class holding loaded data containing alternate ways phone numbers 1811767c5feSopenharmony_ci // might be formatted for certain regions. 
1821767c5feSopenharmony_ci const AlternateFormats* alternate_formats_; 1831767c5feSopenharmony_ci 1841767c5feSopenharmony_ci // The phone number utility; 1851767c5feSopenharmony_ci const PhoneNumberUtil& phone_util_; 1861767c5feSopenharmony_ci 1871767c5feSopenharmony_ci // The text searched for phone numbers; 1881767c5feSopenharmony_ci const string text_; 1891767c5feSopenharmony_ci 1901767c5feSopenharmony_ci // The region(country) to assume for phone numbers without an international 1911767c5feSopenharmony_ci // prefix. 1921767c5feSopenharmony_ci const string preferred_region_; 1931767c5feSopenharmony_ci 1941767c5feSopenharmony_ci // The degree of validation requested. 1951767c5feSopenharmony_ci Leniency leniency_; 1961767c5feSopenharmony_ci 1971767c5feSopenharmony_ci // The maximum number of retries after matching an invalid number. 1981767c5feSopenharmony_ci int max_tries_; 1991767c5feSopenharmony_ci 2001767c5feSopenharmony_ci // The iteration tristate. 2011767c5feSopenharmony_ci State state_; 2021767c5feSopenharmony_ci 2031767c5feSopenharmony_ci // The last successful match, NULL unless in State.READY. 2041767c5feSopenharmony_ci scoped_ptr<PhoneNumberMatch> last_match_; 2051767c5feSopenharmony_ci 2061767c5feSopenharmony_ci // The next index to start searching at. Undefined in State.DONE. 2071767c5feSopenharmony_ci int search_index_; 2081767c5feSopenharmony_ci 2091767c5feSopenharmony_ci // Flag to set or check if input text is in UTF-8 or not. 2101767c5feSopenharmony_ci bool is_input_valid_utf8_; 2111767c5feSopenharmony_ci 2121767c5feSopenharmony_ci DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher); 2131767c5feSopenharmony_ci}; 2141767c5feSopenharmony_ci 2151767c5feSopenharmony_ci} // namespace phonenumbers 2161767c5feSopenharmony_ci} // namespace i18n 2171767c5feSopenharmony_ci 2181767c5feSopenharmony_ci#endif // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ 219