1 // Copyright (C) 2009 The Libphonenumber Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Utility for international phone numbers.
16 
17 #ifndef I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
18 #define I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
19 
20 #include <stddef.h>
21 #include <list>
22 #include <map>
23 #include <set>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 
28 #include "phonenumbers/base/basictypes.h"
29 #include "phonenumbers/base/memory/scoped_ptr.h"
30 #include "phonenumbers/base/memory/singleton.h"
31 #include "phonenumbers/phonenumber.pb.h"
32 
33 class TelephoneNumber;
34 
35 namespace i18n {
36 namespace phonenumbers {
37 
38 using google::protobuf::RepeatedPtrField;
39 using std::string;
40 
41 class AsYouTypeFormatter;
42 class Logger;
43 class MatcherApi;
44 class NumberFormat;
45 class PhoneMetadata;
46 class PhoneNumberDesc;
47 class PhoneNumberRegExpsAndMappings;
48 class RegExp;
49 
50 // NOTE: A lot of methods in this class require Region Code strings. These must
51 // be provided using CLDR two-letter region-code format. These should be in
52 // upper-case. The list of the codes can be found here:
53 // http://www.unicode.org/cldr/charts/30/supplemental/territory_information.html
54 
55 class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
56  private:
57   friend class AsYouTypeFormatter;
58   friend class PhoneNumberMatcher;
59   friend class PhoneNumberMatcherRegExps;
60   friend class PhoneNumberMatcherTest;
61   friend class PhoneNumberRegExpsAndMappings;
62   friend class PhoneNumberUtilTest;
63   friend class ShortNumberInfo;
64   friend class ShortNumberInfoTest;
65   friend class Singleton<PhoneNumberUtil>;
66 
67  public:
68   // This type is neither copyable nor movable.
69   PhoneNumberUtil(const PhoneNumberUtil&) = delete;
70   PhoneNumberUtil& operator=(const PhoneNumberUtil&) = delete;
71 
72   ~PhoneNumberUtil();
73   static const char kRegionCodeForNonGeoEntity[];
74 
75   // INTERNATIONAL and NATIONAL formats are consistent with the definition
76   // in ITU-T Recommendation E.123. However we follow local conventions such as
77   // using '-' instead of whitespace as separators. For example, the number of
78   // the Google Switzerland office will be written as "+41 44 668 1800" in
79   // INTERNATIONAL format, and as "044 668 1800" in NATIONAL format. E164
80   // format is as per INTERNATIONAL format but with no formatting applied e.g.
81   // "+41446681800". RFC3966 is as per INTERNATIONAL format, but with all spaces
82   // and other separating symbols replaced with a hyphen, and with any phone
83   // number extension appended with ";ext=". It also will have a prefix of
84   // "tel:" added, e.g. "tel:+41-44-668-1800".
85   enum PhoneNumberFormat {
86     E164,  // No formatting applied, e.g. "+41446681800".
87     INTERNATIONAL,  // e.g. "+41 44 668 1800".
88     NATIONAL,  // e.g. "044 668 1800".
89     RFC3966  // e.g. "tel:+41-44-668-1800".
90   };
91 
92   static const PhoneNumberFormat kMaxNumberFormat = RFC3966;
93 
94   // Types of phone numbers.
95   enum PhoneNumberType {
96     FIXED_LINE,
97     MOBILE,
98     // In some regions (e.g. the USA), it is impossible to distinguish between
99     // fixed-line and mobile numbers by looking at the phone number itself.
100     FIXED_LINE_OR_MOBILE,
101     // Freephone lines.
102     TOLL_FREE,
103     PREMIUM_RATE,
104     // The cost of this call is shared between the caller and the recipient, and
105     // is hence typically less than PREMIUM_RATE calls. See
106     // http://en.wikipedia.org/wiki/Shared_Cost_Service for more information.
107     SHARED_COST,
108     // Voice over IP numbers. This includes TSoIP (Telephony Service over IP).
109     VOIP,
110     // A personal number is associated with a particular person, and may be
111     // routed to either a MOBILE or FIXED_LINE number. Some more information can
112     // be found here: http://en.wikipedia.org/wiki/Personal_Numbers
113     PERSONAL_NUMBER,
114     PAGER,
115     // Used for "Universal Access Numbers" or "Company Numbers". They may be
116     // further routed to specific offices, but allow one number to be used for a
117     // company.
118     UAN,
119     // Used for "Voice Mail Access Numbers".
120     VOICEMAIL,
121     // A phone number is of type UNKNOWN when it does not fit any of the known
122     // patterns for a specific region.
123     UNKNOWN
124   };
125 
126   static const PhoneNumberType kMaxNumberType = UNKNOWN;
127 
128   // Types of phone number matches. See the detailed description beside the
129   // IsNumberMatch() method.
130   enum MatchType {
131     INVALID_NUMBER,  // NOT_A_NUMBER in the Java version.
132     NO_MATCH,
133     SHORT_NSN_MATCH,
134     NSN_MATCH,
135     EXACT_MATCH,
136   };
137 
138   static const MatchType kMaxMatchType = EXACT_MATCH;
139 
140   enum ErrorType {  // Presumably the result codes of parsing; confirm.
141     NO_PARSING_ERROR,
142     INVALID_COUNTRY_CODE_ERROR,  // INVALID_COUNTRY_CODE in the Java version.
143     NOT_A_NUMBER,
144     TOO_SHORT_AFTER_IDD,
145     TOO_SHORT_NSN,
146     TOO_LONG_NSN,  // TOO_LONG in the Java version.
147   };
148 
149   static const ErrorType kMaxErrorType = TOO_LONG_NSN;
150 
151   // Possible outcomes when testing if a PhoneNumber is possible.
152   enum ValidationResult {
153     // The number length matches that of valid numbers for this region.
154     IS_POSSIBLE,
155     // The number length matches that of local numbers for this region only
156     // (i.e. numbers that may be able to be dialled within an area, but do not
157     // have all the information to be dialled from anywhere inside or outside
158     // the country).
159     IS_POSSIBLE_LOCAL_ONLY,
160     // The number has an invalid country calling code.
161     INVALID_COUNTRY_CODE,
162     // The number is shorter than all valid numbers for this region.
163     TOO_SHORT,
164     // The number's length lies strictly between the shortest and the longest
165     // valid lengths for this region, but does not itself match the length of
166     // any valid number for this region.
167     // This can also be returned in the case where
168     // IsPossibleNumberForTypeWithReason() was called, and there are no numbers
169     // of this type at all for this region.
170     INVALID_LENGTH,
171     // The number is longer than all valid numbers for this region.
172     TOO_LONG,
173   };
174 
175   static const ValidationResult kMaxValidationResult = TOO_LONG;
176 
177   // Gets all regions the library has metadata for.
178   // On return, "regions" contains the two-letter region codes for every
179   // geographical region the library supports.
180   void GetSupportedRegions(
181       std::set<string>* regions) const;
182 
183   // Gets all global network calling codes the library has metadata for.
184   // On return, "calling_codes" contains the country calling codes for every
185   // non-geographical entity the library supports.
186   void GetSupportedGlobalNetworkCallingCodes(
187       std::set<int>* calling_codes) const;
188 
189   // Returns all country calling codes the library has metadata for, covering
190   // both non-geographical entities (global network calling codes) and those
191   // used for geographical entities. This could be used to populate a drop-down
192   // box of country calling codes for a phone-number widget, for instance.
193   void GetSupportedCallingCodes(std::set<int>* calling_codes) const;
194 
195   // Returns the types for a given region which the library has metadata for.
196   // Will not include FIXED_LINE_OR_MOBILE (if numbers in this region could be
197   // classified as FIXED_LINE_OR_MOBILE, both FIXED_LINE and MOBILE would be
198   // present) and UNKNOWN.
199   //
200   // No types will be returned for invalid or unknown region codes.
201   void GetSupportedTypesForRegion(
202       const string& region_code,
203       std::set<PhoneNumberType>* types) const;
204 
205   // Returns the types for a country-code belonging to a non-geographical entity
206   // which the library has metadata for. Will not include FIXED_LINE_OR_MOBILE
207   // (if numbers for this non-geographical entity could be classified as
208   // FIXED_LINE_OR_MOBILE, both FIXED_LINE and MOBILE would be present) and
209   // UNKNOWN.
210   //
211   // No types will be returned for country calling codes that do not map to a
212   // known non-geographical entity.
213   void GetSupportedTypesForNonGeoEntity(
214       int country_calling_code,
215       std::set<PhoneNumberType>* types) const;
216 
217   // Gets a PhoneNumberUtil instance to carry out international phone number
218   // formatting, parsing, or validation. The instance is loaded with phone
219   // number metadata for a number of most commonly used regions, as specified by
220   // DEFAULT_REGIONS_.
221   //
222   // The PhoneNumberUtil is implemented as a singleton. Therefore, calling
223   // GetInstance multiple times will only result in one instance being created.
224   static PhoneNumberUtil* GetInstance();
225 
226   // Returns true if the number is a valid vanity (alpha) number such as 800
227   // MICROSOFT. A valid vanity number will start with at least 3 digits and will
228   // have three or more alpha characters. This does not do region-specific
229   // checks - to work out if this number is actually valid for a region, it
230   // should be parsed and methods such as IsPossibleNumberWithReason or
231   // IsValidNumber should be used.
232   bool IsAlphaNumber(const string& number) const;
233 
234   // Converts all alpha characters in a number to their respective digits on
235   // a keypad, but retains existing formatting.
236   void ConvertAlphaCharactersInNumber(string* number) const;
237 
238   // Normalizes a string of characters representing a phone number. This
239   // converts wide-ascii and arabic-indic numerals to European numerals, and
240   // strips punctuation and alpha characters.
241   void NormalizeDigitsOnly(string* number) const;
242 
243   // Normalizes a string of characters representing a phone number. This strips
244   // all characters which are not diallable on a mobile phone keypad (including
245   // all non-ASCII digits).
246   void NormalizeDiallableCharsOnly(string* number) const;
247 
248   // Gets the national significant number of a phone number. Note a national
249   // significant number doesn't contain a national prefix or any formatting.
250   void GetNationalSignificantNumber(const PhoneNumber& number,
251                                     string* national_significant_num) const;
252 
253   // Gets the length of the geographical area code from the PhoneNumber object
254   // passed in, so that clients could use it to split a national significant
255   // number into geographical area code and subscriber number. It works in such
256   // a way that the resultant subscriber number should be diallable, at least on
257   // some devices. An example of how this could be used:
258   //
259   // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance());
260   // PhoneNumber number;
261   // phone_util.Parse("16502530000", "US", &number);
262   // string national_significant_number;
263   // phone_util.GetNationalSignificantNumber(number,
264   //                                         &national_significant_number);
265   // string area_code;
266   // string subscriber_number;
267   //
268   // int area_code_length = phone_util.GetLengthOfGeographicalAreaCode(number);
269   // if (area_code_length > 0) {
270   //   area_code = national_significant_number.substr(0, area_code_length);
271   //   subscriber_number = national_significant_number.substr(
272   //       area_code_length, string::npos);
273   // } else {
274   //   area_code = "";
275   //   subscriber_number = national_significant_number;
276   // }
277   //
278   // N.B.: area code is a very ambiguous concept, so the authors generally
279   // recommend against using it for most purposes, but recommend using the
280   // more general national_number instead. Read the following carefully before
281   // deciding to use this method:
282   //
283   //  - geographical area codes change over time, and this method honors those
284   //    changes; therefore, it doesn't guarantee the stability of the result it
285   //    produces.
286   //  - subscriber numbers may not be diallable from all devices (notably mobile
287   //    devices, which typically require the full national_number to be dialled
288   //    in most regions).
289   //  - most non-geographical numbers have no area codes, including numbers
290   //    from non-geographical entities.
291   //  - some geographical numbers have no area codes.
292   int GetLengthOfGeographicalAreaCode(const PhoneNumber& number) const;
293 
294   // Gets the length of the national destination code (NDC) from the PhoneNumber
295   // object passed in, so that clients could use it to split a national
296   // significant number into NDC and subscriber number. The NDC of a phone
297   // number is normally the first group of digit(s) right after the country
298   // calling code when the number is formatted in the international format, if
299   // there is a subscriber number part that follows.
300   //
301   // N.B.: similar to an area code, not all numbers have an NDC!
302   //
303   // An example of how this could be used:
304   //
305   // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance());
306   // PhoneNumber number;
307   // phone_util.Parse("16502530000", "US", &number);
308   // string national_significant_number;
309   // phone_util.GetNationalSignificantNumber(number,
310   //                                         &national_significant_number);
311   // string national_destination_code;
312   // string subscriber_number;
313   //
314   // int national_destination_code_length =
315   //     phone_util.GetLengthOfNationalDestinationCode(number);
316   // if (national_destination_code_length > 0) {
317   //   national_destination_code = national_significant_number.substr(
318   //       0, national_destination_code_length);
319   //   subscriber_number = national_significant_number.substr(
320   //       national_destination_code_length, string::npos);
321   // } else {
322   //   national_destination_code = "";
323   //   subscriber_number = national_significant_number;
324   // }
325   //
326   // Refer to the unittests to see the difference between this function and
327   // GetLengthOfGeographicalAreaCode().
328   int GetLengthOfNationalDestinationCode(const PhoneNumber& number) const;
329 
330   // Returns the mobile token for the provided country calling code if it has
331   // one, otherwise returns an empty string. A mobile token is a number inserted
332   // before the area code when dialing a mobile number from that country from
333   // abroad.
334   void GetCountryMobileToken(int country_calling_code,
335                              string* mobile_token) const;
336 
337   // Formats a phone number in the specified format using default rules. Note
338   // that this does not promise to produce a phone number that the user can
339   // dial from where they are - although we do format in either NATIONAL or
340   // INTERNATIONAL format depending on what the client asks for, we do not
341   // currently support a more abbreviated format, such as for users in the
342   // same area who could potentially dial the number without area code.
343   void Format(const PhoneNumber& number,
344               PhoneNumberFormat number_format,
345               string* formatted_number) const;
346 
347   // Formats a phone number in the specified format using client-defined
348   // formatting rules.
349   void FormatByPattern(
350       const PhoneNumber& number,
351       PhoneNumberFormat number_format,
352       const RepeatedPtrField<NumberFormat>& user_defined_formats,
353       string* formatted_number) const;
354 
355   // Formats a phone number in national format for dialing using the carrier as
356   // specified in the carrier_code. The carrier_code will always be used
357   // regardless of whether the phone number already has a preferred domestic
358   // carrier code stored. If carrier_code contains an empty string, return the
359   // number in national format without any carrier code.
360   void FormatNationalNumberWithCarrierCode(const PhoneNumber& number,
361                                            const string& carrier_code,
362                                            string* formatted_number) const;
363 
364   // Formats a phone number in national format for dialing using the carrier as
365   // specified in the preferred_domestic_carrier_code field of the PhoneNumber
366   // object passed in. If that is missing, use the fallback_carrier_code passed
367   // in instead. If there is no preferred_domestic_carrier_code, and the
368   // fallback_carrier_code contains an empty string, return the number in
369   // national format without any carrier code.
370   //
371   // Use FormatNationalNumberWithCarrierCode instead if the carrier code passed
372   // in should take precedence over the number's preferred_domestic_carrier_code
373   // when formatting.
374   void FormatNationalNumberWithPreferredCarrierCode(
375       const PhoneNumber& number,
376       const string& fallback_carrier_code,
377       string* formatted_number) const;
378 
379   // Returns a number formatted in such a way that it can be dialed from a
380   // mobile phone in a specific region. If the number cannot be reached from
381   // the region (e.g. some countries block toll-free numbers from being called
382   // outside of the country), the method returns an empty string.
383   void FormatNumberForMobileDialing(
384       const PhoneNumber& number,
385       const string& region_calling_from,
386       bool with_formatting,
387       string* formatted_number) const;
388 
389   // Formats a phone number for out-of-country dialing purposes.
390   //
391   // Note this function takes care of the case for calling inside of NANPA
392   // and between Russia and Kazakhstan (who share the same country calling
393   // code). In those cases, no international prefix is used. For regions which
394   // have multiple international prefixes, the number in its INTERNATIONAL
395   // format will be returned instead.
396   void FormatOutOfCountryCallingNumber(
397       const PhoneNumber& number,
398       const string& calling_from,
399       string* formatted_number) const;
400 
401   // Formats a phone number using the original phone number format (e.g.
402   // INTERNATIONAL or NATIONAL) that the number is parsed from, provided that
403   // the number has been parsed with ParseAndKeepRawInput. Otherwise the number
404   // will be formatted in NATIONAL format. The original format is embedded in
405   // the country_code_source field of the PhoneNumber object passed in, which is
406   // only set when parsing keeps the raw input. When we don't have a formatting
407   // pattern for the number, the method falls back to returning the raw input.
408   // When the number is an invalid number, the method returns the raw input when
409   // it is available.
410   void FormatInOriginalFormat(const PhoneNumber& number,
411                               const string& region_calling_from,
412                               string* formatted_number) const;
413 
414   // Formats a phone number for out-of-country dialing purposes.
415   //
416   // Note that in this version, if the number was entered originally using alpha
417   // characters and this version of the number is stored in raw_input, this
418   // representation of the number will be used rather than the digit
419   // representation. Grouping information, as specified by characters such as
420   // "-" and " ", will be retained.
421   //
422   // Caveats:
423   // 1) This will not produce good results if the country calling code is both
424   // present in the raw input _and_ is the start of the national number. This
425   // is not a problem in the regions which typically use alpha numbers.
426   // 2) This will also not produce good results if the raw input has any
427   // grouping information within the first three digits of the national number,
428   // and if the function needs to strip preceding digits/words in the raw input
429   // before these digits. Normally people group the first three digits together
430   // so this is not a huge problem - and will be fixed if it proves to be so.
431   void FormatOutOfCountryKeepingAlphaChars(
432       const PhoneNumber& number,
433       const string& calling_from,
434       string* formatted_number) const;
435 
436   // Attempts to extract a valid number from a phone number that is too long to
437   // be valid, and resets the PhoneNumber object passed in to that valid
438   // version. If no valid number could be extracted, the PhoneNumber object
439   // passed in will not be modified. It returns true if a valid phone number can
440   // be successfully extracted.
441   bool TruncateTooLongNumber(PhoneNumber* number) const;
442 
443   // Gets the type of a valid phone number, or UNKNOWN if it is invalid.
444   PhoneNumberType GetNumberType(const PhoneNumber& number) const;
445 
446   // Tests whether a phone number matches a valid pattern. Note this doesn't
447   // verify the number is actually in use, which is impossible to tell by just
448   // looking at a number itself.
449   // It only verifies whether the parsed, canonicalised number is valid: not
450   // whether a particular series of digits entered by the user is diallable from
451   // the region provided when parsing. For example, the number +41 (0) 78 927
452   // 2696 can be parsed into a number with country code "41" and national
453   // significant number "789272696". This is valid, while the original string
454   // is not diallable.
455   bool IsValidNumber(const PhoneNumber& number) const;
456 
457   // Tests whether a phone number is valid for a certain region. Note this
458   // doesn't verify the number is actually in use, which is impossible to tell
459   // by just looking at a number itself. If the country calling code is not the
460   // same as the country calling code for the region, this immediately exits
461   // with false. After this, the specific number pattern rules for the region
462   // are examined.
463   // This is useful for determining for example whether a particular number is
464   // valid for Canada, rather than just a valid NANPA number.
465   // Warning: In most cases, you want to use IsValidNumber instead. For
466   // example, this method will mark numbers from British Crown dependencies
467   // such as the Isle of Man as invalid for the region "GB" (United Kingdom),
468   // since it has its own region code, "IM", which may be undesirable.
469   bool IsValidNumberForRegion(
470       const PhoneNumber& number,
471       const string& region_code) const;
472 
473   // Returns the region where a phone number is from. This could be used for
474   // geocoding at the region level. Only guarantees correct results for valid,
475   // full numbers (not short-codes, or invalid numbers).
476   void GetRegionCodeForNumber(const PhoneNumber& number,
477                               string* region_code) const;
478 
479   // Returns the country calling code for a specific region. For example,
480   // this would be 1 for the United States, and 64 for New Zealand.
481   int GetCountryCodeForRegion(const string& region_code) const;
482 
483   // Returns the region code that matches the specific country code. Note that
484   // it is possible that several regions share the same country calling code
485   // (e.g. US and Canada), and in that case, only one of the regions (normally
486   // the one with the largest population) is returned. If the
487   // country calling code entered is valid but doesn't match a specific region
488   // (such as in the case of non-geographical calling codes like 800) the
489   // RegionCode 001 will be returned (corresponding to the value for World in
490   // the UN M.49 schema).
491   void GetRegionCodeForCountryCode(int country_code, string* region_code) const;
492 
493   // Populates a list with the region codes that match the specific country
494   // calling code. For non-geographical country calling codes, the region code
495   // 001 is returned. Also, in the case of no region code being found, the list
496   // is left unchanged.
497   void GetRegionCodesForCountryCallingCode(
498       int country_calling_code,
499       std::list<string>* region_codes) const;
500 
501   // Checks if this is a region under the North American Numbering Plan
502   // Administration (NANPA).
503   bool IsNANPACountry(const string& region_code) const;
504 
505   // Returns the national dialling prefix for a specific region. For example,
506   // this would be 1 for the United States, and 0 for New Zealand. Set
507   // strip_non_digits to true to strip symbols like "~" (which indicates a wait
508   // for a dialling tone) from the prefix returned. If no national prefix is
509   // present, we return an empty string.
510   void GetNddPrefixForRegion(const string& region_code,
511                              bool strip_non_digits,
512                              string* national_prefix) const;
513 
514   // Checks whether a phone number is a possible number. It provides a more
515   // lenient check than IsValidNumber() in the following sense:
516   //   1. It only checks the length of phone numbers. In particular, it doesn't
517   //      check starting digits of the number.
518   //   2. It doesn't attempt to figure out the type of the number, but uses
519   //      general rules which apply to all types of phone numbers in a
520   //      region. Therefore, it is much faster than IsValidNumber().
521   //   3. For some numbers (particularly fixed-line), many regions have the
522   //      concept of area code, which together with subscriber number constitute
523   //      the national significant number. It is sometimes okay to dial only the
524   //      subscriber number when dialing in the same area. This function will
525   //      return IS_POSSIBLE_LOCAL_ONLY if the subscriber-number-only version is
526   //      passed in. On the other hand, because IsValidNumber() validates using
527   //      information on both starting digits (for fixed line numbers, that
528   //      would most likely be area codes) and length (obviously includes the
529   //      length of area codes for fixed line numbers), it will return false for
530   //      the subscriber-number-only version.
531   ValidationResult IsPossibleNumberWithReason(const PhoneNumber& number) const;
532 
533   // Convenience wrapper around IsPossibleNumberWithReason(). Instead of
534   // returning the reason for failure, this method returns true if the number is
535   // either a possible fully-qualified number (containing the area code and
536   // country code), or if the number could be a possible local number (with a
537   // country code, but missing an area code). Local numbers are considered
538   // possible if they could be possibly dialled in this format: if the area code
539   // is needed for a call to connect, the number is not considered possible
540   // without it.
541   bool IsPossibleNumber(const PhoneNumber& number) const;
542 
543   // Check whether a phone number is a possible number of a particular type. For
544   // types that don't exist in a particular region, this will return a result
545   // that isn't so useful; it is recommended that you use
546   // GetSupportedTypesForRegion() or GetSupportedTypesForNonGeoEntity()
547   // respectively before calling this method to determine whether you should
548   // call it for this number at all.
549   //
550   // This provides a more lenient check than IsValidNumber() in the following
551   // sense:
552   //
553   //   1. It only checks the length of phone numbers. In particular, it doesn't
554   //      check starting digits of the number.
555   //   2. For some numbers (particularly fixed-line), many regions have the
556   //      concept of area code, which together with subscriber number constitute
557   //      the national significant number. It is sometimes okay to dial only the
558   //      subscriber number when dialing in the same area. This function will
559   //      return IS_POSSIBLE_LOCAL_ONLY if the subscriber-number-only version is
560   //      passed in. On the other hand, because IsValidNumber() validates using
561   //      information on both starting digits (for fixed line numbers, that
562   //      would most likely be area codes) and length (obviously includes the
563   //      length of area codes for fixed line numbers), it will return false for
564   //      the subscriber-number-only version.
565   ValidationResult IsPossibleNumberForTypeWithReason(
566       const PhoneNumber& number, PhoneNumberType type) const;
567 
568   // Convenience wrapper around IsPossibleNumberForTypeWithReason(). Instead of
569   // returning the reason for failure, this method returns true if the number is
570   // either a possible fully-qualified number (containing the area code and
571   // country code), or if the number could be a possible local number (with a
572   // country code, but missing an area code). Local numbers are considered
573   // possible if they could be possibly dialled in this format: if the area code
574   // is needed for a call to connect, the number is not considered possible
575   // without it.
576   bool IsPossibleNumberForType(const PhoneNumber& number,
577                                PhoneNumberType type) const;
578 
579   // Checks whether a phone number is a possible number given a number in the
580   // form of a string, and the country where the number could be dialed from.
581   // It provides a more lenient check than IsValidNumber(). See
582   // IsPossibleNumber(const PhoneNumber& number) for details.
583   //
584   // This method first parses the number, then invokes
585   // IsPossibleNumber(const PhoneNumber& number) with the resultant PhoneNumber
586   // object.
587   //
588   // region_dialing_from represents the region that we are expecting the number
589   // to be dialed from. Note this is different from the region where the number
590   // belongs. For example, the number +1 650 253 0000 is a number that belongs
591   // to US. When written in this form, it could be dialed from any region. When
592   // it is written as 00 1 650 253 0000, it could be dialed from any region
593   // which uses an international dialling prefix of 00. When it is written as
594   // 650 253 0000, it could only be dialed from within the US, and when written
595   // as 253 0000, it could only be dialed from within a smaller area in the US
596   // (Mountain View, CA, to be more specific).
597   bool IsPossibleNumberForString(
598       const string& number,
599       const string& region_dialing_from) const;
600 
601   // Returns true if the number can be dialled from outside the region, or if
602   // that is unknown. If the number can only be dialled from within the region,
603   // returns false. Does not check the number is a valid number. Note that, at
604   // the moment, this method does not handle short numbers (which are currently
605   // all presumed to not be diallable from outside their country).
606   bool CanBeInternationallyDialled(const PhoneNumber& number) const;
607 
  // Tests whether a phone number has a geographical association. It checks if
  // the number is associated with a certain region in the country to which it
  // belongs. Note that this doesn't verify if the number is actually in use.
  // If the number's type has already been computed, prefer the overload taking
  // a PhoneNumberType and a country calling code.
  bool IsNumberGeographical(const PhoneNumber& phone_number) const;
612 
  // Overload of IsNumberGeographical(PhoneNumber) for callers that already
  // know the number's type: calculating the phone number type is expensive, so
  // if we have already done this, we don't want to do it again.
  //
  // phone_number_type is the previously calculated type of the number and
  // country_calling_code is the country calling code of that number.
  bool IsNumberGeographical(PhoneNumberType phone_number_type,
                            int country_calling_code) const;
618 
  // Gets a valid fixed-line number for the specified region, handing it back
  // through the number output parameter. Returns false if the region was
  // unknown, or the region 001 is passed in. For 001 (representing
  // non-geographical numbers), call GetExampleNumberForNonGeoEntity instead.
  bool GetExampleNumber(const string& region_code,
                        PhoneNumber* number) const;
625 
  // Gets an invalid number for the specified region. This is useful for
  // unit-testing purposes, where you want to test what will happen with an
  // invalid number. Note that the number that is returned will always be able
  // to be parsed and will have the correct country code. It may also be a valid
  // *short* number/code for this region. Validity checking such numbers is
  // handled with ShortNumberInfo.
  //
  // The number is handed back through the number output parameter. Returns
  // false when an unsupported region or the region 001 (Earth) is passed in.
  bool GetInvalidExampleNumber(const string& region_code,
                               PhoneNumber* number) const;
637 
  // Gets a valid number of the specified type for the specified region,
  // handing it back through the number output parameter.
  // Returns false if the region was unknown or 001, or if no example number of
  // that type could be found. For 001 (representing non-geographical numbers),
  // call GetExampleNumberForNonGeoEntity instead.
  bool GetExampleNumberForType(const string& region_code,
                               PhoneNumberType type,
                               PhoneNumber* number) const;
645 
  // Gets a valid number for the specified type (it may belong to any country),
  // handing it back through the number output parameter.
  // Returns false when the metadata does not contain such information. This
  // should only happen when no numbers of this type are allocated anywhere in
  // the world anymore.
  bool GetExampleNumberForType(PhoneNumberType type,
                               PhoneNumber* number) const;
652 
  // Gets a valid number for the specified country calling code for a
  // non-geographical entity, handing it back through the number output
  // parameter. Returns false if the metadata does not contain such
  // information, or the country calling code passed in does not belong to a
  // non-geographical entity.
  bool GetExampleNumberForNonGeoEntity(
      int country_calling_code, PhoneNumber* number) const;
659 
  // Parses a string and returns it as a phone number in proto buffer format.
  // The method is quite lenient and looks for a number in the input text
  // (raw input) and does not check whether the string is definitely only a
  // phone number. To do this, it ignores punctuation and white-space, as well
  // as any text before the number (e.g. a leading "Tel: ") and trims the
  // non-number bits. It will accept a number in any format (E164, national,
  // international etc), assuming it can be interpreted with the default_region
  // supplied. It also attempts to convert any alpha characters into digits
  // if it thinks this is a vanity number of the type "1800 MICROSOFT".
  //
  // This method will return an error if the number is not considered to be a
  // possible number, and NO_PARSING_ERROR if it is parsed correctly.
  // Note that validation of whether the number is actually a valid number for
  // a particular region is not performed. This can be done separately with
  // IsValidNumber().
  //
  // Note this method canonicalizes the phone number such that different
  // representations can be easily compared, no matter what form it was
  // originally entered in (e.g. national, international). If you want to record
  // context about the number being parsed, such as the raw input that was
  // entered, how the country code was derived etc. then call
  // ParseAndKeepRawInput() instead.
  //
  // number_to_parse can contain formatting such as +, ( and -, as well as a
  // phone number extension. It can also be provided in RFC3966 format.
  //
  // default_region represents the country that we are expecting the number to
  // be from. This is only used if the number being parsed is not written in
  // international format. The country_code for the number in this case would be
  // stored as that of the default country supplied. If the number is guaranteed
  // to start with a '+' followed by the country calling code, then
  // "ZZ" can be supplied.
  //
  // number receives the parsed phone number.
  //
  // Returns an error if the string is not considered to be a viable phone
  // number (e.g. too few or too many digits) or if no default region was
  // supplied and the number is not in international format (does not start with
  // +).
  ErrorType Parse(const string& number_to_parse,
                  const string& default_region,
                  PhoneNumber* number) const;
  // Parses a string and returns it in proto buffer format. This method differs
  // from Parse() in that it always populates the raw_input field of the
  // protocol buffer with number_to_parse as well as the country_code_source
  // field. See Parse() for the description of number_to_parse and
  // default_region; number receives the parsed phone number.
  ErrorType ParseAndKeepRawInput(const string& number_to_parse,
                                 const string& default_region,
                                 PhoneNumber* number) const;
707 
  // Takes two phone numbers and compares them for equality.
  //
  // Returns:
  // - EXACT_MATCH if the country calling code, NSN, presence of a leading zero
  //   for Italian numbers and any extension present are the same.
  // - NSN_MATCH if either or both has no country calling code specified, and
  //   the NSNs and extensions are the same.
  // - SHORT_NSN_MATCH if either or both has no country calling code specified,
  //   or the country calling code specified is the same, and one NSN could be
  //   a shorter version of the other number. This includes the case where one
  //   has an extension specified, and the other does not.
  // - NO_MATCH otherwise.
  //
  // For example, the numbers +1 345 657 1234 and 657 1234 are a
  // SHORT_NSN_MATCH. The numbers +1 345 657 1234 and 345 657 are a NO_MATCH.
  MatchType IsNumberMatch(const PhoneNumber& first_number,
                          const PhoneNumber& second_number) const;
723 
  // Takes two phone numbers as strings and compares them for equality. This
  // is a convenience wrapper for IsNumberMatch(const PhoneNumber& first_number,
  // const PhoneNumber& second_number). No default region is known.
  // Returns INVALID_NUMBER if either number cannot be parsed into a phone
  // number.
  MatchType IsNumberMatchWithTwoStrings(const string& first_number,
                                        const string& second_number) const;
731 
  // Takes two phone numbers, the first as a PhoneNumber and the second as a
  // string, and compares them for equality. This is a convenience wrapper for
  // IsNumberMatch(const PhoneNumber& first_number,
  // const PhoneNumber& second_number). No default region is known.
  // Returns INVALID_NUMBER if second_number cannot be parsed into a phone
  // number.
  MatchType IsNumberMatchWithOneString(const PhoneNumber& first_number,
                                       const string& second_number) const;
739 
  // Overrides the default logging system. This takes ownership of the provided
  // logger, so the caller must not delete it afterwards.
  void SetLogger(Logger* logger);
743 
  // Gets an AsYouTypeFormatter for the specific region.
  // Returns an AsYouTypeFormatter object, which could be used to format phone
  // numbers in the specific region "as you type".
  // The caller owns the returned instance and is responsible for deleting it.
  AsYouTypeFormatter* GetAsYouTypeFormatter(const string& region_code) const;
750 
  // Friend declarations: these two free functions, which take a
  // TelephoneNumber and a PhoneNumber proto, are granted access to the
  // non-public members of this class.
  // NOTE(review): which members they rely on is not visible from this header.
  friend bool ConvertFromTelephoneNumberProto(
      const TelephoneNumber& proto_to_convert,
      PhoneNumber* new_proto);
  friend bool ConvertToTelephoneNumberProto(const PhoneNumber& proto_to_convert,
                                            TelephoneNumber* resulting_proto);
756 
757  protected:
  // NOTE(review): undocumented in this header. Judging by the signature it
  // presumably reports whether national_number matches the description given
  // by number_desc - confirm against the implementation.
  bool IsNumberMatchingDesc(const string& national_number,
                            const PhoneNumberDesc& number_desc) const;
760 
  // NOTE(review): undocumented in this header. Presumably determines the
  // PhoneNumberType of national_number using the supplied metadata - confirm
  // against the implementation.
  PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper(
      const string& national_number, const PhoneMetadata& metadata) const;
763 
764  private:
  // Logger owned by this object through a scoped_ptr.
  // NOTE(review): presumably the logger installed via SetLogger(), which is
  // documented as taking ownership - confirm.
  scoped_ptr<Logger> logger_;
766 
  // Pairs an int with a pointer to a list of strings. This is the element type
  // of country_calling_code_to_region_code_map_, so the int is presumably a
  // country calling code and the list the region codes using it - confirm.
  typedef std::pair<int, std::list<string>*> IntRegionsPair;
768 
  // The minimum and maximum length of the national significant number (NSN).
  static const size_t kMinLengthForNsn = 2;
  // The ITU says the maximum length should be 15, but we have found longer
  // numbers in Germany, hence the larger limit used here.
  static const size_t kMaxLengthForNsn = 17;
  // The maximum length of the country calling code.
  static const size_t kMaxLengthCountryCode = 3;
776 
  // Declared here without an initializer; the value is defined elsewhere.
  // NOTE(review): presumably the characters accepted as a plus sign (see
  // StartsWithPlusCharsPattern()) - confirm.
  static const char kPlusChars[];
  // Regular expression of acceptable punctuation found in phone numbers. This
  // excludes punctuation found as a leading character only. This consists of
  // dash characters, white space characters, full stops, slashes, square
  // brackets, parentheses and tildes. It also includes the letter 'x' as that
  // is found as a placeholder for carrier information in some phone numbers.
  // Full-width variants are also present.
  static const char kValidPunctuation[];
785 
  // Regular expression of characters typically used to start a second phone
  // number for the purposes of parsing. This allows us to strip off parts of
  // the number that are actually the start of another number, such as for:
  //   (530) 583-6985 x302/x2303
  // Here the second extension makes this actually two phone numbers,
  // (530) 583-6985 x302 and (530) 583-6985 x2303. We remove the second
  // extension so that the first number is parsed correctly. The string
  // preceding the match is captured.
  // This corresponds to SECOND_NUMBER_START in the Java version.
  static const char kCaptureUpToSecondNumberStart[];
795 
  // An API for validation checking. Owned by this object through a scoped_ptr.
  scoped_ptr<MatcherApi> matcher_api_;
798 
  // Helper object holding useful regular expressions and character mappings.
  // Owned by this object through a scoped_ptr.
  scoped_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
801 
  // A mapping from a country calling code to a RegionCode object which denotes
  // the region represented by that country calling code. Note regions under
  // NANPA share the country calling code 1 and Russia and Kazakhstan share the
  // country calling code 7. Under this map, 1 is mapped to region code "US" and
  // 7 is mapped to region code "RU". This is implemented as a sorted vector to
  // achieve better performance.
  // NOTE(review): each element is an IntRegionsPair, i.e. it carries a pointer
  // to a list of strings rather than a single region code, so the "mapped to"
  // wording above presumably refers to the main region in each list - confirm
  // against the implementation.
  scoped_ptr<std::vector<IntRegionsPair> >
      country_calling_code_to_region_code_map_;
810 
  // The set of regions that share country calling code 1 (the NANPA regions).
  scoped_ptr<std::set<string> > nanpa_regions_;
  // The country calling code shared by the NANPA regions.
  static const int kNanpaCountryCode = 1;
814 
  // A mapping from a region code to the PhoneMetadata for that region. The map
  // is keyed by the region code string and stores PhoneMetadata by value.
  scoped_ptr<std::map<string, PhoneMetadata> > region_to_metadata_map_;
817 
  // A mapping from a country calling code for a non-geographical entity to the
  // PhoneMetadata for that country calling code. Examples of the country
  // calling codes include 800 (International Toll Free Service) and 808
  // (International Shared Cost Service). The map is keyed by the integer
  // country calling code and stores PhoneMetadata by value.
  scoped_ptr<std::map<int, PhoneMetadata> >
      country_code_to_non_geographical_metadata_map_;
824 
  // Private default constructor: code outside this class and its friends
  // cannot construct a PhoneNumberUtil directly.
  // NOTE(review): the class derives from Singleton<PhoneNumberUtil>, so
  // instances are presumably obtained through the singleton accessor - confirm.
  PhoneNumberUtil();
826 
  // Returns a regular expression for the possible extensions that may be found
  // in a number, for use when matching. The pattern is returned by const
  // reference, so no copy is made by the call itself.
  const string& GetExtnPatternsForMatching() const;
830 
  // Checks if a number matches the plus chars pattern.
  // NOTE(review): going by the name, the match is presumably anchored at the
  // start of number - confirm against the implementation.
  bool StartsWithPlusCharsPattern(const string& number) const;
833 
  // NOTE(review): undocumented in this header. Presumably records on
  // phone_number any leading zeros found in national_number (the IsNumberMatch
  // documentation mentions "a leading zero for Italian numbers") - confirm
  // against the implementation.
  void SetItalianLeadingZerosForPhoneNumber(
      const string& national_number, PhoneNumber* phone_number) const;
836 
  // Checks whether a string contains only valid digits.
  // NOTE(review): which characters count as valid digits, and the result for
  // an empty string, are not stated here - confirm against the implementation.
  bool ContainsOnlyValidDigits(const string& s) const;
839 
  // Checks if a format is eligible to be used by the AsYouTypeFormatter. This
  // method lives here rather than in asyoutypeformatter.h because it depends on
  // the valid punctuation declared by the phone number util.
  bool IsFormatEligibleForAsYouTypeFormatter(const string& format) const;
844 
  // Helper function to check if the national prefix formatting rule has the
  // first group only, i.e., does not start with the national prefix. Returns
  // true in that case.
  bool FormattingRuleHasFirstGroupOnly(
      const string& national_prefix_formatting_rule) const;
849 
  // Trims unwanted end characters from a phone number string. The string is
  // passed by pointer and modified in place.
  // NOTE(review): which characters count as unwanted is defined in the
  // implementation.
  void TrimUnwantedEndChars(string* number) const;
852 
  // Helper function to check that the region code is not unknown or null.
  // Returns true if region_code passes that check.
  bool IsValidRegionCode(const string& region_code) const;
855 
  // Helper function to check that the country calling code is valid. Returns
  // true if country_calling_code passes that check.
  bool HasValidCountryCallingCode(int country_calling_code) const;
858 
  // NOTE(review): undocumented in this header. Presumably looks up the
  // PhoneMetadata for region_code (see region_to_metadata_map_); the result for
  // an unknown region - presumably a null pointer - should be confirmed against
  // the implementation. The returned metadata is const.
  const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegion(
      const string& region_code) const;
861 
  // NOTE(review): undocumented in this header. Presumably looks up the
  // PhoneMetadata of a non-geographical entity by its country calling code
  // (see country_code_to_non_geographical_metadata_map_); the result for an
  // unknown code - presumably a null pointer - should be confirmed against the
  // implementation. The returned metadata is const.
  const i18n::phonenumbers::PhoneMetadata* GetMetadataForNonGeographicalRegion(
      int country_calling_code) const;
864 
  // NOTE(review): undocumented in this header. Presumably chooses between
  // GetMetadataForRegion() and GetMetadataForNonGeographicalRegion() depending
  // on the arguments - confirm the selection rule against the implementation.
  // The returned metadata is const.
  const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegionOrCallingCode(
      int country_calling_code,
      const string& region_code) const;
868 
  // As per GetCountryCodeForRegion, but assumes the validity of the region_code
  // has already been checked, so callers must validate region_code first.
  int GetCountryCodeForValidRegion(const string& region_code) const;
872 
  // NOTE(review): undocumented in this header. Presumably selects from
  // available_formats the NumberFormat applicable to national_number; the
  // result when none applies - presumably a null pointer - should be confirmed
  // against the implementation.
  const NumberFormat* ChooseFormattingPatternForNumber(
      const RepeatedPtrField<NumberFormat>& available_formats,
      const string& national_number) const;
876 
  // NOTE(review): undocumented in this header. Presumably formats
  // national_number according to formatting_pattern and number_format, taking
  // carrier_code into account, and writes the result to formatted_number -
  // confirm against the implementation.
  void FormatNsnUsingPatternWithCarrier(
      const string& national_number,
      const NumberFormat& formatting_pattern,
      PhoneNumberUtil::PhoneNumberFormat number_format,
      const string& carrier_code,
      string* formatted_number) const;
883 
  // NOTE(review): undocumented in this header. Presumably the no-carrier-code
  // counterpart of FormatNsnUsingPatternWithCarrier(), writing its result to
  // formatted_number - confirm against the implementation.
  void FormatNsnUsingPattern(
      const string& national_number,
      const NumberFormat& formatting_pattern,
      PhoneNumberUtil::PhoneNumberFormat number_format,
      string* formatted_number) const;
889 
  // Checks if raw_input, which is assumed to be in the national format, has a
  // national prefix. The national prefix is assumed to be in digits-only form.
  // NOTE(review): how region_code is used in the check is not stated here -
  // confirm against the implementation.
  bool RawInputContainsNationalPrefix(
      const string& raw_input,
      const string& national_prefix,
      const string& region_code) const;
896 
  // NOTE(review): undocumented in this header. Presumably reports whether a
  // formatting pattern is available for number - confirm against the
  // implementation.
  bool HasFormattingPatternForNumber(const PhoneNumber& number) const;
898 
  // Simple wrapper of FormatNsnWithCarrier for the common case of
  // no carrier code. The result is handed back through formatted_number.
  void FormatNsn(const string& number,
                 const PhoneMetadata& metadata,
                 PhoneNumberFormat number_format,
                 string* formatted_number) const;
905 
  // Variant of FormatNsn() that additionally takes a carrier code (FormatNsn()
  // is documented as a simple wrapper of this method).
  // NOTE(review): presumably formats number using metadata and number_format
  // and writes the result to formatted_number - confirm against the
  // implementation.
  void FormatNsnWithCarrier(const string& number,
                            const PhoneMetadata& metadata,
                            PhoneNumberFormat number_format,
                            const string& carrier_code,
                            string* formatted_number) const;
911 
  // NOTE(review): undocumented in this header. Presumably appends the
  // extension of number, formatted for number_format using metadata, to the
  // string pointed to by extension when the number has one - confirm against
  // the implementation.
  void MaybeAppendFormattedExtension(
      const PhoneNumber& number,
      const PhoneMetadata& metadata,
      PhoneNumberFormat number_format,
      string* extension) const;
917 
  // NOTE(review): undocumented in this header. Presumably picks, among
  // region_codes, the region that number belongs to and writes it to
  // region_code; the value written when none matches should be confirmed
  // against the implementation.
  void GetRegionCodeForNumberFromRegionList(
      const PhoneNumber& number,
      const std::list<string>& region_codes,
      string* region_code) const;
922 
  // Strips the IDD from the start of the number if present. Helper function
  // used by MaybeStripInternationalPrefixAndNormalize. number is modified in
  // place.
  // NOTE(review): the return value presumably indicates whether an IDD was
  // stripped - confirm against the implementation.
  bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) const;
926 
  // NOTE(review): undocumented in this header. Presumably normalizes the
  // string pointed to by number in place; the normalization rules are defined
  // in the implementation.
  void Normalize(string* number) const;
928 
  // NOTE(review): undocumented in this header. Presumably strips any
  // international prefix (matched using possible_idd_prefix) from number,
  // normalizes what remains in place, and returns how the country code was
  // indicated - confirm against the implementation. ParsePrefixAsIdd() is
  // documented as a helper of this method.
  PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize(
      const string& possible_idd_prefix,
      string* number) const;
932 
  // NOTE(review): undocumented in this header. Presumably strips a national
  // prefix and/or carrier code from number in place using metadata, stores any
  // carrier code found in carrier_code, and returns whether something was
  // stripped - confirm against the implementation.
  bool MaybeStripNationalPrefixAndCarrierCode(
      const PhoneMetadata& metadata,
      string* number,
      string* carrier_code) const;
937 
  // NOTE(review): undocumented in this header. Presumably extracts from number
  // the part that could be a phone number and writes it to extracted_number -
  // confirm against the implementation.
  void ExtractPossibleNumber(const string& number,
                             string* extracted_number) const;
940 
  // NOTE(review): undocumented in this header. Presumably the viability check
  // referred to by the Parse() documentation ("not considered to be a viable
  // phone number") - confirm against the implementation.
  bool IsViablePhoneNumber(const string& number) const;
942 
  // NOTE(review): undocumented in this header. Presumably removes an extension
  // from number in place, stores it in extension, and returns whether one was
  // found - confirm against the implementation.
  bool MaybeStripExtension(string* number, string* extension) const;
944 
  // NOTE(review): undocumented in this header. Presumably removes a leading
  // country calling code from national_number in place and returns it; the
  // value returned when none is found should be confirmed against the
  // implementation.
  int ExtractCountryCode(string* national_number) const;
  // NOTE(review): undocumented in this header. Presumably tries to extract a
  // country calling code from national_number, using default_region_metadata
  // when the number is not in international format, and records the outcome on
  // phone_number - confirm against the implementation.
  // NOTE(review): keepRawInput is spelled in camelCase, unlike keep_raw_input
  // in ParseHelper(); consider renaming it together with the definition.
  ErrorType MaybeExtractCountryCode(
      const PhoneMetadata* default_region_metadata,
      bool keepRawInput,
      string* national_number,
      PhoneNumber* phone_number) const;
951 
  // NOTE(review): undocumented in this header. Presumably checks that
  // default_region is usable for parsing number_to_parse (the Parse()
  // documentation says the default region is only needed when the number is
  // not in international format) - confirm against the implementation.
  bool CheckRegionForParsing(
      const string& number_to_parse,
      const string& default_region) const;
955 
  // NOTE(review): undocumented in this header. Presumably the shared
  // implementation behind Parse() and ParseAndKeepRawInput(), with
  // keep_raw_input selecting whether the raw input is recorded and
  // check_region selecting whether default_region is validated - confirm
  // against the implementation. The result is written to phone_number.
  ErrorType ParseHelper(const string& number_to_parse,
                        const string& default_region,
                        bool keep_raw_input,
                        bool check_region,
                        PhoneNumber* phone_number) const;
961 
962   void BuildNationalNumberForParsing(const string& number_to_parse,
963                                           string* national_number) const;
964 
  // NOTE(review): undocumented in this header. Presumably reports whether
  // number is shorter than any possible regular (non-short) number according
  // to country_metadata - confirm against the implementation, including how a
  // null country_metadata is handled.
  bool IsShorterThanPossibleNormalNumber(const PhoneMetadata* country_metadata,
                                         const string& number) const;
967 
968 };
969 
970 }  // namespace phonenumbers
971 }  // namespace i18n
972 
973 #endif  // I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
974