1 // Copyright (C) 2009 The Libphonenumber Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Utility for international phone numbers. 16 17 #ifndef I18N_PHONENUMBERS_PHONENUMBERUTIL_H_ 18 #define I18N_PHONENUMBERS_PHONENUMBERUTIL_H_ 19 20 #include <stddef.h> 21 #include <list> 22 #include <map> 23 #include <set> 24 #include <string> 25 #include <utility> 26 #include <vector> 27 28 #include "phonenumbers/base/basictypes.h" 29 #include "phonenumbers/base/memory/scoped_ptr.h" 30 #include "phonenumbers/base/memory/singleton.h" 31 #include "phonenumbers/phonenumber.pb.h" 32 33 class TelephoneNumber; 34 35 namespace i18n { 36 namespace phonenumbers { 37 38 using google::protobuf::RepeatedPtrField; 39 using std::string; 40 41 class AsYouTypeFormatter; 42 class Logger; 43 class MatcherApi; 44 class NumberFormat; 45 class PhoneMetadata; 46 class PhoneNumberDesc; 47 class PhoneNumberRegExpsAndMappings; 48 class RegExp; 49 50 // NOTE: A lot of methods in this class require Region Code strings. These must 51 // be provided using CLDR two-letter region-code format. These should be in 52 // upper-case. 
The list of the codes can be found here: 53 // http://www.unicode.org/cldr/charts/30/supplemental/territory_information.html 54 55 class PhoneNumberUtil : public Singleton<PhoneNumberUtil> { 56 private: 57 friend class AsYouTypeFormatter; 58 friend class PhoneNumberMatcher; 59 friend class PhoneNumberMatcherRegExps; 60 friend class PhoneNumberMatcherTest; 61 friend class PhoneNumberRegExpsAndMappings; 62 friend class PhoneNumberUtilTest; 63 friend class ShortNumberInfo; 64 friend class ShortNumberInfoTest; 65 friend class Singleton<PhoneNumberUtil>; 66 67 public: 68 // This type is neither copyable nor movable. 69 PhoneNumberUtil(const PhoneNumberUtil&) = delete; 70 PhoneNumberUtil& operator=(const PhoneNumberUtil&) = delete; 71 72 ~PhoneNumberUtil(); 73 static const char kRegionCodeForNonGeoEntity[]; 74 75 // INTERNATIONAL and NATIONAL formats are consistent with the definition 76 // in ITU-T Recommendation E.123. However we follow local conventions such as 77 // using '-' instead of whitespace as separators. For example, the number of 78 // the Google Switzerland office will be written as "+41 44 668 1800" in 79 // INTERNATIONAL format, and as "044 668 1800" in NATIONAL format. E164 80 // format is as per INTERNATIONAL format but with no formatting applied e.g. 81 // "+41446681800". RFC3966 is as per INTERNATIONAL format, but with all spaces 82 // and other separating symbols replaced with a hyphen, and with any phone 83 // number extension appended with ";ext=". It also will have a prefix of 84 // "tel:" added, e.g. "tel:+41-44-668-1800". 85 enum PhoneNumberFormat { 86 E164, 87 INTERNATIONAL, 88 NATIONAL, 89 RFC3966 90 }; 91 92 static const PhoneNumberFormat kMaxNumberFormat = RFC3966; 93 94 // Type of phone numbers. 95 enum PhoneNumberType { 96 FIXED_LINE, 97 MOBILE, 98 // In some regions (e.g. the USA), it is impossible to distinguish between 99 // fixed-line and mobile numbers by looking at the phone number itself. 
100 FIXED_LINE_OR_MOBILE, 101 // Freephone lines 102 TOLL_FREE, 103 PREMIUM_RATE, 104 // The cost of this call is shared between the caller and the recipient, and 105 // is hence typically less than PREMIUM_RATE calls. See 106 // http://en.wikipedia.org/wiki/Shared_Cost_Service for more information. 107 SHARED_COST, 108 // Voice over IP numbers. This includes TSoIP (Telephony Service over IP). 109 VOIP, 110 // A personal number is associated with a particular person, and may be 111 // routed to either a MOBILE or FIXED_LINE number. Some more information can 112 // be found here: http://en.wikipedia.org/wiki/Personal_Numbers 113 PERSONAL_NUMBER, 114 PAGER, 115 // Used for "Universal Access Numbers" or "Company Numbers". They may be 116 // further routed to specific offices, but allow one number to be used for a 117 // company. 118 UAN, 119 // Used for "Voice Mail Access Numbers". 120 VOICEMAIL, 121 // A phone number is of type UNKNOWN when it does not fit any of the known 122 // patterns for a specific region. 123 UNKNOWN 124 }; 125 126 static const PhoneNumberType kMaxNumberType = UNKNOWN; 127 128 // Types of phone number matches. See detailed description beside the 129 // IsNumberMatch() method. 130 enum MatchType { 131 INVALID_NUMBER, // NOT_A_NUMBER in the java version. 132 NO_MATCH, 133 SHORT_NSN_MATCH, 134 NSN_MATCH, 135 EXACT_MATCH, 136 }; 137 138 static const MatchType kMaxMatchType = EXACT_MATCH; 139 140 enum ErrorType { 141 NO_PARSING_ERROR, 142 INVALID_COUNTRY_CODE_ERROR, // INVALID_COUNTRY_CODE in the java version. 143 NOT_A_NUMBER, 144 TOO_SHORT_AFTER_IDD, 145 TOO_SHORT_NSN, 146 TOO_LONG_NSN, // TOO_LONG in the java version. 147 }; 148 149 static const ErrorType kMaxErrorType = TOO_LONG_NSN; 150 151 // Possible outcomes when testing if a PhoneNumber is possible. 152 enum ValidationResult { 153 // The number length matches that of valid numbers for this region. 
154 IS_POSSIBLE, 155 // The number length matches that of local numbers for this region only 156 // (i.e. numbers that may be able to be dialled within an area, but do not 157 // have all the information to be dialled from anywhere inside or outside 158 // the country). 159 IS_POSSIBLE_LOCAL_ONLY, 160 // The number has an invalid country calling code. 161 INVALID_COUNTRY_CODE, 162 // The number is shorter than all valid numbers for this region. 163 TOO_SHORT, 164 // The number is longer than the shortest valid numbers for this region, 165 // shorter than the longest valid numbers for this region, and does not 166 // itself have a number length that matches valid numbers for this region. 167 // This can also be returned in the case where 168 // IsPossibleNumberForTypeWithReason was called, and there are no numbers of 169 // this type at all for this region. 170 INVALID_LENGTH, 171 // The number is longer than all valid numbers for this region. 172 TOO_LONG, 173 }; 174 175 static const ValidationResult kMaxValidationResult = TOO_LONG; 176 177 // Returns all regions the library has metadata for. 178 // @returns an unordered set of the two-letter region codes for every 179 // geographical region the library supports 180 void GetSupportedRegions( 181 std::set<string>* regions) const; 182 183 // Returns all global network calling codes the library has metadata for. 184 // @returns an unordered set of the country calling codes for every 185 // non-geographical entity the library supports 186 void GetSupportedGlobalNetworkCallingCodes( 187 std::set<int>* calling_codes) const; 188 189 // Returns all country calling codes the library has metadata for, covering 190 // both non-geographical entities (global network calling codes) and those 191 // used for geographical entities. This could be used to populate a drop-down 192 // box of country calling codes for a phone-number widget, for instance. 
193 void GetSupportedCallingCodes(std::set<int>* calling_codes) const; 194 195 // Returns the types for a given region which the library has metadata for. 196 // Will not include FIXED_LINE_OR_MOBILE (if numbers for this region 197 // could be classified as FIXED_LINE_OR_MOBILE, both FIXED_LINE and 198 // MOBILE would be present) and UNKNOWN. 199 // 200 // No types will be returned for invalid or unknown region codes. 201 void GetSupportedTypesForRegion( 202 const string& region_code, 203 std::set<PhoneNumberType>* types) const; 204 205 // Returns the types for a country-code belonging to a non-geographical entity 206 // which the library has metadata for. Will not include FIXED_LINE_OR_MOBILE 207 // (if numbers for this 208 // non-geographical entity could be classified as FIXED_LINE_OR_MOBILE, both 209 // FIXED_LINE and MOBILE would be present) and UNKNOWN. 210 // 211 // No types will be returned for country calling codes that do not map to a 212 // known non-geographical entity. 213 void GetSupportedTypesForNonGeoEntity( 214 int country_calling_code, 215 std::set<PhoneNumberType>* types) const; 216 217 // Gets a PhoneNumberUtil instance to carry out international phone number 218 // formatting, parsing, or validation. The instance is loaded with phone 219 // number metadata for a number of most commonly used regions, as specified by 220 // DEFAULT_REGIONS_. 221 // 222 // The PhoneNumberUtil is implemented as a singleton. Therefore, calling 223 // GetInstance multiple times will only result in one instance being created. 224 static PhoneNumberUtil* GetInstance(); 225 226 // Returns true if the number is a valid vanity (alpha) number such as 800 227 // MICROSOFT. A valid vanity number will start with at least 3 digits and will 228 // have three or more alpha characters. 
This does not do region-specific 229 // checks - to work out if this number is actually valid for a region, it 230 // should be parsed and methods such as IsPossibleNumberWithReason or 231 // IsValidNumber should be used. 232 bool IsAlphaNumber(const string& number) const; 233 234 // Converts all alpha characters in a number to their respective digits on 235 // a keypad, but retains existing formatting. 236 void ConvertAlphaCharactersInNumber(string* number) const; 237 238 // Normalizes a string of characters representing a phone number. This 239 // converts wide-ascii and arabic-indic numerals to European numerals, and 240 // strips punctuation and alpha characters. 241 void NormalizeDigitsOnly(string* number) const; 242 243 // Normalizes a string of characters representing a phone number. This strips 244 // all characters which are not diallable on a mobile phone keypad (including 245 // all non-ASCII digits). 246 void NormalizeDiallableCharsOnly(string* number) const; 247 248 // Gets the national significant number of a phone number. Note a national 249 // significant number doesn't contain a national prefix or any formatting. 250 void GetNationalSignificantNumber(const PhoneNumber& number, 251 string* national_significant_num) const; 252 253 // Gets the length of the geographical area code from the PhoneNumber object 254 // passed in, so that clients could use it to split a national significant 255 // number into geographical area code and subscriber number. It works in such 256 // a way that the resultant subscriber number should be diallable, at least on 257 // some devices. 
An example of how this could be used: 258 // 259 // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance()); 260 // PhoneNumber number; 261 // phone_util.Parse("16502530000", "US", &number); 262 // string national_significant_number; 263 // phone_util.GetNationalSignificantNumber(number, 264 // &national_significant_number); 265 // string area_code; 266 // string subscriber_number; 267 // 268 // int area_code_length = phone_util.GetLengthOfGeographicalAreaCode(number); 269 // if (area_code_length > 0) { 270 // area_code = national_significant_number.substr(0, area_code_length); 271 // subscriber_number = national_significant_number.substr( 272 // area_code_length, string::npos); 273 // } else { 274 // area_code = ""; 275 // subscriber_number = national_significant_number; 276 // } 277 // 278 // N.B.: area code is a very ambiguous concept, so the authors generally 279 // recommend against using it for most purposes, but recommend using the 280 // more general national_number instead. Read the following carefully before 281 // deciding to use this method: 282 // 283 // - geographical area codes change over time, and this method honors those 284 // changes; therefore, it doesn't guarantee the stability of the result it 285 // produces. 286 // - subscriber numbers may not be diallable from all devices (notably mobile 287 // devices, which typically require the full national_number to be dialled 288 // in most regions). 289 // - most non-geographical numbers have no area codes, including numbers 290 // from non-geographical entities. 291 // - some geographical numbers have no area codes. 292 int GetLengthOfGeographicalAreaCode(const PhoneNumber& number) const; 293 294 // Gets the length of the national destination code (NDC) from the PhoneNumber 295 // object passed in, so that clients could use it to split a national 296 // significant number into NDC and subscriber number. 
The NDC of a phone 297 // number is normally the first group of digit(s) right after the country 298 // calling code when the number is formatted in the international format, if 299 // there is a subscriber number part that follows. 300 // 301 // N.B.: similar to an area code, not all numbers have an NDC! 302 // 303 // An example of how this could be used: 304 // 305 // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance()); 306 // PhoneNumber number; 307 // phone_util.Parse("16502530000", "US", &number); 308 // string national_significant_number; 309 // phone_util.GetNationalSignificantNumber(number, 310 // &national_significant_number); 311 // string national_destination_code; 312 // string subscriber_number; 313 // 314 // int national_destination_code_length = 315 // phone_util.GetLengthOfNationalDestinationCode(number); 316 // if (national_destination_code_length > 0) { 317 // national_destination_code = national_significant_number.substr( 318 // 0, national_destination_code_length); 319 // subscriber_number = national_significant_number.substr( 320 // national_destination_code_length, string::npos); 321 // } else { 322 // national_destination_code = ""; 323 // subscriber_number = national_significant_number; 324 // } 325 // 326 // Refer to the unittests to see the difference between this function and 327 // GetLengthOfGeographicalAreaCode(). 328 int GetLengthOfNationalDestinationCode(const PhoneNumber& number) const; 329 330 // Returns the mobile token for the provided country calling code if it has 331 // one, otherwise returns an empty string. A mobile token is a number inserted 332 // before the area code when dialing a mobile number from that country from 333 // abroad. 334 void GetCountryMobileToken(int country_calling_code, 335 string* mobile_token) const; 336 337 // Formats a phone number in the specified format using default rules. 
Note 338 // that this does not promise to produce a phone number that the user can 339 // dial from where they are - although we do format in either NATIONAL or 340 // INTERNATIONAL format depending on what the client asks for, we do not 341 // currently support a more abbreviated format, such as for users in the 342 // same area who could potentially dial the number without area code. 343 void Format(const PhoneNumber& number, 344 PhoneNumberFormat number_format, 345 string* formatted_number) const; 346 347 // Formats a phone number in the specified format using client-defined 348 // formatting rules. 349 void FormatByPattern( 350 const PhoneNumber& number, 351 PhoneNumberFormat number_format, 352 const RepeatedPtrField<NumberFormat>& user_defined_formats, 353 string* formatted_number) const; 354 355 // Formats a phone number in national format for dialing using the carrier as 356 // specified in the carrier_code. The carrier_code will always be used 357 // regardless of whether the phone number already has a preferred domestic 358 // carrier code stored. If carrier_code contains an empty string, return the 359 // number in national format without any carrier code. 360 void FormatNationalNumberWithCarrierCode(const PhoneNumber& number, 361 const string& carrier_code, 362 string* formatted_number) const; 363 364 // Formats a phone number in national format for dialing using the carrier as 365 // specified in the preferred_domestic_carrier_code field of the PhoneNumber 366 // object passed in. If that is missing, use the fallback_carrier_code passed 367 // in instead. If there is no preferred_domestic_carrier_code, and the 368 // fallback_carrier_code contains an empty string, return the number in 369 // national format without any carrier code. 370 // 371 // Use FormatNationalNumberWithCarrierCode instead if the carrier code passed 372 // in should take precedence over the number's preferred_domestic_carrier_code 373 // when formatting. 
374 void FormatNationalNumberWithPreferredCarrierCode( 375 const PhoneNumber& number, 376 const string& fallback_carrier_code, 377 string* formatted_number) const; 378 379 // Returns a number formatted in such a way that it can be dialed from a 380 // mobile phone in a specific region. If the number cannot be reached from 381 // the region (e.g. some countries block toll-free numbers from being called 382 // outside of the country), the method returns an empty string. 383 void FormatNumberForMobileDialing( 384 const PhoneNumber& number, 385 const string& region_calling_from, 386 bool with_formatting, 387 string* formatted_number) const; 388 389 // Formats a phone number for out-of-country dialing purposes. 390 // 391 // Note this function takes care of the case for calling inside of NANPA 392 // and between Russia and Kazakhstan (who share the same country calling 393 // code). In those cases, no international prefix is used. For regions which 394 // have multiple international prefixes, the number in its INTERNATIONAL 395 // format will be returned instead. 396 void FormatOutOfCountryCallingNumber( 397 const PhoneNumber& number, 398 const string& calling_from, 399 string* formatted_number) const; 400 401 // Formats a phone number using the original phone number format (e.g. 402 // INTERNATIONAL or NATIONAL) that the number is parsed from, provided that 403 // the number has been parsed with ParseAndKeepRawInput. Otherwise the number 404 // will be formatted in NATIONAL format. The original format is embedded in 405 // the country_code_source field of the PhoneNumber object passed in, which is 406 // only set when parsing keeps the raw input. When we don't have a formatting 407 // pattern for the number, the method falls back to returning the raw input. 408 // When the number is an invalid number, the method returns the raw input when 409 // it is available. 
410 void FormatInOriginalFormat(const PhoneNumber& number, 411 const string& region_calling_from, 412 string* formatted_number) const; 413 414 // Formats a phone number for out-of-country dialing purposes. 415 // 416 // Note that in this version, if the number was entered originally using alpha 417 // characters and this version of the number is stored in raw_input, this 418 // representation of the number will be used rather than the digit 419 // representation. Grouping information, as specified by characters such as 420 // "-" and " ", will be retained. 421 // 422 // Caveats: 423 // 1) This will not produce good results if the country calling code is both 424 // present in the raw input _and_ is the start of the national number. This 425 // is not a problem in the regions which typically use alpha numbers. 426 // 2) This will also not produce good results if the raw input has any 427 // grouping information within the first three digits of the national number, 428 // and if the function needs to strip preceding digits/words in the raw input 429 // before these digits. Normally people group the first three digits together 430 // so this is not a huge problem - and will be fixed if it proves to be so. 431 void FormatOutOfCountryKeepingAlphaChars( 432 const PhoneNumber& number, 433 const string& calling_from, 434 string* formatted_number) const; 435 436 // Attempts to extract a valid number from a phone number that is too long to 437 // be valid, and resets the PhoneNumber object passed in to that valid 438 // version. If no valid number could be extracted, the PhoneNumber object 439 // passed in will not be modified. It returns true if a valid phone number can 440 // be successfully extracted. 441 bool TruncateTooLongNumber(PhoneNumber* number) const; 442 443 // Gets the type of a valid phone number, or UNKNOWN if it is invalid. 444 PhoneNumberType GetNumberType(const PhoneNumber& number) const; 445 446 // Tests whether a phone number matches a valid pattern. 
Note this doesn't 447 // verify the number is actually in use, which is impossible to tell by just 448 // looking at a number itself. 449 // It only verifies whether the parsed, canonicalised number is valid: not 450 // whether a particular series of digits entered by the user is diallable from 451 // the region provided when parsing. For example, the number +41 (0) 78 927 452 // 2696 can be parsed into a number with country code "41" and national 453 // significant number "789272696". This is valid, while the original string 454 // is not diallable. 455 bool IsValidNumber(const PhoneNumber& number) const; 456 457 // Tests whether a phone number is valid for a certain region. Note this 458 // doesn't verify the number is actually in use, which is impossible to tell 459 // by just looking at a number itself. If the country calling code is not the 460 // same as the country calling code for the region, this immediately exits 461 // with false. After this, the specific number pattern rules for the region 462 // are examined. 463 // This is useful for determining for example whether a particular number is 464 // valid for Canada, rather than just a valid NANPA number. 465 // Warning: In most cases, you want to use IsValidNumber instead. For 466 // example, this method will mark numbers from British Crown dependencies 467 // such as the Isle of Man as invalid for the region "GB" (United Kingdom), 468 // since it has its own region code, "IM", which may be undesirable. 469 bool IsValidNumberForRegion( 470 const PhoneNumber& number, 471 const string& region_code) const; 472 473 // Returns the region where a phone number is from. This could be used for 474 // geocoding at the region level. Only guarantees correct results for valid, 475 // full numbers (not short-codes, or invalid numbers). 476 void GetRegionCodeForNumber(const PhoneNumber& number, 477 string* region_code) const; 478 479 // Returns the country calling code for a specific region. 
For example, 480 // this would be 1 for the United States, and 64 for New Zealand. 481 int GetCountryCodeForRegion(const string& region_code) const; 482 483 // Returns the region code that matches the specific country code. Note that 484 // it is possible that several regions share the same country calling code 485 // (e.g. US and Canada), and in that case, only one of the regions (normally 486 // the one with the largest population) is returned. If the 487 // country_code entered is valid but doesn't match a specific region 488 // (such as in the case of non-geographical calling codes like 800) the 489 // region code 001 will be returned (corresponding to the value for World in 490 // the UN M.49 schema). 491 void GetRegionCodeForCountryCode(int country_code, string* region_code) const; 492 493 // Populates a list with the region codes that match the specific country 494 // calling code. For non-geographical country calling codes, the region code 495 // 001 is returned. Also, in the case of no region code being found, the list 496 // is left unchanged. 497 void GetRegionCodesForCountryCallingCode( 498 int country_calling_code, 499 std::list<string>* region_codes) const; 500 501 // Checks if this is a region under the North American Numbering Plan 502 // Administration (NANPA). 503 bool IsNANPACountry(const string& region_code) const; 504 505 // Returns the national dialling prefix for a specific region. For example, 506 // this would be 1 for the United States, and 0 for New Zealand. Set 507 // strip_non_digits to true to strip symbols like "~" (which indicates a wait 508 // for a dialling tone) from the prefix returned. If no national prefix is 509 // present, we return an empty string. 510 void GetNddPrefixForRegion(const string& region_code, 511 bool strip_non_digits, 512 string* national_prefix) const; 513 514 // Checks whether a phone number is a possible number. 
It provides a more 515 // lenient check than IsValidNumber() in the following sense: 516 // 1. It only checks the length of phone numbers. In particular, it doesn't 517 // check starting digits of the number. 518 // 2. It doesn't attempt to figure out the type of the number, but uses 519 // general rules which apply to all types of phone numbers in a 520 // region. Therefore, it is much faster than IsValidNumber(). 521 // 3. For some numbers (particularly fixed-line), many regions have the 522 // concept of area code, which together with subscriber number constitute 523 // the national significant number. It is sometimes okay to dial only the 524 // subscriber number when dialing in the same area. This function will 525 // return IS_POSSIBLE_LOCAL_ONLY if the subscriber-number-only version is 526 // passed in. On the other hand, because IsValidNumber() validates using 527 // information on both starting digits (for fixed line numbers, that 528 // would most likely be area codes) and length (obviously includes the 529 // length of area codes for fixed line numbers), it will return false for 530 // the subscriber-number-only version. 531 ValidationResult IsPossibleNumberWithReason(const PhoneNumber& number) const; 532 533 // Convenience wrapper around IsPossibleNumberWithReason(). Instead of 534 // returning the reason for failure, this method returns true if the number is 535 // either a possible fully-qualified number (containing the area code and 536 // country code), or if the number could be a possible local number (with a 537 // country code, but missing an area code). Local numbers are considered 538 // possible if they could be possibly dialled in this format: if the area code 539 // is needed for a call to connect, the number is not considered possible 540 // without it. 541 bool IsPossibleNumber(const PhoneNumber& number) const; 542 543 // Check whether a phone number is a possible number of a particular type. 
For 544 // types that don't exist in a particular region, this will return a result 545 // that isn't so useful; it is recommended that you use 546 // GetSupportedTypesForRegion() or GetSupportedTypesForNonGeoEntity() 547 // respectively before calling this method to determine whether you should 548 // call it for this number at all. 549 // 550 // This provides a more lenient check than IsValidNumber() in the following 551 // sense: 552 // 553 // 1. It only checks the length of phone numbers. In particular, it doesn't 554 // check starting digits of the number. 555 // 2. For some numbers (particularly fixed-line), many regions have the 556 // concept of area code, which together with subscriber number constitute 557 // the national significant number. It is sometimes okay to dial only the 558 // subscriber number when dialing in the same area. This function will 559 // return IS_POSSIBLE_LOCAL_ONLY if the subscriber-number-only version is 560 // passed in. On the other hand, because IsValidNumber() validates using 561 // information on both starting digits (for fixed line numbers, that 562 // would most likely be area codes) and length (obviously includes the 563 // length of area codes for fixed line numbers), it will return false for 564 // the subscriber-number-only version. 565 ValidationResult IsPossibleNumberForTypeWithReason( 566 const PhoneNumber& number, PhoneNumberType type) const; 567 568 // Convenience wrapper around IsPossibleNumberForTypeWithReason(). Instead of 569 // returning the reason for failure, this method returns true if the number is 570 // either a possible fully-qualified number (containing the area code and 571 // country code), or if the number could be a possible local number (with a 572 // country code, but missing an area code). Local numbers are considered 573 // possible if they could be possibly dialled in this format: if the area code 574 // is needed for a call to connect, the number is not considered possible 575 // without it. 
576 bool IsPossibleNumberForType(const PhoneNumber& number, 577 PhoneNumberType type) const; 578 579 // Checks whether a phone number is a possible number given a number in the 580 // form of a string, and the country where the number could be dialed from. 581 // It provides a more lenient check than IsValidNumber(). See 582 // IsPossibleNumber(const PhoneNumber& number) for details. 583 // 584 // This method first parses the number, then invokes 585 // IsPossibleNumber(const PhoneNumber& number) with the resultant PhoneNumber 586 // object. 587 // 588 // region_dialing_from represents the region that we are expecting the number 589 // to be dialed from. Note this is different from the region where the number 590 // belongs. For example, the number +1 650 253 0000 is a number that belongs 591 // to US. When written in this form, it could be dialed from any region. When 592 // it is written as 00 1 650 253 0000, it could be dialed from any region 593 // which uses an international dialling prefix of 00. When it is written as 594 // 650 253 0000, it could only be dialed from within the US, and when written 595 // as 253 0000, it could only be dialed from within a smaller area in the US 596 // (Mountain View, CA, to be more specific). 597 bool IsPossibleNumberForString( 598 const string& number, 599 const string& region_dialing_from) const; 600 601 // Returns true if the number can be dialled from outside the region, or 602 // unknown. If the number can only be dialled from within the region, returns 603 // false. Does not check the number is a valid number. Note that, at the 604 // moment, this method does not handle short numbers (which are currently all 605 // presumed to not be diallable from outside their country). 606 bool CanBeInternationallyDialled(const PhoneNumber& number) const; 607 608 // Tests whether a phone number has a geographical association. It checks if 609 // the number is associated with a certain region in the country to which it 610 // belongs. 
Note that this doesn't verify if the number is actually in use. 611 bool IsNumberGeographical(const PhoneNumber& phone_number) const; 612 613 // Overload of IsNumberGeographical(PhoneNumber), since calculating the phone 614 // number type is expensive; if we have already done this, we don't want to do 615 // it again. 616 bool IsNumberGeographical(PhoneNumberType phone_number_type, 617 int country_calling_code) const; 618 619 // Gets a valid fixed-line number for the specified region. Returns false if 620 // the region was unknown, or the region 001 is passed in. For 001 621 // (representing non-geographical numbers), call 622 // GetExampleNumberForNonGeoEntity instead. 623 bool GetExampleNumber(const string& region_code, 624 PhoneNumber* number) const; 625 626 // Gets an invalid number for the specified region. This is useful for 627 // unit-testing purposes, where you want to test what will happen with an 628 // invalid number. Note that the number that is returned will always be able 629 // to be parsed and will have the correct country code. It may also be a valid 630 // *short* number/code for this region. Validity checking such 631 // numbers is handled with ShortNumberInfo. 632 // 633 // Returns false when an unsupported region or the region 001 (Earth) is 634 // passed in. 635 bool GetInvalidExampleNumber(const string& region_code, 636 PhoneNumber* number) const; 637 638 // Gets a valid number of the specified type for the specified region. 639 // Returns false if the region was unknown or 001, or if no example number of 640 // that type could be found. For 001 (representing non-geographical numbers), 641 // call GetExampleNumberForNonGeoEntity instead. 642 bool GetExampleNumberForType(const string& region_code, 643 PhoneNumberType type, 644 PhoneNumber* number) const; 645 646 // Gets a valid number for the specified type (it may belong to any country). 647 // Returns false when the metadata does not contain such information. 
  // This should only happen when no numbers of this type are allocated
  // anywhere in the world anymore.
  bool GetExampleNumberForType(PhoneNumberType type,
                               PhoneNumber* number) const;

  // Gets a valid number for the specified country calling code for a
  // non-geographical entity. Returns false if the metadata does not contain
  // such information, or the country calling code passed in does not belong to
  // a non-geographical entity.
  bool GetExampleNumberForNonGeoEntity(
      int country_calling_code, PhoneNumber* number) const;

  // Parses a string and returns it as a phone number in proto buffer format.
  // The method is quite lenient and looks for a number in the input text
  // (raw input) and does not check whether the string is definitely only a
  // phone number. To do this, it ignores punctuation and white-space, as well
  // as any text before the number (e.g. a leading “Tel: ”) and trims the
  // non-number bits. It will accept a number in any format (E164, national,
  // international etc), assuming it can be interpreted with the default_region
  // supplied. It also attempts to convert any alpha characters into digits
  // if it thinks this is a vanity number of the type "1800 MICROSOFT".
  //
  // This method will return an error if the number is not considered to be a
  // possible number, and NO_PARSING_ERROR if it is parsed correctly.
  // Note that validation of whether the number is actually a valid number for
  // a particular region is not performed. This can be done separately with
  // IsValidNumber().
  //
  // Note this method canonicalizes the phone number such that different
  // representations can be easily compared, no matter what form it was
  // originally entered in (e.g. national, international). If you want to record
  // context about the number being parsed, such as the raw input that was
  // entered, how the country code was derived etc.
  // then call ParseAndKeepRawInput() instead.
  //
  // number_to_parse can contain formatting such as +, ( and -, as well as a
  // phone number extension. It can also be provided in RFC3966 format.
  //
  // default_region represents the country that we are expecting the number to
  // be from. This is only used if the number being parsed is not written in
  // international format. The country_code for the number in this case would be
  // stored as that of the default country supplied. If the number is guaranteed
  // to start with a '+' followed by the country calling code, then
  // "ZZ" can be supplied.
  //
  // Returns an error if the string is not considered to be a viable phone
  // number (e.g. too few or too many digits) or if no default region was
  // supplied and the number is not in international format (does not start with
  // +).
  ErrorType Parse(const string& number_to_parse,
                  const string& default_region,
                  PhoneNumber* number) const;

  // Parses a string and returns it in proto buffer format. This method differs
  // from Parse() in that it always populates the raw_input field of the
  // protocol buffer with number_to_parse as well as the country_code_source
  // field.
  ErrorType ParseAndKeepRawInput(const string& number_to_parse,
                                 const string& default_region,
                                 PhoneNumber* number) const;

  // Takes two phone numbers and compares them for equality.
  //
  // Returns EXACT_MATCH if the country calling code, NSN, presence of a leading
  // zero for Italian numbers and any extension present are the same.
  // Returns NSN_MATCH if either or both has no country calling code specified,
  // and the NSNs and extensions are the same.
  // Returns SHORT_NSN_MATCH if either or both has no country calling code
  // specified, or the country calling code specified is the same, and one NSN
  // could be a shorter version of the other number.
  // This includes the case where one has an extension specified, and the other
  // does not.
  // Returns NO_MATCH otherwise.
  // For example, the numbers +1 345 657 1234 and 657 1234 are a
  // SHORT_NSN_MATCH. The numbers +1 345 657 1234 and 345 657 are a NO_MATCH.
  MatchType IsNumberMatch(const PhoneNumber& first_number,
                          const PhoneNumber& second_number) const;

  // Takes two phone numbers as strings and compares them for equality. This
  // is a convenience wrapper for the IsNumberMatch(first_number,
  // second_number) overload that takes two PhoneNumber objects. No default
  // region is known.
  // Returns INVALID_NUMBER if either number cannot be parsed into a phone
  // number.
  MatchType IsNumberMatchWithTwoStrings(const string& first_number,
                                        const string& second_number) const;

  // Takes two phone numbers and compares them for equality. This is a
  // convenience wrapper for the IsNumberMatch(first_number, second_number)
  // overload that takes two PhoneNumber objects. No default region is known.
  // Returns INVALID_NUMBER if second_number cannot be parsed into a phone
  // number.
  MatchType IsNumberMatchWithOneString(const PhoneNumber& first_number,
                                       const string& second_number) const;

  // Overrides the default logging system. This takes ownership of the provided
  // logger.
  void SetLogger(Logger* logger);

  // Gets an AsYouTypeFormatter for the specific region.
  // Returns an AsYouTypeFormatter object, which could be used to format phone
  // numbers in the specific region "as you type".
  // The deletion of the returned instance is under the responsibility of the
  // caller.
  AsYouTypeFormatter* GetAsYouTypeFormatter(const string& region_code) const;

  // Free functions granted friend access. Per their signatures, each takes one
  // proto type by const reference and a pointer to the other
  // (TelephoneNumber / PhoneNumber).
  friend bool ConvertFromTelephoneNumberProto(
      const TelephoneNumber& proto_to_convert,
      PhoneNumber* new_proto);
  friend bool ConvertToTelephoneNumberProto(const PhoneNumber& proto_to_convert,
                                            TelephoneNumber* resulting_proto);

 protected:
  // NOTE(review): the two helpers below are undocumented in this header. Their
  // names suggest matching a national number against a PhoneNumberDesc and
  // deriving a PhoneNumberType from a region's metadata - confirm against the
  // implementation.
  bool IsNumberMatchingDesc(const string& national_number,
                            const PhoneNumberDesc& number_desc) const;

  PhoneNumberUtil::PhoneNumberType GetNumberTypeHelper(
      const string& national_number, const PhoneMetadata& metadata) const;

 private:
  // NOTE(review): presumably holds the logger installed via SetLogger(), whose
  // comment states that ownership of the logger is taken - confirm.
  scoped_ptr<Logger> logger_;

  // Pairs an int with a pointer to a list of strings. Used as the element type
  // of country_calling_code_to_region_code_map_.
  typedef std::pair<int, std::list<string>*> IntRegionsPair;

  // The minimum and maximum length of the national significant number.
  static const size_t kMinLengthForNsn = 2;
  // The ITU says the maximum length should be 15, but we have found longer
  // numbers in Germany.
  static const size_t kMaxLengthForNsn = 17;
  // The maximum length of the country calling code.
  static const size_t kMaxLengthCountryCode = 3;

  // NOTE(review): presumably the characters accepted as a leading plus sign
  // (see StartsWithPlusCharsPattern) - confirm against the definition.
  static const char kPlusChars[];
  // Regular expression of acceptable punctuation found in phone numbers. This
  // excludes punctuation found as a leading character only. This consists of
  // dash characters, white space characters, full stops, slashes, square
  // brackets, parentheses and tildes. It also includes the letter 'x' as that
  // is found as a placeholder for carrier information in some phone numbers.
  // Full-width variants are also present.
  static const char kValidPunctuation[];

  // Regular expression of characters typically used to start a second phone
  // number for the purposes of parsing.
  // This allows us to strip off parts of the number that are actually the
  // start of another number, such as for:
  // (530) 583-6985 x302/x2303 -> the second extension here makes this actually
  // two phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove
  // the second extension so that the first number is parsed correctly. The
  // string preceding this is captured.
  // This corresponds to SECOND_NUMBER_START in the Java version.
  static const char kCaptureUpToSecondNumberStart[];

  // An API for validation checking.
  scoped_ptr<MatcherApi> matcher_api_;

  // Helper class holding useful regular expressions and character mappings.
  scoped_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;

  // A mapping from a country calling code to a RegionCode object which denotes
  // the region represented by that country calling code. Note regions under
  // NANPA share the country calling code 1 and Russia and Kazakhstan share the
  // country calling code 7. Under this map, 1 is mapped to region code "US" and
  // 7 is mapped to region code "RU". This is implemented as a sorted vector to
  // achieve better performance.
  // Each element is an IntRegionsPair, i.e. a
  // std::pair<int, std::list<string>*>.
  scoped_ptr<std::vector<IntRegionsPair> >
      country_calling_code_to_region_code_map_;

  // The set of regions that share country calling code 1.
  scoped_ptr<std::set<string> > nanpa_regions_;
  // The country calling code (1) shared by the NANPA regions.
  static const int kNanpaCountryCode = 1;

  // A mapping from a region code to a PhoneMetadata for that region.
  scoped_ptr<std::map<string, PhoneMetadata> > region_to_metadata_map_;

  // A mapping from a country calling code for a non-geographical entity to the
  // PhoneMetadata for that country calling code. Examples of the country
  // calling codes include 800 (International Toll Free Service) and 808
  // (International Shared Cost Service).
  scoped_ptr<std::map<int, PhoneMetadata> >
      country_code_to_non_geographical_metadata_map_;

  // Private constructor.
  // NOTE(review): Singleton<PhoneNumberUtil> is both the base class and a
  // friend, so instances are presumably created only through it - confirm.
  PhoneNumberUtil();

  // Returns a regular expression for the possible extensions that may be found
  // in a number, for use when matching.
  const string& GetExtnPatternsForMatching() const;

  // Checks if a number matches the plus chars pattern.
  bool StartsWithPlusCharsPattern(const string& number) const;

  // NOTE(review): undocumented here. Judging by the name and signature it
  // records leading-zero information for national_number on phone_number -
  // confirm against the implementation.
  void SetItalianLeadingZerosForPhoneNumber(
      const string& national_number, PhoneNumber* phone_number) const;

  // Checks whether a string contains only valid digits.
  bool ContainsOnlyValidDigits(const string& s) const;

  // Checks if a format is eligible to be used by the AsYouTypeFormatter. This
  // method is here rather than in asyoutypeformatter.h since it depends on the
  // valid punctuation declared by the phone number util.
  bool IsFormatEligibleForAsYouTypeFormatter(const string& format) const;

  // Helper function to check if the national prefix formatting rule has the
  // first group only, i.e., does not start with the national prefix.
  bool FormattingRuleHasFirstGroupOnly(
      const string& national_prefix_formatting_rule) const;

  // Trims unwanted end characters from a phone number string.
  void TrimUnwantedEndChars(string* number) const;

  // Helper function to check region code is not unknown or null.
  bool IsValidRegionCode(const string& region_code) const;

  // Helper function to check the country calling code is valid.
  bool HasValidCountryCallingCode(int country_calling_code) const;

  // NOTE(review): the three metadata getters below are undocumented here. They
  // return pointers to const PhoneMetadata; what is returned for an unknown
  // region or calling code is not visible in this header - confirm against the
  // implementation before dereferencing.
  const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegion(
      const string& region_code) const;

  const i18n::phonenumbers::PhoneMetadata* GetMetadataForNonGeographicalRegion(
      int country_calling_code) const;

  const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegionOrCallingCode(
      int country_calling_code,
      const string& region_code) const;

  // As per GetCountryCodeForRegion, but assumes the validity of the region_code
  // has already been checked.
  int GetCountryCodeForValidRegion(const string& region_code) const;

  // NOTE(review): undocumented here; what is returned when none of
  // available_formats applies to national_number is not visible - confirm.
  const NumberFormat* ChooseFormattingPatternForNumber(
      const RepeatedPtrField<NumberFormat>& available_formats,
      const string& national_number) const;

  // NOTE(review): the two formatting helpers below are undocumented here. They
  // differ only in the extra carrier_code parameter; both take the output
  // string as the final pointer argument.
  void FormatNsnUsingPatternWithCarrier(
      const string& national_number,
      const NumberFormat& formatting_pattern,
      PhoneNumberUtil::PhoneNumberFormat number_format,
      const string& carrier_code,
      string* formatted_number) const;

  void FormatNsnUsingPattern(
      const string& national_number,
      const NumberFormat& formatting_pattern,
      PhoneNumberUtil::PhoneNumberFormat number_format,
      string* formatted_number) const;

  // Check if raw_input, which is assumed to be in the national format, has a
  // national prefix. The national prefix is assumed to be in digits-only form.
  bool RawInputContainsNationalPrefix(
      const string& raw_input,
      const string& national_prefix,
      const string& region_code) const;

  bool HasFormattingPatternForNumber(const PhoneNumber& number) const;

  // Simple wrapper of FormatNsnWithCarrier for the common case of
  // no carrier code.
  void FormatNsn(const string& number,
                 const PhoneMetadata& metadata,
                 PhoneNumberFormat number_format,
                 string* formatted_number) const;

  void FormatNsnWithCarrier(const string& number,
                            const PhoneMetadata& metadata,
                            PhoneNumberFormat number_format,
                            const string& carrier_code,
                            string* formatted_number) const;

  void MaybeAppendFormattedExtension(
      const PhoneNumber& number,
      const PhoneMetadata& metadata,
      PhoneNumberFormat number_format,
      string* extension) const;

  void GetRegionCodeForNumberFromRegionList(
      const PhoneNumber& number,
      const std::list<string>& region_codes,
      string* region_code) const;

  // Strips the IDD from the start of the number if present. Helper function
  // used by MaybeStripInternationalPrefixAndNormalize.
  bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) const;

  void Normalize(string* number) const;

  PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize(
      const string& possible_idd_prefix,
      string* number) const;

  bool MaybeStripNationalPrefixAndCarrierCode(
      const PhoneMetadata& metadata,
      string* number,
      string* carrier_code) const;

  void ExtractPossibleNumber(const string& number,
                             string* extracted_number) const;

  bool IsViablePhoneNumber(const string& number) const;

  bool MaybeStripExtension(string* number, string* extension) const;

  int ExtractCountryCode(string* national_number) const;
  // NOTE(review): keepRawInput is camelCase here, whereas ParseHelper below
  // names the corresponding flag keep_raw_input; consider aligning the two.
  ErrorType MaybeExtractCountryCode(
      const PhoneMetadata* default_region_metadata,
      bool keepRawInput,
      string* national_number,
      PhoneNumber* phone_number) const;

  bool CheckRegionForParsing(
      const string& number_to_parse,
      const string& default_region) const;

  // NOTE(review): undocumented here. Presumably the shared implementation
  // behind Parse() and ParseAndKeepRawInput(), with keep_raw_input selecting
  // between them - confirm against the implementation.
  ErrorType ParseHelper(const string& number_to_parse,
                        const string& default_region,
                        bool keep_raw_input,
                        bool check_region,
                        PhoneNumber* phone_number) const;
  // NOTE(review): undocumented here. Takes the string to parse and an output
  // string for the national number; exact extraction rules are not visible in
  // this header - confirm against the implementation.
  void BuildNationalNumberForParsing(const string& number_to_parse,
                                     string* national_number) const;

  // NOTE(review): undocumented here. country_metadata is passed by pointer;
  // whether a null pointer is accepted is not visible - confirm.
  bool IsShorterThanPossibleNormalNumber(const PhoneMetadata* country_metadata,
                                         const string& number) const;

};

}  // namespace phonenumbers
}  // namespace i18n

#endif  // I18N_PHONENUMBERS_PHONENUMBERUTIL_H_