1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* 6* Copyright (C) 1997-2016, International Business Machines 7* Corporation and others. All Rights Reserved. 8* 9******************************************************************************* 10* file name: loclikely.cpp 11* encoding: UTF-8 12* tab size: 8 (not used) 13* indentation:4 14* 15* created on: 2010feb25 16* created by: Markus W. Scherer 17* 18* Code for likely and minimized locale subtags, separated out from other .cpp files 19* that then do not depend on resource bundle code and likely-subtags data. 20*/ 21 22#include "unicode/bytestream.h" 23#include "unicode/utypes.h" 24#include "unicode/locid.h" 25#include "unicode/putil.h" 26#include "unicode/uchar.h" 27#include "unicode/uloc.h" 28#include "unicode/ures.h" 29#include "unicode/uscript.h" 30#include "bytesinkutil.h" 31#include "charstr.h" 32#include "cmemory.h" 33#include "cstring.h" 34#include "ulocimp.h" 35#include "ustr_imp.h" 36 37/** 38 * These are the canonical strings for unknown languages, scripts and regions. 39 **/ 40static const char* const unknownLanguage = "und"; 41static const char* const unknownScript = "Zzzz"; 42static const char* const unknownRegion = "ZZ"; 43 44/** 45 * This function looks for the localeID in the likelySubtags resource. 46 * 47 * @param localeID The tag to find. 48 * @param buffer A buffer to hold the matching entry 49 * @param bufferLength The length of the output buffer 50 * @return A pointer to "buffer" if found, or a null pointer if not. 51 */ 52static const char* U_CALLCONV 53findLikelySubtags(const char* localeID, 54 char* buffer, 55 int32_t bufferLength, 56 UErrorCode* err) { 57 const char* result = NULL; 58 59 if (!U_FAILURE(*err)) { 60 int32_t resLen = 0; 61 const UChar* s = NULL; 62 UErrorCode tmpErr = U_ZERO_ERROR; 63 icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr)); 64 if (U_SUCCESS(tmpErr)) { 65 icu::CharString und; 66 if (localeID != NULL) { 67 if (*localeID == '\0') { 68 localeID = unknownLanguage; 69 } else if (*localeID == '_') { 70 und.append(unknownLanguage, *err); 71 und.append(localeID, *err); 72 if (U_FAILURE(*err)) { 73 return NULL; 74 } 75 localeID = und.data(); 76 } 77 } 78 s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr); 79 80 if (U_FAILURE(tmpErr)) { 81 /* 82 * If a resource is missing, it's not really an error, it's 83 * just that we don't have any data for that particular locale ID. 84 */ 85 if (tmpErr != U_MISSING_RESOURCE_ERROR) { 86 *err = tmpErr; 87 } 88 } 89 else if (resLen >= bufferLength) { 90 /* The buffer should never overflow. */ 91 *err = U_INTERNAL_PROGRAM_ERROR; 92 } 93 else { 94 u_UCharsToChars(s, buffer, resLen + 1); 95 if (resLen >= 3 && 96 uprv_strnicmp(buffer, unknownLanguage, 3) == 0 && 97 (resLen == 3 || buffer[3] == '_')) { 98 uprv_memmove(buffer, buffer + 3, resLen - 3 + 1); 99 } 100 result = buffer; 101 } 102 } else { 103 *err = tmpErr; 104 } 105 } 106 107 return result; 108} 109 110/** 111 * Append a tag to a buffer, adding the separator if necessary. The buffer 112 * must be large enough to contain the resulting tag plus any separator 113 * necessary. The tag must not be a zero-length string. 114 * 115 * @param tag The tag to add. 116 * @param tagLength The length of the tag. 117 * @param buffer The output buffer. 118 * @param bufferLength The length of the output buffer. This is an input/output parameter. 119 **/ 120static void U_CALLCONV 121appendTag( 122 const char* tag, 123 int32_t tagLength, 124 char* buffer, 125 int32_t* bufferLength, 126 UBool withSeparator) { 127 128 if (withSeparator) { 129 buffer[*bufferLength] = '_'; 130 ++(*bufferLength); 131 } 132 133 uprv_memmove( 134 &buffer[*bufferLength], 135 tag, 136 tagLength); 137 138 *bufferLength += tagLength; 139} 140 141/** 142 * Create a tag string from the supplied parameters. The lang, script and region 143 * parameters may be NULL pointers. If they are, their corresponding length parameters 144 * must be less than or equal to 0. 145 * 146 * If any of the language, script or region parameters are empty, and the alternateTags 147 * parameter is not NULL, it will be parsed for potential language, script and region tags 148 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or 149 * it contains no language tag, the default tag for the unknown language is used. 150 * 151 * If the length of the new string exceeds the capacity of the output buffer, 152 * the function copies as many bytes to the output buffer as it can, and returns 153 * the error U_BUFFER_OVERFLOW_ERROR. 154 * 155 * If an illegal argument is provided, the function returns the error 156 * U_ILLEGAL_ARGUMENT_ERROR. 157 * 158 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if 159 * the tag string fits in the output buffer, but the null terminator doesn't. 160 * 161 * @param lang The language tag to use. 162 * @param langLength The length of the language tag. 163 * @param script The script tag to use. 164 * @param scriptLength The length of the script tag. 165 * @param region The region tag to use. 166 * @param regionLength The length of the region tag. 167 * @param trailing Any trailing data to append to the new tag. 168 * @param trailingLength The length of the trailing data. 169 * @param alternateTags A string containing any alternate tags. 170 * @param sink The output sink receiving the tag string. 171 * @param err A pointer to a UErrorCode for error reporting. 172 **/ 173static void U_CALLCONV 174createTagStringWithAlternates( 175 const char* lang, 176 int32_t langLength, 177 const char* script, 178 int32_t scriptLength, 179 const char* region, 180 int32_t regionLength, 181 const char* trailing, 182 int32_t trailingLength, 183 const char* alternateTags, 184 icu::ByteSink& sink, 185 UErrorCode* err) { 186 187 if (U_FAILURE(*err)) { 188 goto error; 189 } 190 else if (langLength >= ULOC_LANG_CAPACITY || 191 scriptLength >= ULOC_SCRIPT_CAPACITY || 192 regionLength >= ULOC_COUNTRY_CAPACITY) { 193 goto error; 194 } 195 else { 196 /** 197 * ULOC_FULLNAME_CAPACITY will provide enough capacity 198 * that we can build a string that contains the language, 199 * script and region code without worrying about overrunning 200 * the user-supplied buffer. 201 **/ 202 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 203 int32_t tagLength = 0; 204 UBool regionAppended = false; 205 206 if (langLength > 0) { 207 appendTag( 208 lang, 209 langLength, 210 tagBuffer, 211 &tagLength, 212 /*withSeparator=*/false); 213 } 214 else if (alternateTags == NULL) { 215 /* 216 * Use the empty string for an unknown language, if 217 * we found no language. 218 */ 219 } 220 else { 221 /* 222 * Parse the alternateTags string for the language. 223 */ 224 char alternateLang[ULOC_LANG_CAPACITY]; 225 int32_t alternateLangLength = sizeof(alternateLang); 226 227 alternateLangLength = 228 uloc_getLanguage( 229 alternateTags, 230 alternateLang, 231 alternateLangLength, 232 err); 233 if(U_FAILURE(*err) || 234 alternateLangLength >= ULOC_LANG_CAPACITY) { 235 goto error; 236 } 237 else if (alternateLangLength == 0) { 238 /* 239 * Use the empty string for an unknown language, if 240 * we found no language. 241 */ 242 } 243 else { 244 appendTag( 245 alternateLang, 246 alternateLangLength, 247 tagBuffer, 248 &tagLength, 249 /*withSeparator=*/false); 250 } 251 } 252 253 if (scriptLength > 0) { 254 appendTag( 255 script, 256 scriptLength, 257 tagBuffer, 258 &tagLength, 259 /*withSeparator=*/true); 260 } 261 else if (alternateTags != NULL) { 262 /* 263 * Parse the alternateTags string for the script. 264 */ 265 char alternateScript[ULOC_SCRIPT_CAPACITY]; 266 267 const int32_t alternateScriptLength = 268 uloc_getScript( 269 alternateTags, 270 alternateScript, 271 sizeof(alternateScript), 272 err); 273 274 if (U_FAILURE(*err) || 275 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { 276 goto error; 277 } 278 else if (alternateScriptLength > 0) { 279 appendTag( 280 alternateScript, 281 alternateScriptLength, 282 tagBuffer, 283 &tagLength, 284 /*withSeparator=*/true); 285 } 286 } 287 288 if (regionLength > 0) { 289 appendTag( 290 region, 291 regionLength, 292 tagBuffer, 293 &tagLength, 294 /*withSeparator=*/true); 295 296 regionAppended = true; 297 } 298 else if (alternateTags != NULL) { 299 /* 300 * Parse the alternateTags string for the region. 301 */ 302 char alternateRegion[ULOC_COUNTRY_CAPACITY]; 303 304 const int32_t alternateRegionLength = 305 uloc_getCountry( 306 alternateTags, 307 alternateRegion, 308 sizeof(alternateRegion), 309 err); 310 if (U_FAILURE(*err) || 311 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { 312 goto error; 313 } 314 else if (alternateRegionLength > 0) { 315 appendTag( 316 alternateRegion, 317 alternateRegionLength, 318 tagBuffer, 319 &tagLength, 320 /*withSeparator=*/true); 321 322 regionAppended = true; 323 } 324 } 325 326 /** 327 * Copy the partial tag from our internal buffer to the supplied 328 * target. 329 **/ 330 sink.Append(tagBuffer, tagLength); 331 332 if (trailingLength > 0) { 333 if (*trailing != '@') { 334 sink.Append("_", 1); 335 if (!regionAppended) { 336 /* extra separator is required */ 337 sink.Append("_", 1); 338 } 339 } 340 341 /* 342 * Copy the trailing data into the supplied buffer. 343 */ 344 sink.Append(trailing, trailingLength); 345 } 346 347 return; 348 } 349 350error: 351 352 /** 353 * An overflow indicates the locale ID passed in 354 * is ill-formed. If we got here, and there was 355 * no previous error, it's an implicit overflow. 356 **/ 357 if (*err == U_BUFFER_OVERFLOW_ERROR || 358 U_SUCCESS(*err)) { 359 *err = U_ILLEGAL_ARGUMENT_ERROR; 360 } 361} 362 363/** 364 * Create a tag string from the supplied parameters. The lang, script and region 365 * parameters may be NULL pointers. If they are, their corresponding length parameters 366 * must be less than or equal to 0. If the lang parameter is an empty string, the 367 * default value for an unknown language is written to the output buffer. 368 * 369 * If the length of the new string exceeds the capacity of the output buffer, 370 * the function copies as many bytes to the output buffer as it can, and returns 371 * the error U_BUFFER_OVERFLOW_ERROR. 372 * 373 * If an illegal argument is provided, the function returns the error 374 * U_ILLEGAL_ARGUMENT_ERROR. 375 * 376 * @param lang The language tag to use. 377 * @param langLength The length of the language tag. 378 * @param script The script tag to use. 379 * @param scriptLength The length of the script tag. 380 * @param region The region tag to use. 381 * @param regionLength The length of the region tag. 382 * @param trailing Any trailing data to append to the new tag. 383 * @param trailingLength The length of the trailing data. 384 * @param sink The output sink receiving the tag string. 385 * @param err A pointer to a UErrorCode for error reporting. 386 **/ 387static void U_CALLCONV 388createTagString( 389 const char* lang, 390 int32_t langLength, 391 const char* script, 392 int32_t scriptLength, 393 const char* region, 394 int32_t regionLength, 395 const char* trailing, 396 int32_t trailingLength, 397 icu::ByteSink& sink, 398 UErrorCode* err) 399{ 400 createTagStringWithAlternates( 401 lang, 402 langLength, 403 script, 404 scriptLength, 405 region, 406 regionLength, 407 trailing, 408 trailingLength, 409 NULL, 410 sink, 411 err); 412} 413 414/** 415 * Parse the language, script, and region subtags from a tag string, and copy the 416 * results into the corresponding output parameters. The buffers are null-terminated, 417 * unless overflow occurs. 418 * 419 * The langLength, scriptLength, and regionLength parameters are input/output 420 * parameters, and must contain the capacity of their corresponding buffers on 421 * input. On output, they will contain the actual length of the buffers, not 422 * including the null terminator. 423 * 424 * If the length of any of the output subtags exceeds the capacity of the corresponding 425 * buffer, the function copies as many bytes to the output buffer as it can, and returns 426 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow 427 * occurs. 428 * 429 * If an illegal argument is provided, the function returns the error 430 * U_ILLEGAL_ARGUMENT_ERROR. 431 * 432 * @param localeID The locale ID to parse. 433 * @param lang The language tag buffer. 434 * @param langLength The length of the language tag. 435 * @param script The script tag buffer. 436 * @param scriptLength The length of the script tag. 437 * @param region The region tag buffer. 438 * @param regionLength The length of the region tag. 439 * @param err A pointer to a UErrorCode for error reporting. 440 * @return The number of chars of the localeID parameter consumed. 441 **/ 442static int32_t U_CALLCONV 443parseTagString( 444 const char* localeID, 445 char* lang, 446 int32_t* langLength, 447 char* script, 448 int32_t* scriptLength, 449 char* region, 450 int32_t* regionLength, 451 UErrorCode* err) 452{ 453 const char* position = localeID; 454 int32_t subtagLength = 0; 455 456 if(U_FAILURE(*err) || 457 localeID == NULL || 458 lang == NULL || 459 langLength == NULL || 460 script == NULL || 461 scriptLength == NULL || 462 region == NULL || 463 regionLength == NULL) { 464 goto error; 465 } 466 467 subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err); 468 469 /* 470 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING 471 * to be an error, because it indicates the user-supplied tag is 472 * not well-formed. 473 */ 474 if(U_FAILURE(*err)) { 475 goto error; 476 } 477 478 *langLength = subtagLength; 479 480 /* 481 * If no language was present, use the empty string instead. 482 * Otherwise, move past any separator. 483 */ 484 if (_isIDSeparator(*position)) { 485 ++position; 486 } 487 488 subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err); 489 490 if(U_FAILURE(*err)) { 491 goto error; 492 } 493 494 *scriptLength = subtagLength; 495 496 if (*scriptLength > 0) { 497 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { 498 /** 499 * If the script part is the "unknown" script, then don't return it. 500 **/ 501 *scriptLength = 0; 502 } 503 504 /* 505 * Move past any separator. 506 */ 507 if (_isIDSeparator(*position)) { 508 ++position; 509 } 510 } 511 512 subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err); 513 514 if(U_FAILURE(*err)) { 515 goto error; 516 } 517 518 *regionLength = subtagLength; 519 520 if (*regionLength > 0) { 521 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { 522 /** 523 * If the region part is the "unknown" region, then don't return it. 524 **/ 525 *regionLength = 0; 526 } 527 } else if (*position != 0 && *position != '@') { 528 /* back up over consumed trailing separator */ 529 --position; 530 } 531 532exit: 533 534 return (int32_t)(position - localeID); 535 536error: 537 538 /** 539 * If we get here, we have no explicit error, it's the result of an 540 * illegal argument. 541 **/ 542 if (!U_FAILURE(*err)) { 543 *err = U_ILLEGAL_ARGUMENT_ERROR; 544 } 545 546 goto exit; 547} 548 549static UBool U_CALLCONV 550createLikelySubtagsString( 551 const char* lang, 552 int32_t langLength, 553 const char* script, 554 int32_t scriptLength, 555 const char* region, 556 int32_t regionLength, 557 const char* variants, 558 int32_t variantsLength, 559 icu::ByteSink& sink, 560 UErrorCode* err) { 561 /** 562 * ULOC_FULLNAME_CAPACITY will provide enough capacity 563 * that we can build a string that contains the language, 564 * script and region code without worrying about overrunning 565 * the user-supplied buffer. 566 **/ 567 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; 568 569 if(U_FAILURE(*err)) { 570 goto error; 571 } 572 573 /** 574 * Try the language with the script and region first. 575 **/ 576 if (scriptLength > 0 && regionLength > 0) { 577 578 const char* likelySubtags = NULL; 579 580 icu::CharString tagBuffer; 581 { 582 icu::CharStringByteSink sink(&tagBuffer); 583 createTagString( 584 lang, 585 langLength, 586 script, 587 scriptLength, 588 region, 589 regionLength, 590 NULL, 591 0, 592 sink, 593 err); 594 } 595 if(U_FAILURE(*err)) { 596 goto error; 597 } 598 599 likelySubtags = 600 findLikelySubtags( 601 tagBuffer.data(), 602 likelySubtagsBuffer, 603 sizeof(likelySubtagsBuffer), 604 err); 605 if(U_FAILURE(*err)) { 606 goto error; 607 } 608 609 if (likelySubtags != NULL) { 610 /* Always use the language tag from the 611 maximal string, since it may be more 612 specific than the one provided. */ 613 createTagStringWithAlternates( 614 NULL, 615 0, 616 NULL, 617 0, 618 NULL, 619 0, 620 variants, 621 variantsLength, 622 likelySubtags, 623 sink, 624 err); 625 return true; 626 } 627 } 628 629 /** 630 * Try the language with just the script. 631 **/ 632 if (scriptLength > 0) { 633 634 const char* likelySubtags = NULL; 635 636 icu::CharString tagBuffer; 637 { 638 icu::CharStringByteSink sink(&tagBuffer); 639 createTagString( 640 lang, 641 langLength, 642 script, 643 scriptLength, 644 NULL, 645 0, 646 NULL, 647 0, 648 sink, 649 err); 650 } 651 if(U_FAILURE(*err)) { 652 goto error; 653 } 654 655 likelySubtags = 656 findLikelySubtags( 657 tagBuffer.data(), 658 likelySubtagsBuffer, 659 sizeof(likelySubtagsBuffer), 660 err); 661 if(U_FAILURE(*err)) { 662 goto error; 663 } 664 665 if (likelySubtags != NULL) { 666 /* Always use the language tag from the 667 maximal string, since it may be more 668 specific than the one provided. */ 669 createTagStringWithAlternates( 670 NULL, 671 0, 672 NULL, 673 0, 674 region, 675 regionLength, 676 variants, 677 variantsLength, 678 likelySubtags, 679 sink, 680 err); 681 return true; 682 } 683 } 684 685 /** 686 * Try the language with just the region. 687 **/ 688 if (regionLength > 0) { 689 690 const char* likelySubtags = NULL; 691 692 icu::CharString tagBuffer; 693 { 694 icu::CharStringByteSink sink(&tagBuffer); 695 createTagString( 696 lang, 697 langLength, 698 NULL, 699 0, 700 region, 701 regionLength, 702 NULL, 703 0, 704 sink, 705 err); 706 } 707 if(U_FAILURE(*err)) { 708 goto error; 709 } 710 711 likelySubtags = 712 findLikelySubtags( 713 tagBuffer.data(), 714 likelySubtagsBuffer, 715 sizeof(likelySubtagsBuffer), 716 err); 717 if(U_FAILURE(*err)) { 718 goto error; 719 } 720 721 if (likelySubtags != NULL) { 722 /* Always use the language tag from the 723 maximal string, since it may be more 724 specific than the one provided. */ 725 createTagStringWithAlternates( 726 NULL, 727 0, 728 script, 729 scriptLength, 730 NULL, 731 0, 732 variants, 733 variantsLength, 734 likelySubtags, 735 sink, 736 err); 737 return true; 738 } 739 } 740 741 /** 742 * Finally, try just the language. 743 **/ 744 { 745 const char* likelySubtags = NULL; 746 747 icu::CharString tagBuffer; 748 { 749 icu::CharStringByteSink sink(&tagBuffer); 750 createTagString( 751 lang, 752 langLength, 753 NULL, 754 0, 755 NULL, 756 0, 757 NULL, 758 0, 759 sink, 760 err); 761 } 762 if(U_FAILURE(*err)) { 763 goto error; 764 } 765 766 likelySubtags = 767 findLikelySubtags( 768 tagBuffer.data(), 769 likelySubtagsBuffer, 770 sizeof(likelySubtagsBuffer), 771 err); 772 if(U_FAILURE(*err)) { 773 goto error; 774 } 775 776 if (likelySubtags != NULL) { 777 /* Always use the language tag from the 778 maximal string, since it may be more 779 specific than the one provided. */ 780 createTagStringWithAlternates( 781 NULL, 782 0, 783 script, 784 scriptLength, 785 region, 786 regionLength, 787 variants, 788 variantsLength, 789 likelySubtags, 790 sink, 791 err); 792 return true; 793 } 794 } 795 796 return false; 797 798error: 799 800 if (!U_FAILURE(*err)) { 801 *err = U_ILLEGAL_ARGUMENT_ERROR; 802 } 803 804 return false; 805} 806 807#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \ 808 int32_t count = 0; \ 809 int32_t i; \ 810 for (i = 0; i < trailingLength; i++) { \ 811 if (trailing[i] == '-' || trailing[i] == '_') { \ 812 count = 0; \ 813 if (count > 8) { \ 814 goto error; \ 815 } \ 816 } else if (trailing[i] == '@') { \ 817 break; \ 818 } else if (count > 8) { \ 819 goto error; \ 820 } else { \ 821 count++; \ 822 } \ 823 } \ 824} UPRV_BLOCK_MACRO_END 825 826static UBool 827_uloc_addLikelySubtags(const char* localeID, 828 icu::ByteSink& sink, 829 UErrorCode* err) { 830 char lang[ULOC_LANG_CAPACITY]; 831 int32_t langLength = sizeof(lang); 832 char script[ULOC_SCRIPT_CAPACITY]; 833 int32_t scriptLength = sizeof(script); 834 char region[ULOC_COUNTRY_CAPACITY]; 835 int32_t regionLength = sizeof(region); 836 const char* trailing = ""; 837 int32_t trailingLength = 0; 838 int32_t trailingIndex = 0; 839 UBool success = false; 840 841 if(U_FAILURE(*err)) { 842 goto error; 843 } 844 if (localeID == NULL) { 845 goto error; 846 } 847 848 trailingIndex = parseTagString( 849 localeID, 850 lang, 851 &langLength, 852 script, 853 &scriptLength, 854 region, 855 ®ionLength, 856 err); 857 if(U_FAILURE(*err)) { 858 /* Overflow indicates an illegal argument error */ 859 if (*err == U_BUFFER_OVERFLOW_ERROR) { 860 *err = U_ILLEGAL_ARGUMENT_ERROR; 861 } 862 863 goto error; 864 } 865 866 /* Find the length of the trailing portion. */ 867 while (_isIDSeparator(localeID[trailingIndex])) { 868 trailingIndex++; 869 } 870 trailing = &localeID[trailingIndex]; 871 trailingLength = (int32_t)uprv_strlen(trailing); 872 873 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 874 875 success = 876 createLikelySubtagsString( 877 lang, 878 langLength, 879 script, 880 scriptLength, 881 region, 882 regionLength, 883 trailing, 884 trailingLength, 885 sink, 886 err); 887 888 if (!success) { 889 const int32_t localIDLength = (int32_t)uprv_strlen(localeID); 890 891 /* 892 * If we get here, we need to return localeID. 893 */ 894 sink.Append(localeID, localIDLength); 895 } 896 897 return success; 898 899error: 900 901 if (!U_FAILURE(*err)) { 902 *err = U_ILLEGAL_ARGUMENT_ERROR; 903 } 904 return false; 905} 906 907// Add likely subtags to the sink 908// return true if the value in the sink is produced by a match during the lookup 909// return false if the value in the sink is the same as input because there are 910// no match after the lookup. 911static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*); 912 913static void 914_uloc_minimizeSubtags(const char* localeID, 915 icu::ByteSink& sink, 916 UErrorCode* err) { 917 icu::CharString maximizedTagBuffer; 918 919 char lang[ULOC_LANG_CAPACITY]; 920 int32_t langLength = sizeof(lang); 921 char script[ULOC_SCRIPT_CAPACITY]; 922 int32_t scriptLength = sizeof(script); 923 char region[ULOC_COUNTRY_CAPACITY]; 924 int32_t regionLength = sizeof(region); 925 const char* trailing = ""; 926 int32_t trailingLength = 0; 927 int32_t trailingIndex = 0; 928 UBool successGetMax = false; 929 930 if(U_FAILURE(*err)) { 931 goto error; 932 } 933 else if (localeID == NULL) { 934 goto error; 935 } 936 937 trailingIndex = 938 parseTagString( 939 localeID, 940 lang, 941 &langLength, 942 script, 943 &scriptLength, 944 region, 945 ®ionLength, 946 err); 947 if(U_FAILURE(*err)) { 948 949 /* Overflow indicates an illegal argument error */ 950 if (*err == U_BUFFER_OVERFLOW_ERROR) { 951 *err = U_ILLEGAL_ARGUMENT_ERROR; 952 } 953 954 goto error; 955 } 956 957 /* Find the spot where the variants or the keywords begin, if any. */ 958 while (_isIDSeparator(localeID[trailingIndex])) { 959 trailingIndex++; 960 } 961 trailing = &localeID[trailingIndex]; 962 trailingLength = (int32_t)uprv_strlen(trailing); 963 964 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 965 966 { 967 icu::CharString base; 968 { 969 icu::CharStringByteSink baseSink(&base); 970 createTagString( 971 lang, 972 langLength, 973 script, 974 scriptLength, 975 region, 976 regionLength, 977 NULL, 978 0, 979 baseSink, 980 err); 981 } 982 983 /** 984 * First, we need to first get the maximization 985 * from AddLikelySubtags. 986 **/ 987 { 988 icu::CharStringByteSink maxSink(&maximizedTagBuffer); 989 successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err); 990 } 991 } 992 993 if(U_FAILURE(*err)) { 994 goto error; 995 } 996 997 if (!successGetMax) { 998 /** 999 * If we got here, return the locale ID parameter unchanged. 1000 **/ 1001 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); 1002 sink.Append(localeID, localeIDLength); 1003 return; 1004 } 1005 1006 // In the following, the lang, script, region are referring to those in 1007 // the maximizedTagBuffer, not the one in the localeID. 1008 langLength = sizeof(lang); 1009 scriptLength = sizeof(script); 1010 regionLength = sizeof(region); 1011 parseTagString( 1012 maximizedTagBuffer.data(), 1013 lang, 1014 &langLength, 1015 script, 1016 &scriptLength, 1017 region, 1018 ®ionLength, 1019 err); 1020 if(U_FAILURE(*err)) { 1021 goto error; 1022 } 1023 1024 /** 1025 * Start first with just the language. 1026 **/ 1027 { 1028 icu::CharString tagBuffer; 1029 { 1030 icu::CharStringByteSink tagSink(&tagBuffer); 1031 createLikelySubtagsString( 1032 lang, 1033 langLength, 1034 NULL, 1035 0, 1036 NULL, 1037 0, 1038 NULL, 1039 0, 1040 tagSink, 1041 err); 1042 } 1043 1044 if(U_FAILURE(*err)) { 1045 goto error; 1046 } 1047 else if (!tagBuffer.isEmpty() && 1048 uprv_strnicmp( 1049 maximizedTagBuffer.data(), 1050 tagBuffer.data(), 1051 tagBuffer.length()) == 0) { 1052 1053 createTagString( 1054 lang, 1055 langLength, 1056 NULL, 1057 0, 1058 NULL, 1059 0, 1060 trailing, 1061 trailingLength, 1062 sink, 1063 err); 1064 return; 1065 } 1066 } 1067 1068 /** 1069 * Next, try the language and region. 1070 **/ 1071 if (regionLength > 0) { 1072 1073 icu::CharString tagBuffer; 1074 { 1075 icu::CharStringByteSink tagSink(&tagBuffer); 1076 createLikelySubtagsString( 1077 lang, 1078 langLength, 1079 NULL, 1080 0, 1081 region, 1082 regionLength, 1083 NULL, 1084 0, 1085 tagSink, 1086 err); 1087 } 1088 1089 if(U_FAILURE(*err)) { 1090 goto error; 1091 } 1092 else if (!tagBuffer.isEmpty() && 1093 uprv_strnicmp( 1094 maximizedTagBuffer.data(), 1095 tagBuffer.data(), 1096 tagBuffer.length()) == 0) { 1097 1098 createTagString( 1099 lang, 1100 langLength, 1101 NULL, 1102 0, 1103 region, 1104 regionLength, 1105 trailing, 1106 trailingLength, 1107 sink, 1108 err); 1109 return; 1110 } 1111 } 1112 1113 /** 1114 * Finally, try the language and script. This is our last chance, 1115 * since trying with all three subtags would only yield the 1116 * maximal version that we already have. 1117 **/ 1118 if (scriptLength > 0) { 1119 icu::CharString tagBuffer; 1120 { 1121 icu::CharStringByteSink tagSink(&tagBuffer); 1122 createLikelySubtagsString( 1123 lang, 1124 langLength, 1125 script, 1126 scriptLength, 1127 NULL, 1128 0, 1129 NULL, 1130 0, 1131 tagSink, 1132 err); 1133 } 1134 1135 if(U_FAILURE(*err)) { 1136 goto error; 1137 } 1138 else if (!tagBuffer.isEmpty() && 1139 uprv_strnicmp( 1140 maximizedTagBuffer.data(), 1141 tagBuffer.data(), 1142 tagBuffer.length()) == 0) { 1143 1144 createTagString( 1145 lang, 1146 langLength, 1147 script, 1148 scriptLength, 1149 NULL, 1150 0, 1151 trailing, 1152 trailingLength, 1153 sink, 1154 err); 1155 return; 1156 } 1157 } 1158 1159 { 1160 /** 1161 * If we got here, return the max + trail. 1162 **/ 1163 createTagString( 1164 lang, 1165 langLength, 1166 script, 1167 scriptLength, 1168 region, 1169 regionLength, 1170 trailing, 1171 trailingLength, 1172 sink, 1173 err); 1174 return; 1175 } 1176 1177error: 1178 1179 if (!U_FAILURE(*err)) { 1180 *err = U_ILLEGAL_ARGUMENT_ERROR; 1181 } 1182} 1183 1184static int32_t 1185do_canonicalize(const char* localeID, 1186 char* buffer, 1187 int32_t bufferCapacity, 1188 UErrorCode* err) 1189{ 1190 int32_t canonicalizedSize = uloc_canonicalize( 1191 localeID, 1192 buffer, 1193 bufferCapacity, 1194 err); 1195 1196 if (*err == U_STRING_NOT_TERMINATED_WARNING || 1197 *err == U_BUFFER_OVERFLOW_ERROR) { 1198 return canonicalizedSize; 1199 } 1200 else if (U_FAILURE(*err)) { 1201 1202 return -1; 1203 } 1204 else { 1205 return canonicalizedSize; 1206 } 1207} 1208 1209U_CAPI int32_t U_EXPORT2 1210uloc_addLikelySubtags(const char* localeID, 1211 char* maximizedLocaleID, 1212 int32_t maximizedLocaleIDCapacity, 1213 UErrorCode* status) { 1214 if (U_FAILURE(*status)) { 1215 return 0; 1216 } 1217 1218 icu::CheckedArrayByteSink sink( 1219 maximizedLocaleID, maximizedLocaleIDCapacity); 1220 1221 ulocimp_addLikelySubtags(localeID, sink, status); 1222 int32_t reslen = sink.NumberOfBytesAppended(); 1223 1224 if (U_FAILURE(*status)) { 1225 return sink.Overflowed() ? reslen : -1; 1226 } 1227 1228 if (sink.Overflowed()) { 1229 *status = U_BUFFER_OVERFLOW_ERROR; 1230 } else { 1231 u_terminateChars( 1232 maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status); 1233 } 1234 1235 return reslen; 1236} 1237 1238static UBool 1239_ulocimp_addLikelySubtags(const char* localeID, 1240 icu::ByteSink& sink, 1241 UErrorCode* status) { 1242 PreflightingLocaleIDBuffer localeBuffer; 1243 do { 1244 localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(), 1245 localeBuffer.getCapacity(), status); 1246 } while (localeBuffer.needToTryAgain(status)); 1247 1248 if (U_SUCCESS(*status)) { 1249 return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status); 1250 } else { 1251 return false; 1252 } 1253} 1254 1255U_CAPI void U_EXPORT2 1256ulocimp_addLikelySubtags(const char* localeID, 1257 icu::ByteSink& sink, 1258 UErrorCode* status) { 1259 _ulocimp_addLikelySubtags(localeID, sink, status); 1260} 1261 1262U_CAPI int32_t U_EXPORT2 1263uloc_minimizeSubtags(const char* localeID, 1264 char* minimizedLocaleID, 1265 int32_t minimizedLocaleIDCapacity, 1266 UErrorCode* status) { 1267 if (U_FAILURE(*status)) { 1268 return 0; 1269 } 1270 1271 icu::CheckedArrayByteSink sink( 1272 minimizedLocaleID, minimizedLocaleIDCapacity); 1273 1274 ulocimp_minimizeSubtags(localeID, sink, status); 1275 int32_t reslen = sink.NumberOfBytesAppended(); 1276 1277 if (U_FAILURE(*status)) { 1278 return sink.Overflowed() ? reslen : -1; 1279 } 1280 1281 if (sink.Overflowed()) { 1282 *status = U_BUFFER_OVERFLOW_ERROR; 1283 } else { 1284 u_terminateChars( 1285 minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status); 1286 } 1287 1288 return reslen; 1289} 1290 1291U_CAPI void U_EXPORT2 1292ulocimp_minimizeSubtags(const char* localeID, 1293 icu::ByteSink& sink, 1294 UErrorCode* status) { 1295 PreflightingLocaleIDBuffer localeBuffer; 1296 do { 1297 localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(), 1298 localeBuffer.getCapacity(), status); 1299 } while (localeBuffer.needToTryAgain(status)); 1300 1301 _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status); 1302} 1303 1304// Pairs of (language subtag, + or -) for finding out fast if common languages 1305// are LTR (minus) or RTL (plus). 1306static const char LANG_DIR_STRING[] = 1307 "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; 1308 1309// Implemented here because this calls ulocimp_addLikelySubtags(). 1310U_CAPI UBool U_EXPORT2 1311uloc_isRightToLeft(const char *locale) { 1312 UErrorCode errorCode = U_ZERO_ERROR; 1313 char script[8]; 1314 int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode); 1315 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || 1316 scriptLength == 0) { 1317 // Fastpath: We know the likely scripts and their writing direction 1318 // for some common languages. 1319 errorCode = U_ZERO_ERROR; 1320 char lang[8]; 1321 int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode); 1322 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { 1323 return false; 1324 } 1325 if (langLength > 0) { 1326 const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); 1327 if (langPtr != NULL) { 1328 switch (langPtr[langLength]) { 1329 case '-': return false; 1330 case '+': return true; 1331 default: break; // partial match of a longer code 1332 } 1333 } 1334 } 1335 // Otherwise, find the likely script. 1336 errorCode = U_ZERO_ERROR; 1337 icu::CharString likely; 1338 { 1339 icu::CharStringByteSink sink(&likely); 1340 ulocimp_addLikelySubtags(locale, sink, &errorCode); 1341 } 1342 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { 1343 return false; 1344 } 1345 scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode); 1346 if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || 1347 scriptLength == 0) { 1348 return false; 1349 } 1350 } 1351 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); 1352 return uscript_isRightToLeft(scriptCode); 1353} 1354 1355U_NAMESPACE_BEGIN 1356 1357UBool 1358Locale::isRightToLeft() const { 1359 return uloc_isRightToLeft(getBaseName()); 1360} 1361 1362U_NAMESPACE_END 1363 1364// The following must at least allow for rg key value (6) plus terminator (1). 1365#define ULOC_RG_BUFLEN 8 1366 1367U_CAPI int32_t U_EXPORT2 1368ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, 1369 char *region, int32_t regionCapacity, UErrorCode* status) { 1370 if (U_FAILURE(*status)) { 1371 return 0; 1372 } 1373 char rgBuf[ULOC_RG_BUFLEN]; 1374 UErrorCode rgStatus = U_ZERO_ERROR; 1375 1376 // First check for rg keyword value 1377 int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus); 1378 if (U_FAILURE(rgStatus) || rgLen != 6) { 1379 rgLen = 0; 1380 } else { 1381 // rgBuf guaranteed to be zero terminated here, with text len 6 1382 char *rgPtr = rgBuf; 1383 for (; *rgPtr!= 0; rgPtr++) { 1384 *rgPtr = uprv_toupper(*rgPtr); 1385 } 1386 rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0; 1387 } 1388 1389 if (rgLen == 0) { 1390 // No valid rg keyword value, try for unicode_region_subtag 1391 rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status); 1392 if (U_FAILURE(*status)) { 1393 rgLen = 0; 1394 } else if (rgLen == 0 && inferRegion) { 1395 // no unicode_region_subtag but inferRegion true, try likely subtags 1396 rgStatus = U_ZERO_ERROR; 1397 icu::CharString locBuf; 1398 { 1399 icu::CharStringByteSink sink(&locBuf); 1400 ulocimp_addLikelySubtags(localeID, sink, &rgStatus); 1401 } 1402 if (U_SUCCESS(rgStatus)) { 1403 rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status); 1404 if (U_FAILURE(*status)) { 1405 rgLen = 0; 1406 } 1407 } 1408 } 1409 } 1410 1411 rgBuf[rgLen] = 0; 1412 uprv_strncpy(region, rgBuf, regionCapacity); 1413 return u_terminateChars(region, regionCapacity, rgLen, status); 1414} 1415 1416