12e5b6d6dSopenharmony_ci// © 2017 and later: Unicode, Inc. and others. 22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 32e5b6d6dSopenharmony_ci/* 42e5b6d6dSopenharmony_ci******************************************************************************* 52e5b6d6dSopenharmony_ci* 62e5b6d6dSopenharmony_ci* Copyright (C) 2003, International Business Machines 72e5b6d6dSopenharmony_ci* Corporation and others. All Rights Reserved. 82e5b6d6dSopenharmony_ci* 92e5b6d6dSopenharmony_ci******************************************************************************* 102e5b6d6dSopenharmony_ci* 112e5b6d6dSopenharmony_ci* File colprobe.cpp 122e5b6d6dSopenharmony_ci* 132e5b6d6dSopenharmony_ci* Modification History: 142e5b6d6dSopenharmony_ci* 152e5b6d6dSopenharmony_ci* Date Name Description 162e5b6d6dSopenharmony_ci* 03/18/2003 weiv Creation. 172e5b6d6dSopenharmony_ci******************************************************************************* 182e5b6d6dSopenharmony_ci*/ 192e5b6d6dSopenharmony_ci 202e5b6d6dSopenharmony_ci#include "uoptions.h" 212e5b6d6dSopenharmony_ci#include "unicode/ucol.h" 222e5b6d6dSopenharmony_ci#include "unicode/ucoleitr.h" 232e5b6d6dSopenharmony_ci#include "unicode/ures.h" 242e5b6d6dSopenharmony_ci#include "unicode/uniset.h" 252e5b6d6dSopenharmony_ci#include "unicode/usetiter.h" 262e5b6d6dSopenharmony_ci#include "unicode/ustring.h" 272e5b6d6dSopenharmony_ci#include "unicode/uchar.h" 282e5b6d6dSopenharmony_ci#include "unicode/uscript.h" 292e5b6d6dSopenharmony_ci#include "unicode/locid.h" 302e5b6d6dSopenharmony_ci#include "unicode/ucnv.h" 312e5b6d6dSopenharmony_ci#include "uprops.h" 322e5b6d6dSopenharmony_ci#include "hash.h" 332e5b6d6dSopenharmony_ci#include "ucol_imp.h" 342e5b6d6dSopenharmony_ci 352e5b6d6dSopenharmony_ci#include "unicode/ustdio.h" 362e5b6d6dSopenharmony_ci#include "unicode/utrans.h" 372e5b6d6dSopenharmony_ci 382e5b6d6dSopenharmony_ci#include <stdio.h> 392e5b6d6dSopenharmony_ci#include <stdlib.h> 402e5b6d6dSopenharmony_ci#include <string.h> 412e5b6d6dSopenharmony_ci#include <fcntl.h> 422e5b6d6dSopenharmony_ci 432e5b6d6dSopenharmony_ci// unix tolower 442e5b6d6dSopenharmony_ci#include <ctype.h> 452e5b6d6dSopenharmony_ci// unix setlocale 462e5b6d6dSopenharmony_ci#include <locale.h> 472e5b6d6dSopenharmony_ci 482e5b6d6dSopenharmony_ci#include "colprobe.h" 492e5b6d6dSopenharmony_ci 502e5b6d6dSopenharmony_ci#include "line.h" 512e5b6d6dSopenharmony_ci#include "sortedlines.h" 522e5b6d6dSopenharmony_ci#include "strengthprobe.h" 532e5b6d6dSopenharmony_ci 542e5b6d6dSopenharmony_civoid testWin(StrengthProbe &probe, UErrorCode &status) ; 552e5b6d6dSopenharmony_ci 562e5b6d6dSopenharmony_ci#if defined WIN32 572e5b6d6dSopenharmony_ci#include <io.h> 582e5b6d6dSopenharmony_ci#include <windows.h> 592e5b6d6dSopenharmony_ci#include <sys/types.h> 602e5b6d6dSopenharmony_ci#include <sys/stat.h> 612e5b6d6dSopenharmony_ci#include <direct.h> 622e5b6d6dSopenharmony_ci 632e5b6d6dSopenharmony_ciint createDir(const char* dirName) { 642e5b6d6dSopenharmony_ci struct _stat myStat; 652e5b6d6dSopenharmony_ci int result = _stat(dirName, &myStat); 662e5b6d6dSopenharmony_ci 672e5b6d6dSopenharmony_ci if(result == -1) { 682e5b6d6dSopenharmony_ci result = _mkdir(dirName); 692e5b6d6dSopenharmony_ci return result; 702e5b6d6dSopenharmony_ci } else if(myStat.st_mode & _S_IFDIR) { 712e5b6d6dSopenharmony_ci return 0; 722e5b6d6dSopenharmony_ci } else { 732e5b6d6dSopenharmony_ci return 1; 742e5b6d6dSopenharmony_ci } 752e5b6d6dSopenharmony_ci} 762e5b6d6dSopenharmony_ci 772e5b6d6dSopenharmony_ci//#elif defined POSIX 782e5b6d6dSopenharmony_ci#else 792e5b6d6dSopenharmony_ci#include <sys/stat.h> 802e5b6d6dSopenharmony_ci#include <unistd.h> 812e5b6d6dSopenharmony_ci 822e5b6d6dSopenharmony_ciint createDir(const char* dirName) { 832e5b6d6dSopenharmony_ci struct stat myStat; 842e5b6d6dSopenharmony_ci int result = stat(dirName, &myStat); 852e5b6d6dSopenharmony_ci 862e5b6d6dSopenharmony_ci if(result == -1) { 872e5b6d6dSopenharmony_ci result = mkdir(dirName, S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IWOTH|S_IXOTH); 882e5b6d6dSopenharmony_ci return result; 892e5b6d6dSopenharmony_ci } else if(S_ISDIR(myStat.st_mode)) { 902e5b6d6dSopenharmony_ci return 0; 912e5b6d6dSopenharmony_ci } else { 922e5b6d6dSopenharmony_ci return 1; 932e5b6d6dSopenharmony_ci } 942e5b6d6dSopenharmony_ci} 952e5b6d6dSopenharmony_ci// 962e5b6d6dSopenharmony_ci// Stubs for Windows API functions when building on UNIXes. 972e5b6d6dSopenharmony_ci// 982e5b6d6dSopenharmony_citypedef int DWORD; 992e5b6d6dSopenharmony_ciinline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;}; 1002e5b6d6dSopenharmony_ci//#else 1012e5b6d6dSopenharmony_ci//#error "Not POSIX or Windows. Won't work." 1022e5b6d6dSopenharmony_ci#endif 1032e5b6d6dSopenharmony_ci 1042e5b6d6dSopenharmony_ci#include "line.h" 1052e5b6d6dSopenharmony_ci 1062e5b6d6dSopenharmony_cistatic UBool gVerbose = false; 1072e5b6d6dSopenharmony_cistatic UBool gDebug = false; 1082e5b6d6dSopenharmony_cistatic UBool gQuiet = false; 1092e5b6d6dSopenharmony_cistatic UBool gExemplar = false; 1102e5b6d6dSopenharmony_ci 1112e5b6d6dSopenharmony_ciDWORD gWinLCID; 1122e5b6d6dSopenharmony_ciint gCount; 1132e5b6d6dSopenharmony_ciUCollator *gCol; 1142e5b6d6dSopenharmony_ciUCollator *gUCA; 1152e5b6d6dSopenharmony_ciUConverter *utf8cnv; 1162e5b6d6dSopenharmony_ciCompareFn gComparer; 1172e5b6d6dSopenharmony_ciint gRefNum; 1182e5b6d6dSopenharmony_ciUnicodeSet gExcludeSet; 1192e5b6d6dSopenharmony_ciUnicodeSet gRepertoire; 1202e5b6d6dSopenharmony_ci 1212e5b6d6dSopenharmony_ciconst UChar separatorChar = 0x0030; 1222e5b6d6dSopenharmony_ci 1232e5b6d6dSopenharmony_ciUPrinter *logger; 1242e5b6d6dSopenharmony_ciUPrinter *debug; 1252e5b6d6dSopenharmony_ciUPrinter *tailoringBundle; 1262e5b6d6dSopenharmony_ciUPrinter *referenceBundle; 1272e5b6d6dSopenharmony_ciUPrinter *bundle; 1282e5b6d6dSopenharmony_ciFILE *fTailoringDump; 1292e5b6d6dSopenharmony_ciFILE *fDefaultDump; 1302e5b6d6dSopenharmony_ci 1312e5b6d6dSopenharmony_ciconst char *progName = "colprobe"; 1322e5b6d6dSopenharmony_ci 1332e5b6d6dSopenharmony_ciconst char *gLocale = NULL; 1342e5b6d6dSopenharmony_ciint32_t platformIndex = -1; 1352e5b6d6dSopenharmony_ciint32_t gPlatformNo = 0; 1362e5b6d6dSopenharmony_ciint32_t gPlatformIndexes[10]; 1372e5b6d6dSopenharmony_ciint32_t gLocaleNo = 0; 1382e5b6d6dSopenharmony_ciconst char* gLocales[100]; 1392e5b6d6dSopenharmony_ciUBool gRulesStdin = false; 1402e5b6d6dSopenharmony_ciconst char *outputFormat = "HTML"; 1412e5b6d6dSopenharmony_ciconst char *outExtension = "html"; 1422e5b6d6dSopenharmony_ci 1432e5b6d6dSopenharmony_cienum { 1442e5b6d6dSopenharmony_ci HELP1, 1452e5b6d6dSopenharmony_ci HELP2, 1462e5b6d6dSopenharmony_ci VERBOSE, 1472e5b6d6dSopenharmony_ci QUIET, 1482e5b6d6dSopenharmony_ci VERSION, 1492e5b6d6dSopenharmony_ci ICUDATADIR, 1502e5b6d6dSopenharmony_ci COPYRIGHT, 1512e5b6d6dSopenharmony_ci LOCALE, 1522e5b6d6dSopenharmony_ci PLATFORM, 1532e5b6d6dSopenharmony_ci DEBUG, 1542e5b6d6dSopenharmony_ci EXEMPLAR, 1552e5b6d6dSopenharmony_ci RULESSTDIN, 1562e5b6d6dSopenharmony_ci REFERENCE, 1572e5b6d6dSopenharmony_ci EXCLUDESET, 1582e5b6d6dSopenharmony_ci REPERTOIRE, 1592e5b6d6dSopenharmony_ci INTERACTIVE, 1602e5b6d6dSopenharmony_ci PRINTREF, 1612e5b6d6dSopenharmony_ci DIFF, 1622e5b6d6dSopenharmony_ci OUTPUT 1632e5b6d6dSopenharmony_ci}; 1642e5b6d6dSopenharmony_ci 1652e5b6d6dSopenharmony_ciUOption options[]={ 1662e5b6d6dSopenharmony_ci /*0*/ UOPTION_HELP_H, 1672e5b6d6dSopenharmony_ci /*1*/ UOPTION_HELP_QUESTION_MARK, 1682e5b6d6dSopenharmony_ci /*2*/ UOPTION_VERBOSE, 1692e5b6d6dSopenharmony_ci /*3*/ UOPTION_QUIET, 1702e5b6d6dSopenharmony_ci /*4*/ UOPTION_VERSION, 1712e5b6d6dSopenharmony_ci /*5*/ UOPTION_ICUDATADIR, 1722e5b6d6dSopenharmony_ci /*6*/ UOPTION_COPYRIGHT, 1732e5b6d6dSopenharmony_ci /*7*/ UOPTION_DEF("locale", 'l', UOPT_REQUIRES_ARG), 1742e5b6d6dSopenharmony_ci /*8*/ UOPTION_DEF("platform", 'p', UOPT_REQUIRES_ARG), 1752e5b6d6dSopenharmony_ci /*9*/ UOPTION_DEF("debug", 'D', UOPT_NO_ARG), 1762e5b6d6dSopenharmony_ci /*10*/ UOPTION_DEF("exemplar", 'E', UOPT_NO_ARG), 1772e5b6d6dSopenharmony_ci /*11*/ UOPTION_DEF("rulesstdin", 'R', UOPT_NO_ARG), 1782e5b6d6dSopenharmony_ci /*12*/ UOPTION_DEF("ref", 'c', UOPT_REQUIRES_ARG), 1792e5b6d6dSopenharmony_ci /*13*/ UOPTION_DEF("excludeset", 'x', UOPT_REQUIRES_ARG), 1802e5b6d6dSopenharmony_ci /*14*/ UOPTION_DEF("repertoire", 't', UOPT_REQUIRES_ARG), 1812e5b6d6dSopenharmony_ci /*15*/ UOPTION_DEF("interactive", 'I', UOPT_NO_ARG), 1822e5b6d6dSopenharmony_ci /*16*/ UOPTION_DEF("printref", 0, UOPT_NO_ARG), 1832e5b6d6dSopenharmony_ci /*17*/ UOPTION_DEF("diff", 0, UOPT_NO_ARG), 1842e5b6d6dSopenharmony_ci /*18*/ UOPTION_DEF("output", 0, UOPT_REQUIRES_ARG) 1852e5b6d6dSopenharmony_ci}; 1862e5b6d6dSopenharmony_ci 1872e5b6d6dSopenharmony_ciUChar compA[256]; 1882e5b6d6dSopenharmony_ciUChar compB[256]; 1892e5b6d6dSopenharmony_ciint32_t compALen = 0; 1902e5b6d6dSopenharmony_ciint32_t compBLen = 0; 1912e5b6d6dSopenharmony_ci 1922e5b6d6dSopenharmony_cichar compUTF8A[256]; 1932e5b6d6dSopenharmony_cichar compUTF8B[256]; 1942e5b6d6dSopenharmony_ciint32_t compUTF8ALen = 0; 1952e5b6d6dSopenharmony_ciint32_t compUTF8BLen = 0; 1962e5b6d6dSopenharmony_ci 1972e5b6d6dSopenharmony_ciint UNIXstrcmp(const void *a, const void *b) { 1982e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 1992e5b6d6dSopenharmony_ci gCount++; 2002e5b6d6dSopenharmony_ci int t; 2012e5b6d6dSopenharmony_ci compALen = unorm_normalize((*(Line **)a)->name, (*(Line **)a)->len, UNORM_NFC, 0, compA, 256, &status); 2022e5b6d6dSopenharmony_ci compBLen = unorm_normalize((*(Line **)b)->name, (*(Line **)b)->len, UNORM_NFC, 0, compB, 256, &status); 2032e5b6d6dSopenharmony_ci compUTF8ALen = ucnv_fromUChars(utf8cnv, compUTF8A, 256, compA, compALen, &status); 2042e5b6d6dSopenharmony_ci compUTF8A[compUTF8ALen] = 0; 2052e5b6d6dSopenharmony_ci compUTF8BLen = ucnv_fromUChars(utf8cnv, compUTF8B, 256, compB, compBLen, &status); 2062e5b6d6dSopenharmony_ci compUTF8B[compUTF8BLen] = 0; 2072e5b6d6dSopenharmony_ci t = strcoll(compUTF8A, compUTF8B); 2082e5b6d6dSopenharmony_ci return t; 2092e5b6d6dSopenharmony_ci} 2102e5b6d6dSopenharmony_ci 2112e5b6d6dSopenharmony_ciint UNIXgetSortKey(const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity) { 2122e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2132e5b6d6dSopenharmony_ci compALen = unorm_normalize(string, len, UNORM_NFC, 0, compA, 256, &status); 2142e5b6d6dSopenharmony_ci compUTF8ALen = ucnv_fromUChars(utf8cnv, compUTF8A, 256, compA, compALen, &status); 2152e5b6d6dSopenharmony_ci compUTF8A[compUTF8ALen] = 0; 2162e5b6d6dSopenharmony_ci return (strxfrm((char *)buffer, compUTF8A, buffCapacity)+1); 2172e5b6d6dSopenharmony_ci} 2182e5b6d6dSopenharmony_ci 2192e5b6d6dSopenharmony_ci#ifdef WIN32 2202e5b6d6dSopenharmony_ciint Winstrcmp(const void *a, const void *b) { 2212e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2222e5b6d6dSopenharmony_ci gCount++; 2232e5b6d6dSopenharmony_ci int t; 2242e5b6d6dSopenharmony_ci //compALen = unorm_compose(compA, 256, (*(Line **)a)->name, (*(Line **)a)->len, false, 0, &status); 2252e5b6d6dSopenharmony_ci //compBLen = unorm_compose(compB, 256, (*(Line **)b)->name, (*(Line **)b)->len, false, 0, &status); 2262e5b6d6dSopenharmony_ci compALen = unorm_normalize((*(Line **)a)->name, (*(Line **)a)->len, UNORM_NFC, 0, compA, 256, &status); 2272e5b6d6dSopenharmony_ci compBLen = unorm_normalize((*(Line **)b)->name, (*(Line **)b)->len, UNORM_NFC, 0, compB, 256, &status); 2282e5b6d6dSopenharmony_ci t = CompareStringW(gWinLCID, SORT_STRINGSORT, //0, 2292e5b6d6dSopenharmony_ci compA, compALen, 2302e5b6d6dSopenharmony_ci compB, compBLen); 2312e5b6d6dSopenharmony_ci 2322e5b6d6dSopenharmony_ci/* 2332e5b6d6dSopenharmony_ci t = CompareStringW(gWinLCID, 0, 2342e5b6d6dSopenharmony_ci (*(Line **)a)->name, (*(Line **)a)->len, 2352e5b6d6dSopenharmony_ci (*(Line **)b)->name, (*(Line **)b)->len); 2362e5b6d6dSopenharmony_ci*/ 2372e5b6d6dSopenharmony_ci return t-2; 2382e5b6d6dSopenharmony_ci} 2392e5b6d6dSopenharmony_ci 2402e5b6d6dSopenharmony_ciint WingetSortKey(const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity) { 2412e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2422e5b6d6dSopenharmony_ci compALen = unorm_normalize(string, len, UNORM_NFC, 0, compA, 256, &status); 2432e5b6d6dSopenharmony_ci return LCMapStringW(gWinLCID, LCMAP_SORTKEY | SORT_STRINGSORT, compA, compALen, (unsigned short *)buffer, buffCapacity); 2442e5b6d6dSopenharmony_ci} 2452e5b6d6dSopenharmony_ci 2462e5b6d6dSopenharmony_ci#if 0 2472e5b6d6dSopenharmony_ciint Winstrcmp(const void *a, const void *b) { 2482e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2492e5b6d6dSopenharmony_ci uint8_t b1[256], b2[256]; 2502e5b6d6dSopenharmony_ci int32_t b1Len, b2Len; 2512e5b6d6dSopenharmony_ci b1Len = WingetSortKey((*(Line **)a)->name, (*(Line **)a)->len, b1, 256); 2522e5b6d6dSopenharmony_ci b2Len = WingetSortKey((*(Line **)b)->name, (*(Line **)b)->len, b2, 256); 2532e5b6d6dSopenharmony_ci 2542e5b6d6dSopenharmony_ci b1[b1Len] = 0; 2552e5b6d6dSopenharmony_ci b2[b2Len] = 0; 2562e5b6d6dSopenharmony_ci 2572e5b6d6dSopenharmony_ci return strcmp((const char *)b1, (const char *)b2); 2582e5b6d6dSopenharmony_ci} 2592e5b6d6dSopenharmony_ci#endif 2602e5b6d6dSopenharmony_ci 2612e5b6d6dSopenharmony_ci#else 2622e5b6d6dSopenharmony_ciint Winstrcmp(const void *a, const void *b) { 2632e5b6d6dSopenharmony_ci if(a == b); 2642e5b6d6dSopenharmony_ci return 0; 2652e5b6d6dSopenharmony_ci} 2662e5b6d6dSopenharmony_ciint WingetSortKey(const UChar *, int32_t , uint8_t *, int32_t ) { 2672e5b6d6dSopenharmony_ci return 0; 2682e5b6d6dSopenharmony_ci} 2692e5b6d6dSopenharmony_ci#endif 2702e5b6d6dSopenharmony_ci 2712e5b6d6dSopenharmony_ciint ICUstrcmp(const void *a, const void *b) { 2722e5b6d6dSopenharmony_ci gCount++; 2732e5b6d6dSopenharmony_ci UCollationResult t; 2742e5b6d6dSopenharmony_ci t = ucol_strcoll(gCol, 2752e5b6d6dSopenharmony_ci (*(Line **)a)->name, (*(Line **)a)->len, 2762e5b6d6dSopenharmony_ci (*(Line **)b)->name, (*(Line **)b)->len); 2772e5b6d6dSopenharmony_ci if (t == UCOL_LESS) return -1; 2782e5b6d6dSopenharmony_ci if (t == UCOL_GREATER) return +1; 2792e5b6d6dSopenharmony_ci return 0; 2802e5b6d6dSopenharmony_ci} 2812e5b6d6dSopenharmony_ci 2822e5b6d6dSopenharmony_ciint ICUgetSortKey(const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity) { 2832e5b6d6dSopenharmony_ci return ucol_getSortKey(gCol, string, len, buffer, buffCapacity); 2842e5b6d6dSopenharmony_ci} 2852e5b6d6dSopenharmony_ci 2862e5b6d6dSopenharmony_cistruct { 2872e5b6d6dSopenharmony_ci const char* name; 2882e5b6d6dSopenharmony_ci CompareFn comparer; 2892e5b6d6dSopenharmony_ci GetSortKeyFn skgetter; 2902e5b6d6dSopenharmony_ci} platforms[] = { 2912e5b6d6dSopenharmony_ci { "icu", ICUstrcmp, ICUgetSortKey }, 2922e5b6d6dSopenharmony_ci { "w2k", Winstrcmp, WingetSortKey}, 2932e5b6d6dSopenharmony_ci { "winxp", Winstrcmp, WingetSortKey}, 2942e5b6d6dSopenharmony_ci { "aix", UNIXstrcmp, UNIXgetSortKey}, 2952e5b6d6dSopenharmony_ci { "linux", UNIXstrcmp, UNIXgetSortKey} 2962e5b6d6dSopenharmony_ci}; 2972e5b6d6dSopenharmony_ci 2982e5b6d6dSopenharmony_ci 2992e5b6d6dSopenharmony_civoid stringToLower(char *string) { 3002e5b6d6dSopenharmony_ci uint32_t i = 0; 3012e5b6d6dSopenharmony_ci for(i = 0; i < strlen(string); i++) { 3022e5b6d6dSopenharmony_ci string[i] = tolower(string[i]); 3032e5b6d6dSopenharmony_ci } 3042e5b6d6dSopenharmony_ci} 3052e5b6d6dSopenharmony_ci 3062e5b6d6dSopenharmony_civoid usage(const char *name) { 3072e5b6d6dSopenharmony_ci logger->log("Usage: %s --locale loc_name --platform platform\n", name); 3082e5b6d6dSopenharmony_ci} 3092e5b6d6dSopenharmony_ci 3102e5b6d6dSopenharmony_civoid listKnownPlatforms() { 3112e5b6d6dSopenharmony_ci uint32_t i = 0; 3122e5b6d6dSopenharmony_ci logger->log("Known platforms:\n"); 3132e5b6d6dSopenharmony_ci for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) { 3142e5b6d6dSopenharmony_ci logger->log("\t%s\n", platforms[i]); 3152e5b6d6dSopenharmony_ci } 3162e5b6d6dSopenharmony_ci} 3172e5b6d6dSopenharmony_ci 3182e5b6d6dSopenharmony_civoid addPlatform(const char *platform) { 3192e5b6d6dSopenharmony_ci uint32_t i; 3202e5b6d6dSopenharmony_ci //stringToLower(platform); 3212e5b6d6dSopenharmony_ci int32_t oldPlatformNo = gPlatformNo; 3222e5b6d6dSopenharmony_ci 3232e5b6d6dSopenharmony_ci for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) { 3242e5b6d6dSopenharmony_ci if(strcmp(platform, platforms[i].name) == 0) { 3252e5b6d6dSopenharmony_ci gPlatformIndexes[gPlatformNo++] = i; 3262e5b6d6dSopenharmony_ci } 3272e5b6d6dSopenharmony_ci } 3282e5b6d6dSopenharmony_ci if(gPlatformNo == oldPlatformNo) { 3292e5b6d6dSopenharmony_ci logger->log("Unknown platform %s\n", platform); 3302e5b6d6dSopenharmony_ci listKnownPlatforms(); 3312e5b6d6dSopenharmony_ci } 3322e5b6d6dSopenharmony_ci} 3332e5b6d6dSopenharmony_ci 3342e5b6d6dSopenharmony_civoid processArgs(int argc, char* argv[], UErrorCode &status) 3352e5b6d6dSopenharmony_ci{ 3362e5b6d6dSopenharmony_ci int32_t i = 0; 3372e5b6d6dSopenharmony_ci U_MAIN_INIT_ARGS(argc, argv); 3382e5b6d6dSopenharmony_ci 3392e5b6d6dSopenharmony_ci argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options); 3402e5b6d6dSopenharmony_ci 3412e5b6d6dSopenharmony_ci if(argc < 0) { 3422e5b6d6dSopenharmony_ci logger->log("Unknown option: %s\n", argv[-argc]); 3432e5b6d6dSopenharmony_ci usage(progName); 3442e5b6d6dSopenharmony_ci return; 3452e5b6d6dSopenharmony_ci } 3462e5b6d6dSopenharmony_ci 3472e5b6d6dSopenharmony_ci if(options[0].doesOccur || options[1].doesOccur) { 3482e5b6d6dSopenharmony_ci usage(progName); 3492e5b6d6dSopenharmony_ci return; 3502e5b6d6dSopenharmony_ci } 3512e5b6d6dSopenharmony_ci if(options[VERBOSE].doesOccur) { 3522e5b6d6dSopenharmony_ci gVerbose = true; 3532e5b6d6dSopenharmony_ci } 3542e5b6d6dSopenharmony_ci if(options[DEBUG].doesOccur) { 3552e5b6d6dSopenharmony_ci gDebug = true; 3562e5b6d6dSopenharmony_ci gVerbose = true; 3572e5b6d6dSopenharmony_ci } 3582e5b6d6dSopenharmony_ci if(options[EXEMPLAR].doesOccur) { 3592e5b6d6dSopenharmony_ci gExemplar = true; 3602e5b6d6dSopenharmony_ci } 3612e5b6d6dSopenharmony_ci if(options[QUIET].doesOccur) { 3622e5b6d6dSopenharmony_ci gQuiet = true; 3632e5b6d6dSopenharmony_ci } 3642e5b6d6dSopenharmony_ci 3652e5b6d6dSopenharmony_ci // ASCII based options specified on the command line 3662e5b6d6dSopenharmony_ci // this is for testing purposes, will allow to load 3672e5b6d6dSopenharmony_ci // up ICU rules and then poke through them. 3682e5b6d6dSopenharmony_ci // In that case, we test only ICU and don't need 3692e5b6d6dSopenharmony_ci // a locale. 3702e5b6d6dSopenharmony_ci if(options[RULESSTDIN].doesOccur) { 3712e5b6d6dSopenharmony_ci gRulesStdin = true; 3722e5b6d6dSopenharmony_ci addPlatform("icu"); 3732e5b6d6dSopenharmony_ci return; 3742e5b6d6dSopenharmony_ci } 3752e5b6d6dSopenharmony_ci 3762e5b6d6dSopenharmony_ci if(options[LOCALE].doesOccur) { 3772e5b6d6dSopenharmony_ci gLocale = options[LOCALE].value; 3782e5b6d6dSopenharmony_ci } else { 3792e5b6d6dSopenharmony_ci gLocale = argv[1]; 3802e5b6d6dSopenharmony_ci //for(i = 1; i < argc; i++) { 3812e5b6d6dSopenharmony_ci //gLocales[gLocaleNo++] = argv[i]; 3822e5b6d6dSopenharmony_ci //} 3832e5b6d6dSopenharmony_ci } 3842e5b6d6dSopenharmony_ci 3852e5b6d6dSopenharmony_ci if(options[PLATFORM].doesOccur) { 3862e5b6d6dSopenharmony_ci addPlatform(options[PLATFORM].value); 3872e5b6d6dSopenharmony_ci } else { // there is a list of platforms 3882e5b6d6dSopenharmony_ci addPlatform("icu"); 3892e5b6d6dSopenharmony_ci } 3902e5b6d6dSopenharmony_ci 3912e5b6d6dSopenharmony_ci if(options[REFERENCE].doesOccur) { 3922e5b6d6dSopenharmony_ci for(i = 0; i < (int32_t)(sizeof(platforms)/sizeof(platforms[0])); i++) { 3932e5b6d6dSopenharmony_ci if(strcmp(options[REFERENCE].value, platforms[i].name) == 0) { 3942e5b6d6dSopenharmony_ci gRefNum = i; 3952e5b6d6dSopenharmony_ci break; 3962e5b6d6dSopenharmony_ci } 3972e5b6d6dSopenharmony_ci } 3982e5b6d6dSopenharmony_ci if(i == sizeof(platforms)/sizeof(platforms[0])) { 3992e5b6d6dSopenharmony_ci logger->log("Unknown reference %s!\n", options[REFERENCE].value); 4002e5b6d6dSopenharmony_ci status = U_ILLEGAL_ARGUMENT_ERROR; 4012e5b6d6dSopenharmony_ci return; 4022e5b6d6dSopenharmony_ci } 4032e5b6d6dSopenharmony_ci } else { 4042e5b6d6dSopenharmony_ci gRefNum = 0; 4052e5b6d6dSopenharmony_ci } 4062e5b6d6dSopenharmony_ci 4072e5b6d6dSopenharmony_ci if(options[EXCLUDESET].doesOccur) { 4082e5b6d6dSopenharmony_ci gExcludeSet.applyPattern(UnicodeString(options[EXCLUDESET].value), status); 4092e5b6d6dSopenharmony_ci if(U_FAILURE(status)) { 4102e5b6d6dSopenharmony_ci logger->log("Cannot construct exclude set from argument %s. Error %s\n", options[EXCLUDESET].value, u_errorName(status)); 4112e5b6d6dSopenharmony_ci return; 4122e5b6d6dSopenharmony_ci } else { 4132e5b6d6dSopenharmony_ci UnicodeString pattern; 4142e5b6d6dSopenharmony_ci logger->log(gExcludeSet.toPattern(pattern, true), true); 4152e5b6d6dSopenharmony_ci } 4162e5b6d6dSopenharmony_ci } 4172e5b6d6dSopenharmony_ci 4182e5b6d6dSopenharmony_ci if(options[REPERTOIRE].doesOccur) { 4192e5b6d6dSopenharmony_ci gRepertoire.applyPattern(UnicodeString(options[REPERTOIRE].value), status); 4202e5b6d6dSopenharmony_ci if(U_FAILURE(status)) { 4212e5b6d6dSopenharmony_ci logger->log("Cannot construct repertoire from argument %s. Error %s\n", options[REPERTOIRE].value, u_errorName(status)); 4222e5b6d6dSopenharmony_ci return; 4232e5b6d6dSopenharmony_ci } 4242e5b6d6dSopenharmony_ci } 4252e5b6d6dSopenharmony_ci 4262e5b6d6dSopenharmony_ci if(options[OUTPUT].doesOccur) { 4272e5b6d6dSopenharmony_ci outputFormat = options[OUTPUT].value; 4282e5b6d6dSopenharmony_ci if(strcmp(outputFormat, "HTML") == 0) { 4292e5b6d6dSopenharmony_ci outExtension = "html"; 4302e5b6d6dSopenharmony_ci } else if(strcmp(outputFormat, "XML") == 0) { 4312e5b6d6dSopenharmony_ci outExtension = "xml"; 4322e5b6d6dSopenharmony_ci } else { 4332e5b6d6dSopenharmony_ci outExtension = "txt"; 4342e5b6d6dSopenharmony_ci } 4352e5b6d6dSopenharmony_ci } 4362e5b6d6dSopenharmony_ci 4372e5b6d6dSopenharmony_ci} 4382e5b6d6dSopenharmony_ci 4392e5b6d6dSopenharmony_ci// Check whether upper case comes before lower case or vice-versa 4402e5b6d6dSopenharmony_ciint32_t 4412e5b6d6dSopenharmony_cicheckCaseOrdering(void) { 4422e5b6d6dSopenharmony_ci UChar stuff[][3] = { 4432e5b6d6dSopenharmony_ci { 0x0061, separatorChar, 0x0061}, //"aa", 4442e5b6d6dSopenharmony_ci { 0x0061, separatorChar, 0x0041 }, //"a\\u00E0", 4452e5b6d6dSopenharmony_ci { 0x0041, separatorChar, 0x0061 }, //"\\u00E0a", 4462e5b6d6dSopenharmony_ci { 0x0041, separatorChar, 0x0041 }, //"\\u00E0a", 4472e5b6d6dSopenharmony_ci //{ 0x00E0, separatorChar, 0x00E0 } //"\\u00E0\\u00E0" 4482e5b6d6dSopenharmony_ci }; 4492e5b6d6dSopenharmony_ci const int32_t size = sizeof(stuff)/sizeof(stuff[0]); 4502e5b6d6dSopenharmony_ci 4512e5b6d6dSopenharmony_ci Line **sortedLines = new Line*[size]; 4522e5b6d6dSopenharmony_ci Line lines[size]; 4532e5b6d6dSopenharmony_ci 4542e5b6d6dSopenharmony_ci int32_t i = 0; 4552e5b6d6dSopenharmony_ci int32_t ordered = 0, reversed = 0; 4562e5b6d6dSopenharmony_ci 4572e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 4582e5b6d6dSopenharmony_ci lines[i].setName(stuff[i], 3); 4592e5b6d6dSopenharmony_ci } 4602e5b6d6dSopenharmony_ci //setArray(sortedLines, lines, size); 4612e5b6d6dSopenharmony_ci qsort(sortedLines, size, sizeof(Line*), gComparer); 4622e5b6d6dSopenharmony_ci 4632e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 4642e5b6d6dSopenharmony_ci if(*(sortedLines+i) == &lines[i]) { 4652e5b6d6dSopenharmony_ci ordered++; 4662e5b6d6dSopenharmony_ci } 4672e5b6d6dSopenharmony_ci if(*(sortedLines+i) == &lines[size-i-1]) { 4682e5b6d6dSopenharmony_ci reversed++; 4692e5b6d6dSopenharmony_ci } 4702e5b6d6dSopenharmony_ci } 4712e5b6d6dSopenharmony_ci 4722e5b6d6dSopenharmony_ci delete[] sortedLines; 4732e5b6d6dSopenharmony_ci if(ordered == size) { 4742e5b6d6dSopenharmony_ci return 0; // in normal order 4752e5b6d6dSopenharmony_ci } else if(reversed == size) { 4762e5b6d6dSopenharmony_ci return 1; // in reversed order 4772e5b6d6dSopenharmony_ci } else { 4782e5b6d6dSopenharmony_ci return -1; // unknown order 4792e5b6d6dSopenharmony_ci } 4802e5b6d6dSopenharmony_ci} 4812e5b6d6dSopenharmony_ci 4822e5b6d6dSopenharmony_civoid 4832e5b6d6dSopenharmony_cigetExemplars(const char *locale, UnicodeSet &exemplars, UErrorCode &status) { 4842e5b6d6dSopenharmony_ci // first we fill out structures with exemplar characters. 4852e5b6d6dSopenharmony_ci UResourceBundle *res = ures_open(NULL, locale, &status); 4862e5b6d6dSopenharmony_ci UnicodeString exemplarString = ures_getUnicodeStringByKey(res, "ExemplarCharacters", &status); 4872e5b6d6dSopenharmony_ci exemplars.clear(); 4882e5b6d6dSopenharmony_ci exemplars.applyPattern(exemplarString, status); 4892e5b6d6dSopenharmony_ci ures_close(res); 4902e5b6d6dSopenharmony_ci} 4912e5b6d6dSopenharmony_ci 4922e5b6d6dSopenharmony_ci 4932e5b6d6dSopenharmony_civoid 4942e5b6d6dSopenharmony_cigetFileNames(const char *name, char *tailoringName, char *tailoringDumpName, char *defaultName, char *defaultDumpName, char *diffName) { 4952e5b6d6dSopenharmony_ci if(tailoringName) { 4962e5b6d6dSopenharmony_ci strcpy(tailoringName, platforms[gPlatformIndexes[0]].name); 4972e5b6d6dSopenharmony_ci strcat(tailoringName, "/"); 4982e5b6d6dSopenharmony_ci strcat(tailoringName, name); 4992e5b6d6dSopenharmony_ci strcat(tailoringName, "_raw."); 5002e5b6d6dSopenharmony_ci strcat(tailoringName, outExtension); 5012e5b6d6dSopenharmony_ci } 5022e5b6d6dSopenharmony_ci if(tailoringDumpName) { 5032e5b6d6dSopenharmony_ci strcpy(tailoringDumpName, platforms[gPlatformIndexes[0]].name); 5042e5b6d6dSopenharmony_ci strcat(tailoringDumpName, "/"); 5052e5b6d6dSopenharmony_ci strcat(tailoringDumpName, name); 5062e5b6d6dSopenharmony_ci strcat(tailoringDumpName, ".dump"); 5072e5b6d6dSopenharmony_ci } 5082e5b6d6dSopenharmony_ci 5092e5b6d6dSopenharmony_ci if(diffName) { 5102e5b6d6dSopenharmony_ci strcpy(diffName, platforms[gPlatformIndexes[0]].name); 5112e5b6d6dSopenharmony_ci strcat(diffName, "/"); 5122e5b6d6dSopenharmony_ci strcat(diffName, name); 5132e5b6d6dSopenharmony_ci strcat(diffName, "_collation."); 5142e5b6d6dSopenharmony_ci strcat(diffName, outExtension); 5152e5b6d6dSopenharmony_ci } 5162e5b6d6dSopenharmony_ci 5172e5b6d6dSopenharmony_ci if(defaultName) { 5182e5b6d6dSopenharmony_ci strcpy(defaultName, platforms[gRefNum].name); 5192e5b6d6dSopenharmony_ci strcat(defaultName, "/"); 5202e5b6d6dSopenharmony_ci strcat(defaultName, name); 5212e5b6d6dSopenharmony_ci strcat(defaultName, "_default_raw."); 5222e5b6d6dSopenharmony_ci strcat(defaultName, outExtension); 5232e5b6d6dSopenharmony_ci } 5242e5b6d6dSopenharmony_ci 5252e5b6d6dSopenharmony_ci if(defaultDumpName) { 5262e5b6d6dSopenharmony_ci strcpy(defaultDumpName, platforms[gRefNum].name); 5272e5b6d6dSopenharmony_ci strcat(defaultDumpName, "/"); 5282e5b6d6dSopenharmony_ci strcat(defaultDumpName, name); 5292e5b6d6dSopenharmony_ci strcat(defaultDumpName, "_default.dump"); 5302e5b6d6dSopenharmony_ci } 5312e5b6d6dSopenharmony_ci} 5322e5b6d6dSopenharmony_ci 5332e5b6d6dSopenharmony_civoid 5342e5b6d6dSopenharmony_cisetFiles(const char *name, UErrorCode &status) { 5352e5b6d6dSopenharmony_ci if(U_FAILURE(status)) { 5362e5b6d6dSopenharmony_ci return; 5372e5b6d6dSopenharmony_ci } 5382e5b6d6dSopenharmony_ci int32_t i = 0; 5392e5b6d6dSopenharmony_ci char tailoringName[256]; 5402e5b6d6dSopenharmony_ci char tailoringDumpName[256]; 5412e5b6d6dSopenharmony_ci char defaultName[256]; 5422e5b6d6dSopenharmony_ci char defaultDumpName[256]; 5432e5b6d6dSopenharmony_ci char diffName[256]; 5442e5b6d6dSopenharmony_ci 5452e5b6d6dSopenharmony_ci getFileNames(name, tailoringName, tailoringDumpName, defaultName, defaultDumpName, diffName); 5462e5b6d6dSopenharmony_ci if(options[PLATFORM].doesOccur && !options[DIFF].doesOccur) { 5472e5b6d6dSopenharmony_ci if(createDir(platforms[gPlatformIndexes[0]].name) == 0) { 5482e5b6d6dSopenharmony_ci tailoringBundle = new UPrinter(tailoringName, "en", "utf-8", NULL, false); 5492e5b6d6dSopenharmony_ci fTailoringDump = fopen(tailoringDumpName, "wb"); 5502e5b6d6dSopenharmony_ci } else { 5512e5b6d6dSopenharmony_ci status = U_FILE_ACCESS_ERROR; 5522e5b6d6dSopenharmony_ci return; 5532e5b6d6dSopenharmony_ci } 5542e5b6d6dSopenharmony_ci } 5552e5b6d6dSopenharmony_ci 5562e5b6d6dSopenharmony_ci if(options[REFERENCE].doesOccur && !options[DIFF].doesOccur) { 5572e5b6d6dSopenharmony_ci if(createDir(platforms[gRefNum].name) == 0) { 5582e5b6d6dSopenharmony_ci referenceBundle = new UPrinter(defaultName, "en", "utf-8", NULL, false); 5592e5b6d6dSopenharmony_ci fDefaultDump = fopen(defaultDumpName, "wb"); 5602e5b6d6dSopenharmony_ci } else { 5612e5b6d6dSopenharmony_ci status = U_FILE_ACCESS_ERROR; 5622e5b6d6dSopenharmony_ci return; 5632e5b6d6dSopenharmony_ci } 5642e5b6d6dSopenharmony_ci } 5652e5b6d6dSopenharmony_ci 5662e5b6d6dSopenharmony_ci if((options[PLATFORM].doesOccur && options[REFERENCE].doesOccur) || options[DIFF].doesOccur) { 5672e5b6d6dSopenharmony_ci if(createDir(platforms[gPlatformIndexes[0]].name) == 0) { 5682e5b6d6dSopenharmony_ci bundle = new UPrinter(diffName, "en", "utf-8", NULL, false); 5692e5b6d6dSopenharmony_ci } 5702e5b6d6dSopenharmony_ci } 5712e5b6d6dSopenharmony_ci if(options[DIFF].doesOccur) { 5722e5b6d6dSopenharmony_ci fTailoringDump = fopen(tailoringDumpName, "rb"); 5732e5b6d6dSopenharmony_ci fDefaultDump = fopen(defaultDumpName, "rb"); 5742e5b6d6dSopenharmony_ci } 5752e5b6d6dSopenharmony_ci} 5762e5b6d6dSopenharmony_ci 5772e5b6d6dSopenharmony_ci 5782e5b6d6dSopenharmony_ciUErrorCode status = U_ZERO_ERROR; 5792e5b6d6dSopenharmony_cistatic UnicodeSet UNASSIGNED(UnicodeString("[:Cn:]"), status); 5802e5b6d6dSopenharmony_cistatic UnicodeSet GENERAL_ACCENTS(UnicodeString("[[:block=Combining Diacritical Marks:]-[:Cn:]]"), status); 5812e5b6d6dSopenharmony_ci//static UnicodeSet ASCII_BASE(UnicodeString("[[:ASCII:]-[:L:]-[:N:]]"), status); 5822e5b6d6dSopenharmony_cistatic UnicodeSet ASCII_BASE(UnicodeString("[[:ASCII:]]"), status); 5832e5b6d6dSopenharmony_cistatic UnicodeSet ALPHABETIC(UnicodeString("[:alphabetic:]"), status); 5842e5b6d6dSopenharmony_ci//static UnicodeSet CONTROL(UnicodeString("[[:control:][\\u0000-\\u002F]]"), status); 5852e5b6d6dSopenharmony_cistatic UnicodeSet BMP(UnicodeString("[\\u0000-\\uFFFF]"), status); 5862e5b6d6dSopenharmony_ci 5872e5b6d6dSopenharmony_cistatic UnicodeSet CONTROL(UnicodeString("[:control:]"), status); 5882e5b6d6dSopenharmony_ci 5892e5b6d6dSopenharmony_ciUCollator * 5902e5b6d6dSopenharmony_cisetLocale(const char* locale, UErrorCode &status) 5912e5b6d6dSopenharmony_ci{ 5922e5b6d6dSopenharmony_ci gWinLCID = uloc_getLCID(locale); 5932e5b6d6dSopenharmony_ci setlocale(LC_COLLATE, locale); 5942e5b6d6dSopenharmony_ci 5952e5b6d6dSopenharmony_ci if(gCol) { 5962e5b6d6dSopenharmony_ci ucol_close(gCol); 5972e5b6d6dSopenharmony_ci } 5982e5b6d6dSopenharmony_ci gCol = ucol_open(locale, &status); 5992e5b6d6dSopenharmony_ci ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 6002e5b6d6dSopenharmony_ci //ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); 6012e5b6d6dSopenharmony_ci //ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status); 6022e5b6d6dSopenharmony_ci 6032e5b6d6dSopenharmony_ci return gCol; 6042e5b6d6dSopenharmony_ci} 6052e5b6d6dSopenharmony_ci 6062e5b6d6dSopenharmony_ci 6072e5b6d6dSopenharmony_ci 6082e5b6d6dSopenharmony_ciUCollator * 6092e5b6d6dSopenharmony_cisetReference(UErrorCode &status) 6102e5b6d6dSopenharmony_ci{ 6112e5b6d6dSopenharmony_ci gWinLCID = uloc_getLCID("en"); 6122e5b6d6dSopenharmony_ci setlocale(LC_COLLATE, "en_US.UTF-8"); 6132e5b6d6dSopenharmony_ci if(gCol) { 6142e5b6d6dSopenharmony_ci ucol_close(gCol); 6152e5b6d6dSopenharmony_ci } 6162e5b6d6dSopenharmony_ci gCol = ucol_open("root", &status); 6172e5b6d6dSopenharmony_ci ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 6182e5b6d6dSopenharmony_ci return gCol; 6192e5b6d6dSopenharmony_ci} 6202e5b6d6dSopenharmony_ci 6212e5b6d6dSopenharmony_civoid 6222e5b6d6dSopenharmony_ciprocessInteractive() { 6232e5b6d6dSopenharmony_ci char command[256]; 6242e5b6d6dSopenharmony_ci while(fgets(command, 256, stdin)) { 6252e5b6d6dSopenharmony_ci 6262e5b6d6dSopenharmony_ci } 6272e5b6d6dSopenharmony_ci} 6282e5b6d6dSopenharmony_ci 6292e5b6d6dSopenharmony_ciUChar probeChars[][4] = { 6302e5b6d6dSopenharmony_ci { 0x0061, 0x0062, 0x00E1, 0x0041 }, // latin with a-grave 6312e5b6d6dSopenharmony_ci { 0x0041, 0x0042, 0x00C1, 0x0061 }, // upper first 6322e5b6d6dSopenharmony_ci { 0x006E, 0x006F, 0x00F1, 0x004E }, // latin with n-tilda 6332e5b6d6dSopenharmony_ci { 0x004E, 0x004F, 0x00D1, 0x006E }, // upper first 6342e5b6d6dSopenharmony_ci { 0x0433, 0x0493, 0x0491, 0x0413 }, // Cyrillic 6352e5b6d6dSopenharmony_ci { 0x0413, 0x0492, 0x0490, 0x0433 }, // upper first 6362e5b6d6dSopenharmony_ci { 0x3045, 0x3047, 0x3094, 0x3046 } // Hiragana/Katakana (last resort) 6372e5b6d6dSopenharmony_ci 6382e5b6d6dSopenharmony_ci}; 6392e5b6d6dSopenharmony_ci 6402e5b6d6dSopenharmony_civoid 6412e5b6d6dSopenharmony_ciprocessCollator(UCollator *col, UErrorCode &status) { 6422e5b6d6dSopenharmony_ci int32_t i = 0; 6432e5b6d6dSopenharmony_ci uint32_t j = 0; 6442e5b6d6dSopenharmony_ci gCol = col; 6452e5b6d6dSopenharmony_ci UChar ruleString[16384]; 6462e5b6d6dSopenharmony_ci char myLoc[256]; 6472e5b6d6dSopenharmony_ci 6482e5b6d6dSopenharmony_ci int32_t ruleStringLength = ucol_getRulesEx(gCol, UCOL_TAILORING_ONLY, ruleString, 16384); 6492e5b6d6dSopenharmony_ci logger->log(UnicodeString(ruleString, ruleStringLength), true); 6502e5b6d6dSopenharmony_ci const char *locale = ucol_getLocale(gCol, ULOC_REQUESTED_LOCALE, &status); 6512e5b6d6dSopenharmony_ci if(locale == NULL) { 6522e5b6d6dSopenharmony_ci locale = "en"; 6532e5b6d6dSopenharmony_ci } 6542e5b6d6dSopenharmony_ci strcpy(myLoc, locale); 6552e5b6d6dSopenharmony_ci UnicodeSet exemplarUSet; 6562e5b6d6dSopenharmony_ci UnicodeSet RefRepertoire; 6572e5b6d6dSopenharmony_ci 6582e5b6d6dSopenharmony_ci UnicodeSet tailored; 6592e5b6d6dSopenharmony_ci 6602e5b6d6dSopenharmony_ci tailored = *((UnicodeSet *)ucol_getTailoredSet(gCol, &status)); 6612e5b6d6dSopenharmony_ci tailored.removeAll(CONTROL); 6622e5b6d6dSopenharmony_ci 6632e5b6d6dSopenharmony_ci 6642e5b6d6dSopenharmony_ci UnicodeString pattern; 6652e5b6d6dSopenharmony_ci int sanityResult; 6662e5b6d6dSopenharmony_ci 6672e5b6d6dSopenharmony_ci UnicodeSet hanSet; 6682e5b6d6dSopenharmony_ci UBool hanAppears = false; 6692e5b6d6dSopenharmony_ci 6702e5b6d6dSopenharmony_ci debug->log("\nGenerating order for platform: %s\n", platforms[gPlatformIndexes[0]].name); 6712e5b6d6dSopenharmony_ci gComparer = platforms[gPlatformIndexes[0]].comparer; 6722e5b6d6dSopenharmony_ci 6732e5b6d6dSopenharmony_ci StrengthProbe probe(platforms[gPlatformIndexes[0]].comparer, platforms[gPlatformIndexes[0]].skgetter, 0x0030, probeChars[0][0], probeChars[0][1], probeChars[0][2], probeChars[0][3]); 6742e5b6d6dSopenharmony_ci sanityResult = probe.checkSanity(); 6752e5b6d6dSopenharmony_ci j = 0; 6762e5b6d6dSopenharmony_ci while(sanityResult && j+1 < sizeof(probeChars)/sizeof(probeChars[0])) { 6772e5b6d6dSopenharmony_ci j++; 6782e5b6d6dSopenharmony_ci sanityResult = probe.setProbeChars(probeChars[j][0], probeChars[j][1], probeChars[j][2], probeChars[j][3]); 6792e5b6d6dSopenharmony_ci } 6802e5b6d6dSopenharmony_ci if(sanityResult) { 6812e5b6d6dSopenharmony_ci logger->log("Bad choice of probe characters! Sanity returned %i. Exiting\n", sanityResult, sanityResult); 6822e5b6d6dSopenharmony_ci return; 6832e5b6d6dSopenharmony_ci } 6842e5b6d6dSopenharmony_ci logger->log("Probe chars: %C, %C, %C, %C\n", probeChars[j][0], probeChars[j][1], probeChars[j][2], probeChars[j][3]); 6852e5b6d6dSopenharmony_ci 6862e5b6d6dSopenharmony_ci debug->off(); 6872e5b6d6dSopenharmony_ci 6882e5b6d6dSopenharmony_ci if(gRepertoire.size()) { 6892e5b6d6dSopenharmony_ci exemplarUSet = gRepertoire; 6902e5b6d6dSopenharmony_ci } else { 6912e5b6d6dSopenharmony_ci generateRepertoire(locale, exemplarUSet, hanAppears, status); 6922e5b6d6dSopenharmony_ci } 6932e5b6d6dSopenharmony_ci exemplarUSet.addAll(tailored); 6942e5b6d6dSopenharmony_ci hanSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_HAN, status); 6952e5b6d6dSopenharmony_ci exemplarUSet.removeAll(hanSet); 6962e5b6d6dSopenharmony_ci 6972e5b6d6dSopenharmony_ci logger->log(exemplarUSet.toPattern(pattern, true), true); 6982e5b6d6dSopenharmony_ci 6992e5b6d6dSopenharmony_ci exemplarUSet = flatten(exemplarUSet, status); 7002e5b6d6dSopenharmony_ci logger->log(exemplarUSet.toPattern(pattern, true), true); 7012e5b6d6dSopenharmony_ci 7022e5b6d6dSopenharmony_ci if(!options[PRINTREF].doesOccur) { 7032e5b6d6dSopenharmony_ci 7042e5b6d6dSopenharmony_ci logger->log("\n*** Detecting ordering for the locale\n\n"); 7052e5b6d6dSopenharmony_ci 7062e5b6d6dSopenharmony_ci debug->on(); 7072e5b6d6dSopenharmony_ci SortedLines lines(exemplarUSet, gExcludeSet, probe, logger, debug); 7082e5b6d6dSopenharmony_ci lines.analyse(status); 7092e5b6d6dSopenharmony_ci lines.calculateSortKeys(); 7102e5b6d6dSopenharmony_ci debug->log("\n*** Final order\n\n"); 7112e5b6d6dSopenharmony_ci debug->log(lines.toPrettyString(true, true), true); 7122e5b6d6dSopenharmony_ci lines.toFile(fTailoringDump, true, status); 7132e5b6d6dSopenharmony_ci tailoringBundle->log(lines.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, NULL, true, true, hanAppears), true); 7142e5b6d6dSopenharmony_ci //debug->off(); 7152e5b6d6dSopenharmony_ci 7162e5b6d6dSopenharmony_ci if(options[REFERENCE].doesOccur) { 7172e5b6d6dSopenharmony_ci status = U_ZERO_ERROR; 7182e5b6d6dSopenharmony_ci lines.getRepertoire(RefRepertoire); 7192e5b6d6dSopenharmony_ci setReference(status); 7202e5b6d6dSopenharmony_ci 7212e5b6d6dSopenharmony_ci logger->log(exemplarUSet.toPattern(pattern, true), true); 7222e5b6d6dSopenharmony_ci logger->log(RefRepertoire.toPattern(pattern, true), true); 7232e5b6d6dSopenharmony_ci 7242e5b6d6dSopenharmony_ci StrengthProbe RefProbe(platforms[gRefNum].comparer, platforms[gRefNum].skgetter); 7252e5b6d6dSopenharmony_ci logger->log("\n*** Detecting ordering for reference\n\n"); 7262e5b6d6dSopenharmony_ci SortedLines RefLines(exemplarUSet, gExcludeSet, RefProbe, logger, debug); 7272e5b6d6dSopenharmony_ci RefLines.analyse(status); 7282e5b6d6dSopenharmony_ci referenceBundle->log(RefLines.toOutput(outputFormat, myLoc, platforms[gRefNum].name, NULL, true, true, false), true); 7292e5b6d6dSopenharmony_ci RefLines.toFile(fDefaultDump, true, status); 7302e5b6d6dSopenharmony_ci 7312e5b6d6dSopenharmony_ci lines.reduceDifference(RefLines); 7322e5b6d6dSopenharmony_ci logger->log("\n*** Final rules\n\n"); 7332e5b6d6dSopenharmony_ci logger->log(lines.toPrettyString(true), true); 7342e5b6d6dSopenharmony_ci bundle->log(lines.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name, true, true, hanAppears), true); 7352e5b6d6dSopenharmony_ci } 7362e5b6d6dSopenharmony_ci } else { 7372e5b6d6dSopenharmony_ci setReference(status); 7382e5b6d6dSopenharmony_ci StrengthProbe RefProbe(platforms[gRefNum].comparer, platforms[gRefNum].skgetter); 7392e5b6d6dSopenharmony_ci logger->log("\n*** Detecting ordering for reference\n\n"); 7402e5b6d6dSopenharmony_ci SortedLines RefLines(exemplarUSet, gExcludeSet, RefProbe, logger, debug); 7412e5b6d6dSopenharmony_ci RefLines.analyse(status); 7422e5b6d6dSopenharmony_ci logger->log(RefLines.toPrettyString(true), true); 7432e5b6d6dSopenharmony_ci referenceBundle->log(RefLines.toOutput(outputFormat, myLoc, platforms[gRefNum].name, NULL, true, true, false), true); 7442e5b6d6dSopenharmony_ci } 7452e5b6d6dSopenharmony_ci if(hanAppears) { 7462e5b6d6dSopenharmony_ci // there are Han characters. This is a huge block. The best we can do is to just sort it, compare to empty 7472e5b6d6dSopenharmony_ci // and spit it out. Anything else would be a suicide (actually is - kernel just kills you :) 7482e5b6d6dSopenharmony_ci logger->log("\n*** Detecting order for Han\n"); 7492e5b6d6dSopenharmony_ci debug->off(); 7502e5b6d6dSopenharmony_ci setLocale(gLocale, status); 7512e5b6d6dSopenharmony_ci exemplarUSet.clear(); 7522e5b6d6dSopenharmony_ci exemplarUSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_HAN, status); 7532e5b6d6dSopenharmony_ci exemplarUSet = flatten(exemplarUSet, status); 7542e5b6d6dSopenharmony_ci SortedLines han(exemplarUSet, gExcludeSet, probe, logger, debug); 7552e5b6d6dSopenharmony_ci han.sort(true, true); 7562e5b6d6dSopenharmony_ci han.classifyRepertoire(); 7572e5b6d6dSopenharmony_ci han.getBounds(status); 7582e5b6d6dSopenharmony_ci tailoringBundle->log("Han ordering:<br>\n"); 7592e5b6d6dSopenharmony_ci tailoringBundle->log(han.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, NULL, true, false, false), true); 7602e5b6d6dSopenharmony_ci bundle->log(han.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, NULL, true, false, false), true); 7612e5b6d6dSopenharmony_ci } 7622e5b6d6dSopenharmony_ci ucol_close(gCol); 7632e5b6d6dSopenharmony_ci} 7642e5b6d6dSopenharmony_ci 7652e5b6d6dSopenharmony_civoid 7662e5b6d6dSopenharmony_ciprocessLocale(const char *locale, UErrorCode &status) { 7672e5b6d6dSopenharmony_ci setLocale(locale, status); 7682e5b6d6dSopenharmony_ci setFiles(locale, status); 7692e5b6d6dSopenharmony_ci if(U_FAILURE(status)) { 7702e5b6d6dSopenharmony_ci return; 7712e5b6d6dSopenharmony_ci } 7722e5b6d6dSopenharmony_ci 7732e5b6d6dSopenharmony_ci debug->log("Locale %s (LCID:%06X, unix:%s)\n", locale, gWinLCID, setlocale(LC_COLLATE, NULL)); 7742e5b6d6dSopenharmony_ci tailoringBundle->log("// Ordering for locale %s (LCID:%06X, unix:%s), platform %s reference %s<br>\n", 7752e5b6d6dSopenharmony_ci locale, gWinLCID, setlocale(LC_COLLATE, NULL), 7762e5b6d6dSopenharmony_ci platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name); 7772e5b6d6dSopenharmony_ci if(options[REFERENCE].doesOccur) { 7782e5b6d6dSopenharmony_ci referenceBundle->log("// Reference for locale %s (LCID:%06X, unix:%s), platform %s reference %s<br>\n", 7792e5b6d6dSopenharmony_ci locale, gWinLCID, setlocale(LC_COLLATE, NULL), 7802e5b6d6dSopenharmony_ci platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name); 7812e5b6d6dSopenharmony_ci } 7822e5b6d6dSopenharmony_ci 7832e5b6d6dSopenharmony_ci 7842e5b6d6dSopenharmony_ci processCollator(gCol, status); 7852e5b6d6dSopenharmony_ci} 7862e5b6d6dSopenharmony_ci 7872e5b6d6dSopenharmony_ci 7882e5b6d6dSopenharmony_ci 7892e5b6d6dSopenharmony_ciUBool 7902e5b6d6dSopenharmony_cihasCollationElements(const char *locName) { 7912e5b6d6dSopenharmony_ci 7922e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 7932e5b6d6dSopenharmony_ci UResourceBundle *ColEl = NULL; 7942e5b6d6dSopenharmony_ci 7952e5b6d6dSopenharmony_ci UResourceBundle *loc = ures_open(NULL, locName, &status);; 7962e5b6d6dSopenharmony_ci 7972e5b6d6dSopenharmony_ci if(U_SUCCESS(status)) { 7982e5b6d6dSopenharmony_ci status = U_ZERO_ERROR; 7992e5b6d6dSopenharmony_ci ColEl = ures_getByKey(loc, "CollationElements", ColEl, &status); 8002e5b6d6dSopenharmony_ci if(status == U_ZERO_ERROR) { /* do the test - there are real elements */ 8012e5b6d6dSopenharmony_ci ures_close(ColEl); 8022e5b6d6dSopenharmony_ci ures_close(loc); 8032e5b6d6dSopenharmony_ci return true; 8042e5b6d6dSopenharmony_ci } 8052e5b6d6dSopenharmony_ci ures_close(ColEl); 8062e5b6d6dSopenharmony_ci ures_close(loc); 8072e5b6d6dSopenharmony_ci } 8082e5b6d6dSopenharmony_ci return false; 8092e5b6d6dSopenharmony_ci} 8102e5b6d6dSopenharmony_ci 8112e5b6d6dSopenharmony_ciint 8122e5b6d6dSopenharmony_cimain(int argc, 8132e5b6d6dSopenharmony_ci char* argv[]) 8142e5b6d6dSopenharmony_ci{ 8152e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 8162e5b6d6dSopenharmony_ci logger = new UPrinter(stdout, "en", "latin-1"); 8172e5b6d6dSopenharmony_ci debug = new UPrinter(stderr, "en", "latin-1"); 8182e5b6d6dSopenharmony_ci 8192e5b6d6dSopenharmony_ci/* 8202e5b6d6dSopenharmony_ci USet *wsp = uprv_openRuleWhiteSpaceSet(&status); 8212e5b6d6dSopenharmony_ci uset_add(wsp, 0x0041); 8222e5b6d6dSopenharmony_ci uset_remove(wsp, 0x0041); 8232e5b6d6dSopenharmony_ci UnicodeString pat; 8242e5b6d6dSopenharmony_ci ((UnicodeSet *)wsp)->toPattern(pat, true); 8252e5b6d6dSopenharmony_ci pat.setCharAt(pat.length(), 0); 8262e5b6d6dSopenharmony_ci escapeString(pat.getBuffer(), pat.length(), log); 8272e5b6d6dSopenharmony_ci u_fflush(log); 8282e5b6d6dSopenharmony_ci*/ 8292e5b6d6dSopenharmony_ci 8302e5b6d6dSopenharmony_ci processArgs(argc, argv, status); 8312e5b6d6dSopenharmony_ci int32_t i = 0; 8322e5b6d6dSopenharmony_ci 8332e5b6d6dSopenharmony_ci 8342e5b6d6dSopenharmony_ci 8352e5b6d6dSopenharmony_ci if(U_FAILURE(status) || gPlatformNo == 0) { 8362e5b6d6dSopenharmony_ci return -1; 8372e5b6d6dSopenharmony_ci } 8382e5b6d6dSopenharmony_ci 8392e5b6d6dSopenharmony_ci utf8cnv = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now. 8402e5b6d6dSopenharmony_ci gUCA = ucol_open("root", &status); 8412e5b6d6dSopenharmony_ci 8422e5b6d6dSopenharmony_ci if(options[INTERACTIVE].doesOccur) { 8432e5b6d6dSopenharmony_ci processInteractive(); 8442e5b6d6dSopenharmony_ci } else { 8452e5b6d6dSopenharmony_ci if(gRulesStdin) { 8462e5b6d6dSopenharmony_ci char buffer[1024]; 8472e5b6d6dSopenharmony_ci UChar ruleBuffer[16384]; 8482e5b6d6dSopenharmony_ci UChar *rules = ruleBuffer; 8492e5b6d6dSopenharmony_ci int32_t maxRuleLen = 16384; 8502e5b6d6dSopenharmony_ci int32_t rLen = 0; 8512e5b6d6dSopenharmony_ci while(fgets(buffer, 1024, stdin)) { 8522e5b6d6dSopenharmony_ci if(buffer[0] != '/' && buffer[1] != '/') { 8532e5b6d6dSopenharmony_ci rLen = u_unescape(buffer, rules, maxRuleLen); 8542e5b6d6dSopenharmony_ci rules += rLen; 8552e5b6d6dSopenharmony_ci maxRuleLen -= rLen; 8562e5b6d6dSopenharmony_ci } 8572e5b6d6dSopenharmony_ci } 8582e5b6d6dSopenharmony_ci UParseError parseError; 8592e5b6d6dSopenharmony_ci //escapeString(ruleBuffer, rules-ruleBuffer, log);// 8602e5b6d6dSopenharmony_ci debug->log("%U\n", ruleBuffer); 8612e5b6d6dSopenharmony_ci 8622e5b6d6dSopenharmony_ci UCollator *col = ucol_openRules(ruleBuffer, rules-ruleBuffer, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 8632e5b6d6dSopenharmony_ci if(U_SUCCESS(status)) { 8642e5b6d6dSopenharmony_ci setFiles("stdinRules", status); 8652e5b6d6dSopenharmony_ci processCollator(col, status); 8662e5b6d6dSopenharmony_ci } else { 8672e5b6d6dSopenharmony_ci logger->log("Error %s\n", u_errorName(status)); 8682e5b6d6dSopenharmony_ci } 8692e5b6d6dSopenharmony_ci } else if(options[DIFF].doesOccur) { 8702e5b6d6dSopenharmony_ci logger->log("Diffing two dumps\n"); 8712e5b6d6dSopenharmony_ci // must have locale, platform and ref in order to be 8722e5b6d6dSopenharmony_ci // able to find dump files. 8732e5b6d6dSopenharmony_ci setFiles(gLocale, status); 8742e5b6d6dSopenharmony_ci 8752e5b6d6dSopenharmony_ci if(fTailoringDump && fDefaultDump) { 8762e5b6d6dSopenharmony_ci SortedLines tailoring(fTailoringDump, logger, debug, status); 8772e5b6d6dSopenharmony_ci logger->log(tailoring.toString(true), true); 8782e5b6d6dSopenharmony_ci SortedLines reference(fDefaultDump, logger, debug, status); 8792e5b6d6dSopenharmony_ci logger->log(reference.toString(true), true); 8802e5b6d6dSopenharmony_ci tailoring.reduceDifference(reference); 8812e5b6d6dSopenharmony_ci logger->log("\n*** Final rules\n\n"); 8822e5b6d6dSopenharmony_ci logger->log(tailoring.toPrettyString(true), true); 8832e5b6d6dSopenharmony_ci //result->log(lines.toPrettyString(true), true); 8842e5b6d6dSopenharmony_ci bundle->log(tailoring.toOutput(outputFormat, gLocale, platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name, true, true, false), true); 8852e5b6d6dSopenharmony_ci } 8862e5b6d6dSopenharmony_ci 8872e5b6d6dSopenharmony_ci } else { 8882e5b6d6dSopenharmony_ci if(gLocale) { 8892e5b6d6dSopenharmony_ci processLocale(gLocale, status); 8902e5b6d6dSopenharmony_ci } else if(gLocaleNo) { 8912e5b6d6dSopenharmony_ci for(i = 0; i < gLocaleNo; i++) { 8922e5b6d6dSopenharmony_ci processLocale(gLocales[i], status); 8932e5b6d6dSopenharmony_ci } 8942e5b6d6dSopenharmony_ci } else { // do the loop through all the locales 8952e5b6d6dSopenharmony_ci int32_t noOfLoc = uloc_countAvailable(); 8962e5b6d6dSopenharmony_ci const char *locName = NULL; 8972e5b6d6dSopenharmony_ci for(i = 0; i<noOfLoc; i++) { 8982e5b6d6dSopenharmony_ci status = U_ZERO_ERROR; 8992e5b6d6dSopenharmony_ci locName = uloc_getAvailable(i); 9002e5b6d6dSopenharmony_ci if(hasCollationElements(locName)) { 9012e5b6d6dSopenharmony_ci processLocale(locName, status); 9022e5b6d6dSopenharmony_ci } 9032e5b6d6dSopenharmony_ci } 9042e5b6d6dSopenharmony_ci } 9052e5b6d6dSopenharmony_ci } 9062e5b6d6dSopenharmony_ci } 9072e5b6d6dSopenharmony_ci 9082e5b6d6dSopenharmony_ci 9092e5b6d6dSopenharmony_ci ucol_close(gUCA); 9102e5b6d6dSopenharmony_ci ucnv_close(utf8cnv); 9112e5b6d6dSopenharmony_ci 9122e5b6d6dSopenharmony_ci delete logger; 9132e5b6d6dSopenharmony_ci delete debug; 9142e5b6d6dSopenharmony_ci if(tailoringBundle) { 9152e5b6d6dSopenharmony_ci delete tailoringBundle; 9162e5b6d6dSopenharmony_ci } 9172e5b6d6dSopenharmony_ci if(referenceBundle) { 9182e5b6d6dSopenharmony_ci delete referenceBundle; 9192e5b6d6dSopenharmony_ci } 9202e5b6d6dSopenharmony_ci if(bundle) { 9212e5b6d6dSopenharmony_ci delete bundle; 9222e5b6d6dSopenharmony_ci } 9232e5b6d6dSopenharmony_ci if(fTailoringDump) { 9242e5b6d6dSopenharmony_ci fclose(fTailoringDump); 9252e5b6d6dSopenharmony_ci } 9262e5b6d6dSopenharmony_ci if(fDefaultDump) { 9272e5b6d6dSopenharmony_ci fclose(fDefaultDump); 9282e5b6d6dSopenharmony_ci } 9292e5b6d6dSopenharmony_ci return 0; 9302e5b6d6dSopenharmony_ci} 9312e5b6d6dSopenharmony_ci 9322e5b6d6dSopenharmony_ci 9332e5b6d6dSopenharmony_ciUnicodeString propertyAndValueName(UProperty prop, int32_t i) { 9342e5b6d6dSopenharmony_ci UnicodeString result; 9352e5b6d6dSopenharmony_ci result.append(u_getPropertyName(prop, U_LONG_PROPERTY_NAME)); 9362e5b6d6dSopenharmony_ci result.append("="); 9372e5b6d6dSopenharmony_ci result.append(u_getPropertyValueName(prop, i, U_LONG_PROPERTY_NAME)); 9382e5b6d6dSopenharmony_ci 9392e5b6d6dSopenharmony_ci //+ "(" + prop + "," + i + ") "; 9402e5b6d6dSopenharmony_ci return result; 9412e5b6d6dSopenharmony_ci} 9422e5b6d6dSopenharmony_ci 9432e5b6d6dSopenharmony_ci 9442e5b6d6dSopenharmony_civoid generateRepertoire(const char *locale, UnicodeSet &rep, UBool &hanAppears, UErrorCode &status) { 9452e5b6d6dSopenharmony_ci UnicodeString dispName; 9462e5b6d6dSopenharmony_ci debug->log("Getting repertoire for %s\n", locale); 9472e5b6d6dSopenharmony_ci tailoringBundle->log("// Scripts in repertoire: "); 9482e5b6d6dSopenharmony_ci if(options[REFERENCE].doesOccur) { 9492e5b6d6dSopenharmony_ci referenceBundle->log("// Scripts in repertoire: "); 9502e5b6d6dSopenharmony_ci } 9512e5b6d6dSopenharmony_ci rep.clear(); 9522e5b6d6dSopenharmony_ci UnicodeSet delta; 9532e5b6d6dSopenharmony_ci 9542e5b6d6dSopenharmony_ci UScriptCode script[256]; 9552e5b6d6dSopenharmony_ci int32_t i = 0; 9562e5b6d6dSopenharmony_ci // now add the scripts for the locale 9572e5b6d6dSopenharmony_ci UProperty prop = UCHAR_SCRIPT; 9582e5b6d6dSopenharmony_ci int32_t scriptLength = uscript_getCode(locale, script, 256, &status); 9592e5b6d6dSopenharmony_ci if(scriptLength) { 9602e5b6d6dSopenharmony_ci for (i = 0; i < scriptLength; ++i) { 9612e5b6d6dSopenharmony_ci if(script[i] == USCRIPT_HAN) { 9622e5b6d6dSopenharmony_ci hanAppears = true; 9632e5b6d6dSopenharmony_ci continue; 9642e5b6d6dSopenharmony_ci } 9652e5b6d6dSopenharmony_ci delta.applyIntPropertyValue(prop, script[i], status); 9662e5b6d6dSopenharmony_ci debug->log("Adding "); 9672e5b6d6dSopenharmony_ci debug->log(propertyAndValueName(prop, script[i]), true); 9682e5b6d6dSopenharmony_ci tailoringBundle->log("// "); 9692e5b6d6dSopenharmony_ci tailoringBundle->log(propertyAndValueName(prop, script[i]), true); 9702e5b6d6dSopenharmony_ci if(options[REFERENCE].doesOccur) { 9712e5b6d6dSopenharmony_ci referenceBundle->log("// "); 9722e5b6d6dSopenharmony_ci referenceBundle->log(propertyAndValueName(prop, script[i]), true); 9732e5b6d6dSopenharmony_ci } 9742e5b6d6dSopenharmony_ci rep.addAll(delta); 9752e5b6d6dSopenharmony_ci } 9762e5b6d6dSopenharmony_ci } else { 9772e5b6d6dSopenharmony_ci delta.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_LATIN, status); 9782e5b6d6dSopenharmony_ci rep.addAll(delta); 9792e5b6d6dSopenharmony_ci } 9802e5b6d6dSopenharmony_ci 9812e5b6d6dSopenharmony_ci // now see which blocks those overlap, and add 9822e5b6d6dSopenharmony_ci prop = UCHAR_BLOCK; 9832e5b6d6dSopenharmony_ci int32_t min = u_getIntPropertyMinValue(prop); 9842e5b6d6dSopenharmony_ci int32_t max = u_getIntPropertyMaxValue(prop); 9852e5b6d6dSopenharmony_ci UnicodeSet checkDelta; 9862e5b6d6dSopenharmony_ci for (i = min; i <= max; ++i) { 9872e5b6d6dSopenharmony_ci // skip certain blocks 9882e5b6d6dSopenharmony_ci const char *name = u_getPropertyValueName(prop, i, U_LONG_PROPERTY_NAME); 9892e5b6d6dSopenharmony_ci if (strcmp(name, "Superscripts_and_Subscripts") == 0 9902e5b6d6dSopenharmony_ci || strcmp(name, "Letterlike_Symbols") == 0 9912e5b6d6dSopenharmony_ci || strcmp(name, "Alphabetic_Presentation_Forms") == 0 9922e5b6d6dSopenharmony_ci || strcmp(name, "Halfwidth_and_Fullwidth_Forms") == 0) continue; 9932e5b6d6dSopenharmony_ci 9942e5b6d6dSopenharmony_ci delta.applyIntPropertyValue(prop, i, status).removeAll(UNASSIGNED); 9952e5b6d6dSopenharmony_ci if (!rep.containsSome(delta)) continue; 9962e5b6d6dSopenharmony_ci if (rep.containsAll(delta)) continue; // just to see what we are adding 9972e5b6d6dSopenharmony_ci debug->log("Adding "); 9982e5b6d6dSopenharmony_ci debug->log(propertyAndValueName(prop, i), true); 9992e5b6d6dSopenharmony_ci tailoringBundle->log("// "); 10002e5b6d6dSopenharmony_ci tailoringBundle->log(propertyAndValueName(prop, i), true); 10012e5b6d6dSopenharmony_ci if(options[REFERENCE].doesOccur) { 10022e5b6d6dSopenharmony_ci referenceBundle->log("// "); 10032e5b6d6dSopenharmony_ci referenceBundle->log(propertyAndValueName(prop, i), true); 10042e5b6d6dSopenharmony_ci } 10052e5b6d6dSopenharmony_ci rep.addAll(delta); 10062e5b6d6dSopenharmony_ci } 10072e5b6d6dSopenharmony_ci 10082e5b6d6dSopenharmony_ci // add ASCII and general accents 10092e5b6d6dSopenharmony_ci rep.addAll(GENERAL_ACCENTS).addAll(ASCII_BASE); 10102e5b6d6dSopenharmony_ci rep.removeAll(CONTROL); 10112e5b6d6dSopenharmony_ci //delta.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_HAN, status); 10122e5b6d6dSopenharmony_ci //rep.removeAll(delta); 10132e5b6d6dSopenharmony_ci 10142e5b6d6dSopenharmony_ci // now add the exemplar characters 10152e5b6d6dSopenharmony_ci // can't get at them from Java right now 10162e5b6d6dSopenharmony_ci tailoringBundle->log("<br>\n"); 10172e5b6d6dSopenharmony_ci if(options[REFERENCE].doesOccur) { 10182e5b6d6dSopenharmony_ci referenceBundle->log("<br>\n"); 10192e5b6d6dSopenharmony_ci } 10202e5b6d6dSopenharmony_ci} 10212e5b6d6dSopenharmony_ci 10222e5b6d6dSopenharmony_ciUnicodeSet flatten(const UnicodeSet &source, UErrorCode &status) { 10232e5b6d6dSopenharmony_ci UnicodeSet result; 10242e5b6d6dSopenharmony_ci UnicodeSetIterator it(source); 10252e5b6d6dSopenharmony_ci UnicodeString item, itemNFKD, toNormalize; 10262e5b6d6dSopenharmony_ci while (it.next()) { 10272e5b6d6dSopenharmony_ci // would be nicer if UnicodeSetIterator had a getString function 10282e5b6d6dSopenharmony_ci if (it.isString()) { 10292e5b6d6dSopenharmony_ci Normalizer::normalize(it.getString(), UNORM_NFD, 0, item, status); 10302e5b6d6dSopenharmony_ci Normalizer::normalize(it.getString(), UNORM_NFKD, 0, itemNFKD, status); 10312e5b6d6dSopenharmony_ci } else { 10322e5b6d6dSopenharmony_ci toNormalize.setTo(it.getCodepoint()); 10332e5b6d6dSopenharmony_ci Normalizer::normalize(toNormalize, UNORM_NFD, 0, item, status); 10342e5b6d6dSopenharmony_ci Normalizer::normalize(toNormalize, UNORM_NFKD, 0, itemNFKD, status); 10352e5b6d6dSopenharmony_ci } 10362e5b6d6dSopenharmony_ci result.addAll(item); 10372e5b6d6dSopenharmony_ci result.addAll(itemNFKD); 10382e5b6d6dSopenharmony_ci } 10392e5b6d6dSopenharmony_ci return result; 10402e5b6d6dSopenharmony_ci} 10412e5b6d6dSopenharmony_ci 10422e5b6d6dSopenharmony_ci 10432e5b6d6dSopenharmony_civoid testWin(StrengthProbe &probe, UErrorCode &status) 10442e5b6d6dSopenharmony_ci{ 10452e5b6d6dSopenharmony_ci UnicodeSet trailings(UnicodeString("[\\uFE7D\\uFE7C\\u30FD\\uFF70\\u30FC\\u309D\\u3032\\u3031\\u3005\\u0651]"), status); 10462e5b6d6dSopenharmony_ci char intChar[] = "\\uFE7D\\uFE7C\\u30FD\\uFF70\\u30FC\\u309D\\u3032\\u3031\\u3005\\u0651"; 10472e5b6d6dSopenharmony_ci UChar interesting[256]; 10482e5b6d6dSopenharmony_ci int32_t intLen = u_unescape(intChar, interesting, 256); 10492e5b6d6dSopenharmony_ci UChar i = 0; 10502e5b6d6dSopenharmony_ci UChar j = 0, k = 0; 10512e5b6d6dSopenharmony_ci int32_t count; 10522e5b6d6dSopenharmony_ci Line myCh, combo, trial, inter, kLine; 10532e5b6d6dSopenharmony_ci for(i = 0; i < intLen; i++) { 10542e5b6d6dSopenharmony_ci inter.setTo(interesting[i]); 10552e5b6d6dSopenharmony_ci logger->log(inter.toString(true), true); 10562e5b6d6dSopenharmony_ci logger->log("----------------------\n"); 10572e5b6d6dSopenharmony_ci for(j = 0; j < 0xFFFF; j++) { 10582e5b6d6dSopenharmony_ci myCh.setTo(j); 10592e5b6d6dSopenharmony_ci if(probe.distanceFromEmptyString(myCh) == UCOL_IDENTICAL) { 10602e5b6d6dSopenharmony_ci continue; 10612e5b6d6dSopenharmony_ci } 10622e5b6d6dSopenharmony_ci logger->log(myCh.toString(true)); 10632e5b6d6dSopenharmony_ci combo.setTo(j); 10642e5b6d6dSopenharmony_ci combo.append(interesting[i]); 10652e5b6d6dSopenharmony_ci count = 0; 10662e5b6d6dSopenharmony_ci for(k = 0; k < 0xFFFF; k++) { 10672e5b6d6dSopenharmony_ci kLine.setTo(k); 10682e5b6d6dSopenharmony_ci trial.setTo(j); 10692e5b6d6dSopenharmony_ci trial.append(k); 10702e5b6d6dSopenharmony_ci if(probe.compare(kLine, inter) < 0) { 10712e5b6d6dSopenharmony_ci if(probe.compare(trial, combo) >= 0) { 10722e5b6d6dSopenharmony_ci count++; 10732e5b6d6dSopenharmony_ci } 10742e5b6d6dSopenharmony_ci } 10752e5b6d6dSopenharmony_ci } 10762e5b6d6dSopenharmony_ci logger->log("%i %i\n", count, count); 10772e5b6d6dSopenharmony_ci } 10782e5b6d6dSopenharmony_ci } 10792e5b6d6dSopenharmony_ci} 10802e5b6d6dSopenharmony_ci 1081