// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File colprobe.cpp
*
* Modification History:
*
*   Date        Name        Description
*   03/18/2003  weiv        Creation.
*******************************************************************************
*/

#include "uoptions.h"
#include "unicode/ucol.h"
#include "unicode/ucoleitr.h"
#include "unicode/ures.h"
#include "unicode/uniset.h"
#include "unicode/usetiter.h"
#include "unicode/ustring.h"
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "uprops.h"
#include "hash.h"
#include "ucol_imp.h"

#include "unicode/ustdio.h"
#include "unicode/utrans.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <io.h>
402e5b6d6dSopenharmony_ci#include <fcntl.h> 412e5b6d6dSopenharmony_ci 422e5b6d6dSopenharmony_ci#include "colprobe.h" 432e5b6d6dSopenharmony_ci 442e5b6d6dSopenharmony_ci 452e5b6d6dSopenharmony_ci#ifdef WIN32 462e5b6d6dSopenharmony_ci#include <windows.h> 472e5b6d6dSopenharmony_ci#else 482e5b6d6dSopenharmony_ci// 492e5b6d6dSopenharmony_ci// Stubs for Windows API functions when building on UNIXes. 502e5b6d6dSopenharmony_ci// 512e5b6d6dSopenharmony_citypedef int DWORD; 522e5b6d6dSopenharmony_ciinline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;}; 532e5b6d6dSopenharmony_ci#include <sys/time.h> 542e5b6d6dSopenharmony_ciunsigned long timeGetTime() { 552e5b6d6dSopenharmony_ci struct timeval t; 562e5b6d6dSopenharmony_ci gettimeofday(&t, 0); 572e5b6d6dSopenharmony_ci unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares. 582e5b6d6dSopenharmony_ci val += t.tv_usec / 1000; 592e5b6d6dSopenharmony_ci return val; 602e5b6d6dSopenharmony_ci}; 612e5b6d6dSopenharmony_ciinline int LCMapStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;}; 622e5b6d6dSopenharmony_ciconst int LCMAP_SORTKEY = 0; 632e5b6d6dSopenharmony_ci#define MAKELCID(a,b) 0 642e5b6d6dSopenharmony_ciconst int SORT_DEFAULT = 0; 652e5b6d6dSopenharmony_ci#endif 662e5b6d6dSopenharmony_ci 672e5b6d6dSopenharmony_ci#include "line.h" 682e5b6d6dSopenharmony_ci 692e5b6d6dSopenharmony_cistatic UBool gVerbose = false; 702e5b6d6dSopenharmony_cistatic UBool gDebug = false; 712e5b6d6dSopenharmony_cistatic UBool gQuiet = false; 722e5b6d6dSopenharmony_cistatic UBool gExemplar = false; 732e5b6d6dSopenharmony_ci 742e5b6d6dSopenharmony_ciDWORD gWinLCID; 752e5b6d6dSopenharmony_ciint gCount; 762e5b6d6dSopenharmony_ciLine **gICULines; 772e5b6d6dSopenharmony_ciUCollator *gCol; 782e5b6d6dSopenharmony_ciUCollator *gUCA; 792e5b6d6dSopenharmony_ciLine source; 802e5b6d6dSopenharmony_ciLine target; 812e5b6d6dSopenharmony_ciLine *gSource = &source; 822e5b6d6dSopenharmony_ciLine *gTarget = ⌖ 
832e5b6d6dSopenharmony_ciHashtable gElements(false); 842e5b6d6dSopenharmony_ciHashtable gExpansions(false); 852e5b6d6dSopenharmony_ciCompareFn gComparer; 862e5b6d6dSopenharmony_ci 872e5b6d6dSopenharmony_ciconst UChar separatorChar = 0x0030; 882e5b6d6dSopenharmony_ci 892e5b6d6dSopenharmony_ciUFILE *out = NULL; 902e5b6d6dSopenharmony_ciUFILE *err = NULL; 912e5b6d6dSopenharmony_ciUFILE *log = NULL; 922e5b6d6dSopenharmony_ci 932e5b6d6dSopenharmony_ciconst char *progName = "colprobe"; 942e5b6d6dSopenharmony_ci 952e5b6d6dSopenharmony_ciconst char *gLocale = NULL; 962e5b6d6dSopenharmony_ci//char platform[256]; 972e5b6d6dSopenharmony_ciint32_t platformIndex = -1; 982e5b6d6dSopenharmony_ciint32_t gPlatformNo = 0; 992e5b6d6dSopenharmony_ciint32_t gPlatformIndexes[10]; 1002e5b6d6dSopenharmony_ciint32_t gLocaleNo = 0; 1012e5b6d6dSopenharmony_ciconst char* gLocales[100]; 1022e5b6d6dSopenharmony_ciUBool gRulesStdin = false; 1032e5b6d6dSopenharmony_ci 1042e5b6d6dSopenharmony_cienum { 1052e5b6d6dSopenharmony_ci HELP1, 1062e5b6d6dSopenharmony_ci HELP2, 1072e5b6d6dSopenharmony_ci VERBOSE, 1082e5b6d6dSopenharmony_ci QUIET, 1092e5b6d6dSopenharmony_ci VERSION, 1102e5b6d6dSopenharmony_ci ICUDATADIR, 1112e5b6d6dSopenharmony_ci COPYRIGHT, 1122e5b6d6dSopenharmony_ci LOCALE, 1132e5b6d6dSopenharmony_ci PLATFORM, 1142e5b6d6dSopenharmony_ci DEBUG, 1152e5b6d6dSopenharmony_ci EXEMPLAR, 1162e5b6d6dSopenharmony_ci RULESSTDIN 1172e5b6d6dSopenharmony_ci}; 1182e5b6d6dSopenharmony_ci 1192e5b6d6dSopenharmony_ciUOption options[]={ 1202e5b6d6dSopenharmony_ci /*0*/ UOPTION_HELP_H, 1212e5b6d6dSopenharmony_ci /*1*/ UOPTION_HELP_QUESTION_MARK, 1222e5b6d6dSopenharmony_ci /*2*/ UOPTION_VERBOSE, 1232e5b6d6dSopenharmony_ci /*3*/ UOPTION_QUIET, 1242e5b6d6dSopenharmony_ci /*4*/ UOPTION_VERSION, 1252e5b6d6dSopenharmony_ci /*5*/ UOPTION_ICUDATADIR, 1262e5b6d6dSopenharmony_ci /*6*/ UOPTION_COPYRIGHT, 1272e5b6d6dSopenharmony_ci /*7*/ UOPTION_DEF("locale", 'l', UOPT_REQUIRES_ARG), 1282e5b6d6dSopenharmony_ci /*8*/ 
UOPTION_DEF("platform", 'p', UOPT_REQUIRES_ARG), 1292e5b6d6dSopenharmony_ci /*9*/ UOPTION_DEF("debug", 'D', UOPT_NO_ARG), 1302e5b6d6dSopenharmony_ci /*10*/ UOPTION_DEF("exemplar", 'E', UOPT_NO_ARG), 1312e5b6d6dSopenharmony_ci /*11*/ UOPTION_DEF("rulesstdin", 'R', UOPT_NO_ARG) 1322e5b6d6dSopenharmony_ci}; 1332e5b6d6dSopenharmony_ci 1342e5b6d6dSopenharmony_ciint Winstrcmp(const void *a, const void *b) { 1352e5b6d6dSopenharmony_ci gCount++; 1362e5b6d6dSopenharmony_ci int t; 1372e5b6d6dSopenharmony_ci t = CompareStringW(gWinLCID, 0, 1382e5b6d6dSopenharmony_ci (*(Line **)a)->name, (*(Line **)a)->len, 1392e5b6d6dSopenharmony_ci (*(Line **)b)->name, (*(Line **)b)->len); 1402e5b6d6dSopenharmony_ci return t-2; 1412e5b6d6dSopenharmony_ci} 1422e5b6d6dSopenharmony_ci 1432e5b6d6dSopenharmony_ciint ICUstrcmp(const void *a, const void *b) { 1442e5b6d6dSopenharmony_ci gCount++; 1452e5b6d6dSopenharmony_ci UCollationResult t; 1462e5b6d6dSopenharmony_ci t = ucol_strcoll(gCol, 1472e5b6d6dSopenharmony_ci (*(Line **)a)->name, (*(Line **)a)->len, 1482e5b6d6dSopenharmony_ci (*(Line **)b)->name, (*(Line **)b)->len); 1492e5b6d6dSopenharmony_ci if (t == UCOL_LESS) return -1; 1502e5b6d6dSopenharmony_ci if (t == UCOL_GREATER) return +1; 1512e5b6d6dSopenharmony_ci return 0; 1522e5b6d6dSopenharmony_ci} 1532e5b6d6dSopenharmony_ci 1542e5b6d6dSopenharmony_cistruct { 1552e5b6d6dSopenharmony_ci const char* name; 1562e5b6d6dSopenharmony_ci CompareFn comparer; 1572e5b6d6dSopenharmony_ci} platforms[] = { 1582e5b6d6dSopenharmony_ci { "icu", ICUstrcmp }, 1592e5b6d6dSopenharmony_ci { "win", Winstrcmp} 1602e5b6d6dSopenharmony_ci}; 1612e5b6d6dSopenharmony_ci 1622e5b6d6dSopenharmony_ci 1632e5b6d6dSopenharmony_civoid deleteLineElement(void *line) { 1642e5b6d6dSopenharmony_ci delete((Line *)line); 1652e5b6d6dSopenharmony_ci} 1662e5b6d6dSopenharmony_ci 1672e5b6d6dSopenharmony_civoid stringToLower(char *string) { 1682e5b6d6dSopenharmony_ci uint32_t i = 0; 1692e5b6d6dSopenharmony_ci for(i = 0; i < strlen(string); 
i++) { 1702e5b6d6dSopenharmony_ci string[i] = tolower(string[i]); 1712e5b6d6dSopenharmony_ci } 1722e5b6d6dSopenharmony_ci} 1732e5b6d6dSopenharmony_ci 1742e5b6d6dSopenharmony_civoid usage(const char *name) { 1752e5b6d6dSopenharmony_ci u_fprintf(out, "Usage: %s --locale loc_name --platform platform\n", name); 1762e5b6d6dSopenharmony_ci} 1772e5b6d6dSopenharmony_ci 1782e5b6d6dSopenharmony_civoid listKnownPlatforms() { 1792e5b6d6dSopenharmony_ci int32_t i = 0; 1802e5b6d6dSopenharmony_ci u_fprintf(err, "Known platforms:\n"); 1812e5b6d6dSopenharmony_ci for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) { 1822e5b6d6dSopenharmony_ci u_fprintf(err, "\t%s\n", platforms[i]); 1832e5b6d6dSopenharmony_ci } 1842e5b6d6dSopenharmony_ci} 1852e5b6d6dSopenharmony_ci 1862e5b6d6dSopenharmony_civoid addPlatform(const char *platform) { 1872e5b6d6dSopenharmony_ci int32_t i; 1882e5b6d6dSopenharmony_ci //stringToLower(platform); 1892e5b6d6dSopenharmony_ci int32_t oldPlatformNo = gPlatformNo; 1902e5b6d6dSopenharmony_ci 1912e5b6d6dSopenharmony_ci for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) { 1922e5b6d6dSopenharmony_ci if(strcmp(platform, platforms[i].name) == 0) { 1932e5b6d6dSopenharmony_ci gPlatformIndexes[gPlatformNo++] = i; 1942e5b6d6dSopenharmony_ci } 1952e5b6d6dSopenharmony_ci } 1962e5b6d6dSopenharmony_ci if(gPlatformNo == oldPlatformNo) { 1972e5b6d6dSopenharmony_ci u_fprintf(err, "Unknown platform %s\n", platform); 1982e5b6d6dSopenharmony_ci listKnownPlatforms(); 1992e5b6d6dSopenharmony_ci } 2002e5b6d6dSopenharmony_ci} 2012e5b6d6dSopenharmony_ci 2022e5b6d6dSopenharmony_civoid processArgs(int argc, char* argv[], UErrorCode &status) 2032e5b6d6dSopenharmony_ci{ 2042e5b6d6dSopenharmony_ci int32_t i = 0; 2052e5b6d6dSopenharmony_ci U_MAIN_INIT_ARGS(argc, argv); 2062e5b6d6dSopenharmony_ci 2072e5b6d6dSopenharmony_ci argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options); 2082e5b6d6dSopenharmony_ci 2092e5b6d6dSopenharmony_ci if(argc < 0) { 
2102e5b6d6dSopenharmony_ci u_fprintf(err, "Unknown option: %s\n", argv[-argc]); 2112e5b6d6dSopenharmony_ci usage(progName); 2122e5b6d6dSopenharmony_ci return; 2132e5b6d6dSopenharmony_ci } 2142e5b6d6dSopenharmony_ci 2152e5b6d6dSopenharmony_ci if(options[0].doesOccur || options[1].doesOccur) { 2162e5b6d6dSopenharmony_ci usage(progName); 2172e5b6d6dSopenharmony_ci return; 2182e5b6d6dSopenharmony_ci } 2192e5b6d6dSopenharmony_ci if(options[VERBOSE].doesOccur) { 2202e5b6d6dSopenharmony_ci gVerbose = true; 2212e5b6d6dSopenharmony_ci } 2222e5b6d6dSopenharmony_ci if(options[DEBUG].doesOccur) { 2232e5b6d6dSopenharmony_ci gDebug = true; 2242e5b6d6dSopenharmony_ci gVerbose = true; 2252e5b6d6dSopenharmony_ci } 2262e5b6d6dSopenharmony_ci if(options[EXEMPLAR].doesOccur) { 2272e5b6d6dSopenharmony_ci gExemplar = true; 2282e5b6d6dSopenharmony_ci } 2292e5b6d6dSopenharmony_ci if(options[QUIET].doesOccur) { 2302e5b6d6dSopenharmony_ci gQuiet = true; 2312e5b6d6dSopenharmony_ci } 2322e5b6d6dSopenharmony_ci/* 2332e5b6d6dSopenharmony_ci for(i = 8; i < 9; i++) { 2342e5b6d6dSopenharmony_ci if(!options[i].doesOccur) { 2352e5b6d6dSopenharmony_ci u_fprintf(err, "Option %s is required!\n", options[i].longName); 2362e5b6d6dSopenharmony_ci usage(progName); 2372e5b6d6dSopenharmony_ci status = U_ILLEGAL_ARGUMENT_ERROR; 2382e5b6d6dSopenharmony_ci } 2392e5b6d6dSopenharmony_ci if(options[i].value == NULL) { 2402e5b6d6dSopenharmony_ci u_fprintf(err, "Option %s needs an argument!\n", options[i].longName); 2412e5b6d6dSopenharmony_ci usage(progName); 2422e5b6d6dSopenharmony_ci status = U_ILLEGAL_ARGUMENT_ERROR; 2432e5b6d6dSopenharmony_ci } 2442e5b6d6dSopenharmony_ci } 2452e5b6d6dSopenharmony_ci*/ 2462e5b6d6dSopenharmony_ci // ASCII based options specified on the command line 2472e5b6d6dSopenharmony_ci // this is for testing purposes, will allow to load 2482e5b6d6dSopenharmony_ci // up ICU rules and then poke through them. 
2492e5b6d6dSopenharmony_ci // In that case, we test only ICU and don't need 2502e5b6d6dSopenharmony_ci // a locale. 2512e5b6d6dSopenharmony_ci if(options[RULESSTDIN].doesOccur) { 2522e5b6d6dSopenharmony_ci gRulesStdin = true; 2532e5b6d6dSopenharmony_ci addPlatform("icu"); 2542e5b6d6dSopenharmony_ci return; 2552e5b6d6dSopenharmony_ci } 2562e5b6d6dSopenharmony_ci 2572e5b6d6dSopenharmony_ci if(options[LOCALE].doesOccur) { 2582e5b6d6dSopenharmony_ci gLocale = options[LOCALE].value; 2592e5b6d6dSopenharmony_ci } else { 2602e5b6d6dSopenharmony_ci for(i = 1; i < argc; i++) { 2612e5b6d6dSopenharmony_ci gLocales[gLocaleNo++] = argv[i]; 2622e5b6d6dSopenharmony_ci } 2632e5b6d6dSopenharmony_ci } 2642e5b6d6dSopenharmony_ci if(options[PLATFORM].doesOccur) { 2652e5b6d6dSopenharmony_ci //strcpy(platform, options[PLATFORM].value); 2662e5b6d6dSopenharmony_ci //addPlatform("icu"); 2672e5b6d6dSopenharmony_ci addPlatform(options[PLATFORM].value); 2682e5b6d6dSopenharmony_ci } else { // there is a list of platforms 2692e5b6d6dSopenharmony_ci u_fprintf(err, "Option %s is required!\n", options[i].longName); 2702e5b6d6dSopenharmony_ci usage(progName); 2712e5b6d6dSopenharmony_ci status = U_ILLEGAL_ARGUMENT_ERROR; 2722e5b6d6dSopenharmony_ci } 2732e5b6d6dSopenharmony_ci 2742e5b6d6dSopenharmony_ci // 2752e5b6d6dSopenharmony_ci // Set up a Windows LCID 2762e5b6d6dSopenharmony_ci // 2772e5b6d6dSopenharmony_ci gWinLCID = uloc_getLCID(gLocale); 2782e5b6d6dSopenharmony_ci /* 2792e5b6d6dSopenharmony_ci if (gLocale != 0) { 2802e5b6d6dSopenharmony_ci gWinLCID = MAKELCID(gLocale, SORT_DEFAULT); 2812e5b6d6dSopenharmony_ci } 2822e5b6d6dSopenharmony_ci else { 2832e5b6d6dSopenharmony_ci gWinLCID = uloc_getLCID(gLocale); 2842e5b6d6dSopenharmony_ci } 2852e5b6d6dSopenharmony_ci */ 2862e5b6d6dSopenharmony_ci 2872e5b6d6dSopenharmony_ci} 2882e5b6d6dSopenharmony_ci 2892e5b6d6dSopenharmony_civoid printRules(const UChar *name, int32_t len, UFILE *file) { 2902e5b6d6dSopenharmony_ci // very rudimentary pretty rules 
print 2912e5b6d6dSopenharmony_ci int32_t i = 0; 2922e5b6d6dSopenharmony_ci UChar toPrint[16384]; 2932e5b6d6dSopenharmony_ci int32_t toPrintIndex = 0; 2942e5b6d6dSopenharmony_ci for(i = 0; i < len; i++) { 2952e5b6d6dSopenharmony_ci if(name[i] == 0x0026) { 2962e5b6d6dSopenharmony_ci if(toPrintIndex) { 2972e5b6d6dSopenharmony_ci toPrint[toPrintIndex] = 0; 2982e5b6d6dSopenharmony_ci u_fprintf(file, "%U\n", toPrint); 2992e5b6d6dSopenharmony_ci toPrintIndex = 0; 3002e5b6d6dSopenharmony_ci toPrint[toPrintIndex++] = name[i]; 3012e5b6d6dSopenharmony_ci } else { 3022e5b6d6dSopenharmony_ci toPrint[toPrintIndex++] = name[i]; 3032e5b6d6dSopenharmony_ci } 3042e5b6d6dSopenharmony_ci } else { 3052e5b6d6dSopenharmony_ci toPrint[toPrintIndex++] = name[i]; 3062e5b6d6dSopenharmony_ci } 3072e5b6d6dSopenharmony_ci } 3082e5b6d6dSopenharmony_ci if(toPrintIndex) { 3092e5b6d6dSopenharmony_ci toPrint[toPrintIndex] = 0; 3102e5b6d6dSopenharmony_ci u_fprintf(file, "%U\n", toPrint); 3112e5b6d6dSopenharmony_ci toPrintIndex = 0; 3122e5b6d6dSopenharmony_ci } 3132e5b6d6dSopenharmony_ci 3142e5b6d6dSopenharmony_ci 3152e5b6d6dSopenharmony_ci} 3162e5b6d6dSopenharmony_ci 3172e5b6d6dSopenharmony_civoid escapeString(const UChar *name, int32_t len, UFILE *file) { 3182e5b6d6dSopenharmony_ci u_fprintf(file, "%U", name); 3192e5b6d6dSopenharmony_ci/* 3202e5b6d6dSopenharmony_ci int32_t j = 0; 3212e5b6d6dSopenharmony_ci for(j = 0; j < len; j++) { 3222e5b6d6dSopenharmony_ci if(name[j] >= 0x20 && name[j] < 0x80) { 3232e5b6d6dSopenharmony_ci u_fprintf(file, "%c", name[j]); 3242e5b6d6dSopenharmony_ci } else { 3252e5b6d6dSopenharmony_ci u_fprintf(file, "\\u%04X", name[j]); 3262e5b6d6dSopenharmony_ci } 3272e5b6d6dSopenharmony_ci } 3282e5b6d6dSopenharmony_ci*/ 3292e5b6d6dSopenharmony_ci} 3302e5b6d6dSopenharmony_civoid escapeALine(Line *line, UFILE *file) { 3312e5b6d6dSopenharmony_ci escapeString(line->name, line->len, file); 3322e5b6d6dSopenharmony_ci} 3332e5b6d6dSopenharmony_ci 3342e5b6d6dSopenharmony_civoid 
escapeExpansion(Line *line, UFILE *file) { 3352e5b6d6dSopenharmony_ci escapeString(line->expansionString, line->expLen, file); 3362e5b6d6dSopenharmony_ci} 3372e5b6d6dSopenharmony_ci 3382e5b6d6dSopenharmony_civoid showNames(Line *line, UFILE *file) { 3392e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 3402e5b6d6dSopenharmony_ci int32_t j = 0; 3412e5b6d6dSopenharmony_ci char charName[256]; 3422e5b6d6dSopenharmony_ci for(j = 0; j < line->len; j++) { 3432e5b6d6dSopenharmony_ci u_charName(line->name[j], U_EXTENDED_CHAR_NAME, charName, 256, &status); 3442e5b6d6dSopenharmony_ci u_fprintf(file, "%s ", charName); 3452e5b6d6dSopenharmony_ci } 3462e5b6d6dSopenharmony_ci} 3472e5b6d6dSopenharmony_ci 3482e5b6d6dSopenharmony_civoid setArray(Line **array, Line *contents, int32_t size) { 3492e5b6d6dSopenharmony_ci int32_t i = 0; 3502e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 3512e5b6d6dSopenharmony_ci array[i] = contents+i; 3522e5b6d6dSopenharmony_ci } 3532e5b6d6dSopenharmony_ci} 3542e5b6d6dSopenharmony_ci 3552e5b6d6dSopenharmony_ci// set an array from a Hashtable 3562e5b6d6dSopenharmony_ciint32_t 3572e5b6d6dSopenharmony_cisetArray(Line **array, Hashtable *table = &gElements) { 3582e5b6d6dSopenharmony_ci int32_t size = table->count(); 3592e5b6d6dSopenharmony_ci int32_t hashIndex = -1; 3602e5b6d6dSopenharmony_ci const UHashElement *hashElement = NULL; 3612e5b6d6dSopenharmony_ci int32_t count = 0; 3622e5b6d6dSopenharmony_ci while((hashElement = table->nextElement(hashIndex)) != NULL) { 3632e5b6d6dSopenharmony_ci array[count++] = (Line *)hashElement->value.pointer; 3642e5b6d6dSopenharmony_ci } 3652e5b6d6dSopenharmony_ci return size; 3662e5b6d6dSopenharmony_ci} 3672e5b6d6dSopenharmony_ci 3682e5b6d6dSopenharmony_ciUBool trySwamped(Line **smaller, Line **greater, UChar chars[2], CompareFn comparer) { 3692e5b6d6dSopenharmony_ci u_strcpy(gSource->name, (*smaller)->name); 3702e5b6d6dSopenharmony_ci gSource->name[(*smaller)->len] = separatorChar; 3712e5b6d6dSopenharmony_ci 
gSource->name[(*smaller)->len+1] = chars[0]; 3722e5b6d6dSopenharmony_ci gSource->name[(*smaller)->len+2] = 0; 3732e5b6d6dSopenharmony_ci gSource->len = (*smaller)->len+2; 3742e5b6d6dSopenharmony_ci 3752e5b6d6dSopenharmony_ci u_strcpy(gTarget->name, (*greater)->name); 3762e5b6d6dSopenharmony_ci gTarget->name[(*greater)->len] = separatorChar; 3772e5b6d6dSopenharmony_ci gTarget->name[(*greater)->len+1] = chars[1]; 3782e5b6d6dSopenharmony_ci gTarget->name[(*greater)->len+2] = 0; 3792e5b6d6dSopenharmony_ci gTarget->len = (*greater)->len+2; 3802e5b6d6dSopenharmony_ci 3812e5b6d6dSopenharmony_ci if(comparer(&gSource, &gTarget) > 0) { 3822e5b6d6dSopenharmony_ci return true; 3832e5b6d6dSopenharmony_ci } else { 3842e5b6d6dSopenharmony_ci return false; 3852e5b6d6dSopenharmony_ci } 3862e5b6d6dSopenharmony_ci} 3872e5b6d6dSopenharmony_ci 3882e5b6d6dSopenharmony_ciUBool trySwamps(Line **smaller, Line **greater, UChar chars[2], CompareFn comparer) { 3892e5b6d6dSopenharmony_ci gSource->name[0] = chars[0]; 3902e5b6d6dSopenharmony_ci gSource->name[1] = separatorChar; 3912e5b6d6dSopenharmony_ci u_strcpy(gSource->name+2, (*smaller)->name); 3922e5b6d6dSopenharmony_ci gSource->len = (*smaller)->len+2; 3932e5b6d6dSopenharmony_ci 3942e5b6d6dSopenharmony_ci gTarget->name[0] = chars[1]; 3952e5b6d6dSopenharmony_ci gTarget->name[1] = separatorChar; 3962e5b6d6dSopenharmony_ci u_strcpy(gTarget->name+2, (*greater)->name); 3972e5b6d6dSopenharmony_ci gTarget->len = (*greater)->len+2; 3982e5b6d6dSopenharmony_ci 3992e5b6d6dSopenharmony_ci if(comparer(&gSource, &gTarget) < 0) { 4002e5b6d6dSopenharmony_ci return true; 4012e5b6d6dSopenharmony_ci } else { 4022e5b6d6dSopenharmony_ci return false; 4032e5b6d6dSopenharmony_ci } 4042e5b6d6dSopenharmony_ci} 4052e5b6d6dSopenharmony_ci 4062e5b6d6dSopenharmony_ciUColAttributeValue 4072e5b6d6dSopenharmony_ciprobeStrength(Line** prevLine, Line **currLine, CompareFn comparer) { 4082e5b6d6dSopenharmony_ci // Primary swamps secondary 4092e5b6d6dSopenharmony_ci // have 
pairs where [0] 2> [1] 4102e5b6d6dSopenharmony_ci UChar primSwamps[][2] = { 4112e5b6d6dSopenharmony_ci { 0x00E0, 0x0061 }, 4122e5b6d6dSopenharmony_ci { 0x0450, 0x0435 }, 4132e5b6d6dSopenharmony_ci { 0x31a3, 0x310d } 4142e5b6d6dSopenharmony_ci }; 4152e5b6d6dSopenharmony_ci // Secondary swamps tertiary 4162e5b6d6dSopenharmony_ci // have pairs where [0] 3> [1] 4172e5b6d6dSopenharmony_ci UChar secSwamps[][2] = { 4182e5b6d6dSopenharmony_ci { 0x0053, 0x0073 }, 4192e5b6d6dSopenharmony_ci { 0x0415, 0x0435 }, 4202e5b6d6dSopenharmony_ci { 0x31b6, 0x310e } 4212e5b6d6dSopenharmony_ci }; 4222e5b6d6dSopenharmony_ci // Secondary is swamped by primary 4232e5b6d6dSopenharmony_ci // have pairs where [0] 1> [1] 4242e5b6d6dSopenharmony_ci UChar secSwamped[][2] = { 4252e5b6d6dSopenharmony_ci { 0x0062, 0x0061 }, 4262e5b6d6dSopenharmony_ci { 0x0436, 0x0454 }, 4272e5b6d6dSopenharmony_ci { 0x310e, 0x310d } 4282e5b6d6dSopenharmony_ci }; 4292e5b6d6dSopenharmony_ci // Tertiary is swamped by secondary 4302e5b6d6dSopenharmony_ci // have pairs where [0] 2> [1] 4312e5b6d6dSopenharmony_ci UChar terSwamped[][2] = { 4322e5b6d6dSopenharmony_ci { 0x00E0, 0x0061 }, 4332e5b6d6dSopenharmony_ci { 0x0450, 0x0435 }, 4342e5b6d6dSopenharmony_ci { 0x31a3, 0x310d } 4352e5b6d6dSopenharmony_ci }; 4362e5b6d6dSopenharmony_ci int32_t i = 0; 4372e5b6d6dSopenharmony_ci // Tertiary swamps equal? 4382e5b6d6dSopenharmony_ci int result = 0; 4392e5b6d6dSopenharmony_ci // Choose the pair 4402e5b6d6dSopenharmony_ci i = 0; 4412e5b6d6dSopenharmony_ci /* 4422e5b6d6dSopenharmony_ci if((*prevLine)->name[0] > 0xFF && (*currLine)->name[0] > 0xFF) { 4432e5b6d6dSopenharmony_ci i = 0; 4442e5b6d6dSopenharmony_ci } else if((*prevLine)->name[0] < 0x0400 && (*currLine)->name[0] < 0x0400) { 4452e5b6d6dSopenharmony_ci i = 1; 4462e5b6d6dSopenharmony_ci } else { 4472e5b6d6dSopenharmony_ci i = 2; 4482e5b6d6dSopenharmony_ci } 4492e5b6d6dSopenharmony_ci */ 4502e5b6d6dSopenharmony_ci // are they equal? 
4512e5b6d6dSopenharmony_ci if((result = comparer(prevLine, currLine)) == 0) { 4522e5b6d6dSopenharmony_ci return UCOL_IDENTICAL; 4532e5b6d6dSopenharmony_ci } else if(result > 0) { 4542e5b6d6dSopenharmony_ci //fprintf(stderr, "lines should be ordered!"); 4552e5b6d6dSopenharmony_ci return UCOL_OFF; 4562e5b6d6dSopenharmony_ci } else if(trySwamps(prevLine, currLine, primSwamps[i], comparer)) { 4572e5b6d6dSopenharmony_ci return UCOL_PRIMARY; 4582e5b6d6dSopenharmony_ci } else if(trySwamps(prevLine, currLine, secSwamps[i], comparer)) { 4592e5b6d6dSopenharmony_ci return UCOL_SECONDARY; 4602e5b6d6dSopenharmony_ci } else if(trySwamped(prevLine, currLine, terSwamped[i], comparer)) { 4612e5b6d6dSopenharmony_ci // is there a tertiary difference 4622e5b6d6dSopenharmony_ci return UCOL_TERTIARY; 4632e5b6d6dSopenharmony_ci } else { 4642e5b6d6dSopenharmony_ci //fprintf(stderr, "Unknown strength!\n"); 4652e5b6d6dSopenharmony_ci return UCOL_ON; 4662e5b6d6dSopenharmony_ci } 4672e5b6d6dSopenharmony_ci} 4682e5b6d6dSopenharmony_ci 4692e5b6d6dSopenharmony_ci// This function tries to probe the set of lines 4702e5b6d6dSopenharmony_ci// (already sorted by qsort) and deduct the strengths 4712e5b6d6dSopenharmony_civoid 4722e5b6d6dSopenharmony_cianalyzeStrength(Line **lines, int32_t size, CompareFn comparer) { 4732e5b6d6dSopenharmony_ci int32_t i = 0; 4742e5b6d6dSopenharmony_ci 4752e5b6d6dSopenharmony_ci for(i = 1; i < size; i++) { 4762e5b6d6dSopenharmony_ci Line **prevLine = lines+i-1; 4772e5b6d6dSopenharmony_ci Line **currLine = lines+i; 4782e5b6d6dSopenharmony_ci (*currLine)->strength = probeStrength(prevLine, currLine, comparer); 4792e5b6d6dSopenharmony_ci (*currLine)->sortedIndex = i; 4802e5b6d6dSopenharmony_ci (*currLine)->previous = *prevLine; 4812e5b6d6dSopenharmony_ci (*prevLine)->next = *currLine; 4822e5b6d6dSopenharmony_ci 4832e5b6d6dSopenharmony_ci } 4842e5b6d6dSopenharmony_ci 4852e5b6d6dSopenharmony_ci} 4862e5b6d6dSopenharmony_ci 4872e5b6d6dSopenharmony_civoid 
printStrength(UColAttributeValue strength, UFILE *file) { 4882e5b6d6dSopenharmony_ci u_fprintf(file, " "); 4892e5b6d6dSopenharmony_ci switch(strength) { 4902e5b6d6dSopenharmony_ci case UCOL_IDENTICAL: 4912e5b6d6dSopenharmony_ci u_fprintf(file, "="); 4922e5b6d6dSopenharmony_ci break; 4932e5b6d6dSopenharmony_ci case UCOL_TERTIARY: 4942e5b6d6dSopenharmony_ci //u_fprintf(file, "<3"); 4952e5b6d6dSopenharmony_ci u_fprintf(file, "<<<"); 4962e5b6d6dSopenharmony_ci break; 4972e5b6d6dSopenharmony_ci case UCOL_SECONDARY: 4982e5b6d6dSopenharmony_ci //u_fprintf(file, "<2"); 4992e5b6d6dSopenharmony_ci u_fprintf(file, "<<"); 5002e5b6d6dSopenharmony_ci break; 5012e5b6d6dSopenharmony_ci case UCOL_PRIMARY: 5022e5b6d6dSopenharmony_ci //u_fprintf(file, "<1"); 5032e5b6d6dSopenharmony_ci u_fprintf(file, "<"); 5042e5b6d6dSopenharmony_ci break; 5052e5b6d6dSopenharmony_ci case UCOL_OFF: 5062e5b6d6dSopenharmony_ci u_fprintf(file, ">?"); 5072e5b6d6dSopenharmony_ci default: 5082e5b6d6dSopenharmony_ci u_fprintf(file, "?!"); 5092e5b6d6dSopenharmony_ci break; 5102e5b6d6dSopenharmony_ci } 5112e5b6d6dSopenharmony_ci u_fprintf(file, " "); 5122e5b6d6dSopenharmony_ci} 5132e5b6d6dSopenharmony_ci 5142e5b6d6dSopenharmony_civoid printStrength(Line *line, UFILE *file) { 5152e5b6d6dSopenharmony_ci printStrength(line->strength, file); 5162e5b6d6dSopenharmony_ci} 5172e5b6d6dSopenharmony_ci 5182e5b6d6dSopenharmony_civoid printLine(Line *line, UFILE *file) { 5192e5b6d6dSopenharmony_ci escapeALine(line, file); 5202e5b6d6dSopenharmony_ci if(line->isExpansion) { 5212e5b6d6dSopenharmony_ci u_fprintf(file, "/"); 5222e5b6d6dSopenharmony_ci escapeExpansion(line, file); 5232e5b6d6dSopenharmony_ci } 5242e5b6d6dSopenharmony_ci} 5252e5b6d6dSopenharmony_ci 5262e5b6d6dSopenharmony_civoid printOrdering(Line **lines, int32_t size, UFILE *file, UBool useLinks = false) { 5272e5b6d6dSopenharmony_ci int32_t i = 0; 5282e5b6d6dSopenharmony_ci 5292e5b6d6dSopenharmony_ci //printLine(*lines); 5302e5b6d6dSopenharmony_ci 
//escapeALine(*lines); // Print first line 5312e5b6d6dSopenharmony_ci 5322e5b6d6dSopenharmony_ci Line *line = NULL; 5332e5b6d6dSopenharmony_ci Line *previous = *lines; 5342e5b6d6dSopenharmony_ci if(previous->isReset) { 5352e5b6d6dSopenharmony_ci u_fprintf(file, "\n& "); 5362e5b6d6dSopenharmony_ci escapeALine(previous, file); 5372e5b6d6dSopenharmony_ci } else if(!previous->isRemoved) { 5382e5b6d6dSopenharmony_ci printLine(previous, file); 5392e5b6d6dSopenharmony_ci } 5402e5b6d6dSopenharmony_ci i = 1; 5412e5b6d6dSopenharmony_ci while(i < size && previous->next) { 5422e5b6d6dSopenharmony_ci if(useLinks) { 5432e5b6d6dSopenharmony_ci line = previous->next; 5442e5b6d6dSopenharmony_ci } else { 5452e5b6d6dSopenharmony_ci line = *(lines+i); 5462e5b6d6dSopenharmony_ci } 5472e5b6d6dSopenharmony_ci if(line->isReset) { 5482e5b6d6dSopenharmony_ci u_fprintf(file, "\n& "); 5492e5b6d6dSopenharmony_ci escapeALine(line, file); 5502e5b6d6dSopenharmony_ci } else if(!line->isRemoved) { 5512e5b6d6dSopenharmony_ci if(file == out) { 5522e5b6d6dSopenharmony_ci u_fprintf(file, "\n"); 5532e5b6d6dSopenharmony_ci } 5542e5b6d6dSopenharmony_ci if(i > 0) { 5552e5b6d6dSopenharmony_ci printStrength(line, file); 5562e5b6d6dSopenharmony_ci } 5572e5b6d6dSopenharmony_ci printLine(line, file); 5582e5b6d6dSopenharmony_ci //escapeALine(line, file); 5592e5b6d6dSopenharmony_ci } 5602e5b6d6dSopenharmony_ci previous = line; 5612e5b6d6dSopenharmony_ci i++; 5622e5b6d6dSopenharmony_ci } 5632e5b6d6dSopenharmony_ci u_fprintf(file, "\n"); 5642e5b6d6dSopenharmony_ci} 5652e5b6d6dSopenharmony_ci 5662e5b6d6dSopenharmony_ci 5672e5b6d6dSopenharmony_civoid setIndexes(Line **lines, int32_t size) { 5682e5b6d6dSopenharmony_ci int32_t i = 0; 5692e5b6d6dSopenharmony_ci (*lines)->sortedIndex = 0; 5702e5b6d6dSopenharmony_ci for(i = 1; i < size; i++) { 5712e5b6d6dSopenharmony_ci Line *line = *(lines+i); 5722e5b6d6dSopenharmony_ci Line *prev = *(lines+i-1); 5732e5b6d6dSopenharmony_ci line->previous = prev; 
5742e5b6d6dSopenharmony_ci prev->next = line; 5752e5b6d6dSopenharmony_ci line->sortedIndex = i; 5762e5b6d6dSopenharmony_ci } 5772e5b6d6dSopenharmony_ci} 5782e5b6d6dSopenharmony_ci 5792e5b6d6dSopenharmony_ci 5802e5b6d6dSopenharmony_ci// this seems to be a dead end 5812e5b6d6dSopenharmony_civoid 5822e5b6d6dSopenharmony_cinoteExpansion(Line **gLines, Line *line, int32_t size, CompareFn comparer) { 5832e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 5842e5b6d6dSopenharmony_ci 5852e5b6d6dSopenharmony_ci UnicodeString key(line->name, line->len); 5862e5b6d6dSopenharmony_ci //Line *toInsert = (Line *)gElements.get(key); 5872e5b6d6dSopenharmony_ci Line *toInsert = (Line *)gExpansions.get(key); 5882e5b6d6dSopenharmony_ci if(toInsert != NULL) { 5892e5b6d6dSopenharmony_ci toInsert->isExpansion = true; 5902e5b6d6dSopenharmony_ci u_strcpy(toInsert->expansionString, line->expansionString); 5912e5b6d6dSopenharmony_ci toInsert->expLen = line->expLen; 5922e5b6d6dSopenharmony_ci toInsert->previous->next = toInsert->next; 5932e5b6d6dSopenharmony_ci toInsert->next->previous = toInsert->previous; 5942e5b6d6dSopenharmony_ci gElements.remove(key); 5952e5b6d6dSopenharmony_ci } else { 5962e5b6d6dSopenharmony_ci toInsert = new Line(*line); 5972e5b6d6dSopenharmony_ci toInsert->isExpansion = true; 5982e5b6d6dSopenharmony_ci gElements.put(UnicodeString(toInsert->name, toInsert->len), toInsert, status); 5992e5b6d6dSopenharmony_ci } 6002e5b6d6dSopenharmony_ci 6012e5b6d6dSopenharmony_ci int32_t i = 0; 6022e5b6d6dSopenharmony_ci Line testLine; 6032e5b6d6dSopenharmony_ci Line *l = &testLine; 6042e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 6052e5b6d6dSopenharmony_ci u_strcpy(testLine.name, (*(gLines+i))->name); 6062e5b6d6dSopenharmony_ci u_strcat(testLine.name, line->expansionString); 6072e5b6d6dSopenharmony_ci testLine.len = (*(gLines+i))->len + line->expLen; 6082e5b6d6dSopenharmony_ci if(comparer(&l, &line) > 0) { 6092e5b6d6dSopenharmony_ci toInsert->previous = *(gLines+i-1); 
6102e5b6d6dSopenharmony_ci toInsert->next = *(gLines+i); 6112e5b6d6dSopenharmony_ci toInsert->previous->next = toInsert; 6122e5b6d6dSopenharmony_ci toInsert->next->previous = toInsert; 6132e5b6d6dSopenharmony_ci break; 6142e5b6d6dSopenharmony_ci } 6152e5b6d6dSopenharmony_ci } 6162e5b6d6dSopenharmony_ci if(gVerbose) { 6172e5b6d6dSopenharmony_ci u_fprintf(log, "Adding expansion\n"); 6182e5b6d6dSopenharmony_ci escapeALine(line, log); 6192e5b6d6dSopenharmony_ci u_fprintf(log, "/"); 6202e5b6d6dSopenharmony_ci escapeExpansion(line, log); 6212e5b6d6dSopenharmony_ci u_fprintf(log, " "); 6222e5b6d6dSopenharmony_ci } 6232e5b6d6dSopenharmony_ci} 6242e5b6d6dSopenharmony_ci 6252e5b6d6dSopenharmony_civoid 6262e5b6d6dSopenharmony_cipositionExpansions(Line **gLines, int32_t size, CompareFn comparer) { 6272e5b6d6dSopenharmony_ci int result = 0; 6282e5b6d6dSopenharmony_ci Line *line = NULL; 6292e5b6d6dSopenharmony_ci Line *toMove = NULL; 6302e5b6d6dSopenharmony_ci int32_t i = 0, j = 0; 6312e5b6d6dSopenharmony_ci Line **sortedExpansions = new Line*[gExpansions.count()]; 6322e5b6d6dSopenharmony_ci int32_t sortedExpansionsSize = setArray(sortedExpansions, &gExpansions); 6332e5b6d6dSopenharmony_ci qsort(sortedExpansions, sortedExpansionsSize, sizeof(Line *), comparer); 6342e5b6d6dSopenharmony_ci // Make a list of things in the vincinity of expansion candidate 6352e5b6d6dSopenharmony_ci for(j = 0; j < sortedExpansionsSize; j++) { 6362e5b6d6dSopenharmony_ci line = *(sortedExpansions+j); 6372e5b6d6dSopenharmony_ci UnicodeString key(line->name, line->len); 6382e5b6d6dSopenharmony_ci toMove = (Line *)gElements.get(key); 6392e5b6d6dSopenharmony_ci int32_t i = 0; 6402e5b6d6dSopenharmony_ci Line testLine, prevTestLine; 6412e5b6d6dSopenharmony_ci Line *l = &testLine; 6422e5b6d6dSopenharmony_ci Line *prevL = &prevTestLine; 6432e5b6d6dSopenharmony_ci // This can be further optimized, since we now know that we have a 6442e5b6d6dSopenharmony_ci // sorted list of expansions, so current can start from 
toMove, since all 6452e5b6d6dSopenharmony_ci // the elements before it are already smaller. In the beginning it needs to 6462e5b6d6dSopenharmony_ci // be on gLines, though. 6472e5b6d6dSopenharmony_ci Line *current = *gLines; 6482e5b6d6dSopenharmony_ci while(current) { 6492e5b6d6dSopenharmony_ci if(current == toMove) { 6502e5b6d6dSopenharmony_ci // we are wading through a sorted list 6512e5b6d6dSopenharmony_ci // if we found ourselves, it means that we 6522e5b6d6dSopenharmony_ci // are already in a right place, so no moving 6532e5b6d6dSopenharmony_ci // is needed, but we need to make sure we have 6542e5b6d6dSopenharmony_ci // the right strength. 6552e5b6d6dSopenharmony_ci toMove->strength = probeStrength(&prevL, &toMove, comparer); 6562e5b6d6dSopenharmony_ci if(0) { 6572e5b6d6dSopenharmony_ci u_fprintf(log, "Positioned expansion without moving "); 6582e5b6d6dSopenharmony_ci printLine(toMove, log); 6592e5b6d6dSopenharmony_ci u_fprintf(log, " new ordering: \n"); 6602e5b6d6dSopenharmony_ci printOrdering(gLines, size, log, true); 6612e5b6d6dSopenharmony_ci } 6622e5b6d6dSopenharmony_ci break; 6632e5b6d6dSopenharmony_ci } else { 6642e5b6d6dSopenharmony_ci u_strcpy(testLine.name, current->name); 6652e5b6d6dSopenharmony_ci if(!current->isExpansion) { 6662e5b6d6dSopenharmony_ci u_strcat(testLine.name, line->expansionString); 6672e5b6d6dSopenharmony_ci testLine.len = current->len + line->expLen; 6682e5b6d6dSopenharmony_ci } else { 6692e5b6d6dSopenharmony_ci testLine.len = current->len; 6702e5b6d6dSopenharmony_ci } 6712e5b6d6dSopenharmony_ci if(comparer(&l, &line) > 0) { 6722e5b6d6dSopenharmony_ci // remove from chain 6732e5b6d6dSopenharmony_ci if(toMove->next) { 6742e5b6d6dSopenharmony_ci toMove->next->strength = probeStrength(&(toMove->previous), &(toMove->next), comparer); 6752e5b6d6dSopenharmony_ci toMove->next->previous = toMove->previous; 6762e5b6d6dSopenharmony_ci } 6772e5b6d6dSopenharmony_ci if(toMove->previous) { 6782e5b6d6dSopenharmony_ci toMove->previous->next = 
toMove->next; 6792e5b6d6dSopenharmony_ci } 6802e5b6d6dSopenharmony_ci 6812e5b6d6dSopenharmony_ci // insert 6822e5b6d6dSopenharmony_ci toMove->previous = current->previous; 6832e5b6d6dSopenharmony_ci toMove->next = current; 6842e5b6d6dSopenharmony_ci 6852e5b6d6dSopenharmony_ci if(current->previous) { 6862e5b6d6dSopenharmony_ci current->previous->next = toMove; 6872e5b6d6dSopenharmony_ci } 6882e5b6d6dSopenharmony_ci current->previous = toMove; 6892e5b6d6dSopenharmony_ci 6902e5b6d6dSopenharmony_ci toMove->strength = probeStrength(&prevL, &toMove, comparer); 6912e5b6d6dSopenharmony_ci toMove->next->strength = probeStrength(&toMove, &l, comparer); 6922e5b6d6dSopenharmony_ci if(0) { 6932e5b6d6dSopenharmony_ci u_fprintf(log, "Positioned expansion "); 6942e5b6d6dSopenharmony_ci printLine(toMove, log); 6952e5b6d6dSopenharmony_ci u_fprintf(log, " new ordering: \n"); 6962e5b6d6dSopenharmony_ci printOrdering(gLines, size, log, true); 6972e5b6d6dSopenharmony_ci } 6982e5b6d6dSopenharmony_ci if(toMove->strength == UCOL_IDENTICAL) { 6992e5b6d6dSopenharmony_ci // check for craziness such as s = ss/s 7002e5b6d6dSopenharmony_ci // such line would consist of previous (or next) concatenated with the expansion value 7012e5b6d6dSopenharmony_ci // make a test 7022e5b6d6dSopenharmony_ci UChar fullString[256]; 7032e5b6d6dSopenharmony_ci u_strcpy(fullString, toMove->previous->name); 7042e5b6d6dSopenharmony_ci u_strcat(fullString, toMove->expansionString); 7052e5b6d6dSopenharmony_ci if(u_strcmp(fullString, toMove->name) == 0) { 7062e5b6d6dSopenharmony_ci toMove->previous->next = toMove->next; 7072e5b6d6dSopenharmony_ci toMove->next->previous = toMove->previous; 7082e5b6d6dSopenharmony_ci toMove->isRemoved = true; 7092e5b6d6dSopenharmony_ci u_fprintf(log, "Removed: "); 7102e5b6d6dSopenharmony_ci printLine(toMove, log); 7112e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 7122e5b6d6dSopenharmony_ci } 7132e5b6d6dSopenharmony_ci } else if(toMove->next->strength == UCOL_IDENTICAL) { 
7142e5b6d6dSopenharmony_ci UChar fullString[256]; 7152e5b6d6dSopenharmony_ci u_strcpy(fullString, toMove->next->name); 7162e5b6d6dSopenharmony_ci u_strcat(fullString, toMove->expansionString); 7172e5b6d6dSopenharmony_ci if(u_strcmp(fullString, toMove->name) == 0) { 7182e5b6d6dSopenharmony_ci toMove->next->strength = toMove->strength; 7192e5b6d6dSopenharmony_ci toMove->previous->next = toMove->next; 7202e5b6d6dSopenharmony_ci toMove->next->previous = toMove->previous; 7212e5b6d6dSopenharmony_ci toMove->isRemoved = true; 7222e5b6d6dSopenharmony_ci u_fprintf(log, "Removed because of back: "); 7232e5b6d6dSopenharmony_ci printLine(toMove, log); 7242e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 7252e5b6d6dSopenharmony_ci } 7262e5b6d6dSopenharmony_ci } 7272e5b6d6dSopenharmony_ci break; 7282e5b6d6dSopenharmony_ci } 7292e5b6d6dSopenharmony_ci prevTestLine = testLine; 7302e5b6d6dSopenharmony_ci } 7312e5b6d6dSopenharmony_ci current = current->next; 7322e5b6d6dSopenharmony_ci } 7332e5b6d6dSopenharmony_ci } 7342e5b6d6dSopenharmony_ci delete[] sortedExpansions; 7352e5b6d6dSopenharmony_ci} 7362e5b6d6dSopenharmony_ci 7372e5b6d6dSopenharmony_ci 7382e5b6d6dSopenharmony_civoid 7392e5b6d6dSopenharmony_cinoteExpansion(Line *line) { 7402e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 7412e5b6d6dSopenharmony_ci UnicodeString key(line->name, line->len); 7422e5b6d6dSopenharmony_ci Line *el = (Line *)gElements.get(key); 7432e5b6d6dSopenharmony_ci if(el != NULL) { 7442e5b6d6dSopenharmony_ci el->isExpansion = true; 7452e5b6d6dSopenharmony_ci u_strcpy(el->expansionString, line->expansionString); 7462e5b6d6dSopenharmony_ci el->expLen = line->expLen; 7472e5b6d6dSopenharmony_ci } else { 7482e5b6d6dSopenharmony_ci Line *toInsert = new Line(*line); 7492e5b6d6dSopenharmony_ci toInsert->isExpansion = true; 7502e5b6d6dSopenharmony_ci gElements.put(UnicodeString(line->name, line->len), toInsert, status); 7512e5b6d6dSopenharmony_ci } 7522e5b6d6dSopenharmony_ci 7532e5b6d6dSopenharmony_ci Line 
*el2 = (Line *)gExpansions.get(key); 7542e5b6d6dSopenharmony_ci el2->isExpansion = true; 7552e5b6d6dSopenharmony_ci u_strcpy(el2->expansionString, line->expansionString); 7562e5b6d6dSopenharmony_ci el2->expLen = line->expLen; 7572e5b6d6dSopenharmony_ci 7582e5b6d6dSopenharmony_ci if(gDebug) { 7592e5b6d6dSopenharmony_ci u_fprintf(log, "Adding expansion\n"); 7602e5b6d6dSopenharmony_ci printLine(line, log); 7612e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 7622e5b6d6dSopenharmony_ci } 7632e5b6d6dSopenharmony_ci} 7642e5b6d6dSopenharmony_ci 7652e5b6d6dSopenharmony_civoid 7662e5b6d6dSopenharmony_cinoteContraction(Line *line) { 7672e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 7682e5b6d6dSopenharmony_ci Line *toInsert = new Line(*line); 7692e5b6d6dSopenharmony_ci toInsert->isContraction = true; 7702e5b6d6dSopenharmony_ci gElements.put(UnicodeString(line->name, line->len), toInsert, status); 7712e5b6d6dSopenharmony_ci if(gVerbose) { 7722e5b6d6dSopenharmony_ci u_fprintf(log, "Adding contraction\n"); 7732e5b6d6dSopenharmony_ci escapeALine(line, log); 7742e5b6d6dSopenharmony_ci u_fprintf(log, " "); 7752e5b6d6dSopenharmony_ci } 7762e5b6d6dSopenharmony_ci} 7772e5b6d6dSopenharmony_ci 7782e5b6d6dSopenharmony_civoid 7792e5b6d6dSopenharmony_cinoteElement(Line *line) { 7802e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 7812e5b6d6dSopenharmony_ci Line *toInsert = new Line(*line); 7822e5b6d6dSopenharmony_ci gElements.put(UnicodeString(line->name, line->len), toInsert, status); 7832e5b6d6dSopenharmony_ci if(0) { //if(gDebug) 7842e5b6d6dSopenharmony_ci escapeALine(line, log); 7852e5b6d6dSopenharmony_ci u_fprintf(log, " "); 7862e5b6d6dSopenharmony_ci } 7872e5b6d6dSopenharmony_ci} 7882e5b6d6dSopenharmony_ci 7892e5b6d6dSopenharmony_ci 7902e5b6d6dSopenharmony_ci 7912e5b6d6dSopenharmony_ci// This function checks if a combination of characters has changed place with the 7922e5b6d6dSopenharmony_ci// adjacent elements. If so, these are most probably contractions. 
7932e5b6d6dSopenharmony_ci// However, it still needs to be checked if these contractions are fake - the 7942e5b6d6dSopenharmony_ci// test is simple - if xy is suspected contraction, if we get that x/y is expansion, then 7952e5b6d6dSopenharmony_ci// xy is a fake contraction. 7962e5b6d6dSopenharmony_ciint32_t 7972e5b6d6dSopenharmony_cianalyzeContractions(Line** lines, int32_t size, CompareFn comparer) { 7982e5b6d6dSopenharmony_ci int32_t i = 0, j = 0; 7992e5b6d6dSopenharmony_ci int32_t outOfOrder = 0; 8002e5b6d6dSopenharmony_ci UColAttributeValue strength = UCOL_OFF; 8012e5b6d6dSopenharmony_ci UColAttributeValue currStrength = UCOL_OFF; 8022e5b6d6dSopenharmony_ci Line **prevLine = lines; 8032e5b6d6dSopenharmony_ci Line **currLine = NULL; 8042e5b6d6dSopenharmony_ci Line **backupLine = NULL; 8052e5b6d6dSopenharmony_ci UBool prevIsContraction = false, currIsContraction = false; 8062e5b6d6dSopenharmony_ci // Problem here is detecting a contraction that is at the very end of the sorted list 8072e5b6d6dSopenharmony_ci for(i = 1; i < size; i++) { 8082e5b6d6dSopenharmony_ci currLine = lines+i; 8092e5b6d6dSopenharmony_ci strength = probeStrength(prevLine, currLine, comparer); 8102e5b6d6dSopenharmony_ci if(strength == UCOL_OFF || strength != (*currLine)->strength) { 8112e5b6d6dSopenharmony_ci prevIsContraction = false; 8122e5b6d6dSopenharmony_ci currIsContraction = false; 8132e5b6d6dSopenharmony_ci if(!outOfOrder) { 8142e5b6d6dSopenharmony_ci if(gVerbose) { 8152e5b6d6dSopenharmony_ci u_fprintf(log, "Possible contractions: "); 8162e5b6d6dSopenharmony_ci } 8172e5b6d6dSopenharmony_ci } 8182e5b6d6dSopenharmony_ci // now we have two elements that are different. The question is, 8192e5b6d6dSopenharmony_ci // which one of them is the contraction - which one has moved. 8202e5b6d6dSopenharmony_ci // Could be the previous, but could also be the current. 
8212e5b6d6dSopenharmony_ci 8222e5b6d6dSopenharmony_ci outOfOrder++; 8232e5b6d6dSopenharmony_ci 8242e5b6d6dSopenharmony_ci // First, lets check whether the previous has jumped back 8252e5b6d6dSopenharmony_ci j = i+1; 8262e5b6d6dSopenharmony_ci // skip all the nexts that have smaller strength, they don't have an effect 8272e5b6d6dSopenharmony_ci while(j < size && (*(lines+j))->strength > (*currLine)->strength) { 8282e5b6d6dSopenharmony_ci j++; 8292e5b6d6dSopenharmony_ci } 8302e5b6d6dSopenharmony_ci // check if there are other elements of same or greater strength 8312e5b6d6dSopenharmony_ci while(j < size && 8322e5b6d6dSopenharmony_ci (strength = probeStrength(prevLine, (backupLine = lines+j), comparer)) == UCOL_OFF) { 8332e5b6d6dSopenharmony_ci j++; 8342e5b6d6dSopenharmony_ci // if we skipped more than one, it might be in fact a contraction 8352e5b6d6dSopenharmony_ci prevIsContraction = true; 8362e5b6d6dSopenharmony_ci } 8372e5b6d6dSopenharmony_ci if(prevIsContraction) { 8382e5b6d6dSopenharmony_ci noteContraction(*prevLine); 8392e5b6d6dSopenharmony_ci j = i-2; 8402e5b6d6dSopenharmony_ci // add all the previous elements with smaller strength, since they also 8412e5b6d6dSopenharmony_ci // will jump over and are contractions 8422e5b6d6dSopenharmony_ci while(j >= 0 && (*(lines+j+1))->strength > (*currLine)->strength) { 8432e5b6d6dSopenharmony_ci strength = probeStrength(lines+j, currLine, comparer); 8442e5b6d6dSopenharmony_ci if(strength == UCOL_OFF) { 8452e5b6d6dSopenharmony_ci noteContraction(*(lines+j)); 8462e5b6d6dSopenharmony_ci } 8472e5b6d6dSopenharmony_ci j--; 8482e5b6d6dSopenharmony_ci } 8492e5b6d6dSopenharmony_ci } 8502e5b6d6dSopenharmony_ci 8512e5b6d6dSopenharmony_ci // now we check if the current element is jumping forward, 8522e5b6d6dSopenharmony_ci // the dance steps are analogous to above. 
8532e5b6d6dSopenharmony_ci j = i - 2; 8542e5b6d6dSopenharmony_ci while(j >= 0 && (*(lines+j+1))->strength > (*currLine)->strength) { 8552e5b6d6dSopenharmony_ci j--; 8562e5b6d6dSopenharmony_ci } 8572e5b6d6dSopenharmony_ci while(j >= 0 && 8582e5b6d6dSopenharmony_ci (strength = probeStrength((backupLine = lines+j), currLine, comparer)) == UCOL_OFF) { 8592e5b6d6dSopenharmony_ci j--; 8602e5b6d6dSopenharmony_ci currIsContraction = true; 8612e5b6d6dSopenharmony_ci } 8622e5b6d6dSopenharmony_ci if(currIsContraction) { 8632e5b6d6dSopenharmony_ci if(gVerbose) { 8642e5b6d6dSopenharmony_ci escapeALine(*currLine, log); 8652e5b6d6dSopenharmony_ci u_fprintf(log, " "); 8662e5b6d6dSopenharmony_ci } 8672e5b6d6dSopenharmony_ci j = i+1; 8682e5b6d6dSopenharmony_ci while(j < size && (*(lines+j))->strength > (*currLine)->strength) { 8692e5b6d6dSopenharmony_ci strength = probeStrength(prevLine, lines+j, comparer); 8702e5b6d6dSopenharmony_ci if(strength == UCOL_OFF) { 8712e5b6d6dSopenharmony_ci noteContraction(*(lines+j)); 8722e5b6d6dSopenharmony_ci } 8732e5b6d6dSopenharmony_ci j++; 8742e5b6d6dSopenharmony_ci } 8752e5b6d6dSopenharmony_ci } 8762e5b6d6dSopenharmony_ci 8772e5b6d6dSopenharmony_ci // Not sure about either. 
List both and then check 8782e5b6d6dSopenharmony_ci if(!(prevIsContraction || currIsContraction)) { 8792e5b6d6dSopenharmony_ci noteContraction(*prevLine); 8802e5b6d6dSopenharmony_ci noteContraction(*currLine); 8812e5b6d6dSopenharmony_ci } 8822e5b6d6dSopenharmony_ci } 8832e5b6d6dSopenharmony_ci prevLine = currLine; 8842e5b6d6dSopenharmony_ci } 8852e5b6d6dSopenharmony_ci if(outOfOrder) { 8862e5b6d6dSopenharmony_ci if(gVerbose) { 8872e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 8882e5b6d6dSopenharmony_ci } 8892e5b6d6dSopenharmony_ci } 8902e5b6d6dSopenharmony_ci return outOfOrder; 8912e5b6d6dSopenharmony_ci} 8922e5b6d6dSopenharmony_ci 8932e5b6d6dSopenharmony_ciint32_t 8942e5b6d6dSopenharmony_cidetectContractions(Line **gLines, Line *lines, int32_t size, CompareFn comparer) { 8952e5b6d6dSopenharmony_ci int32_t i = 0, j = 0; 8962e5b6d6dSopenharmony_ci int32_t noContractions = 0; 8972e5b6d6dSopenharmony_ci // Create and compare doubles: 8982e5b6d6dSopenharmony_ci Line *backupLines = new Line[size]; 8992e5b6d6dSopenharmony_ci Line::copyArray(backupLines, lines, size); 9002e5b6d6dSopenharmony_ci // detect contractions 9012e5b6d6dSopenharmony_ci 9022e5b6d6dSopenharmony_ci Line **gLinesBackup = NULL; //new Line*[size]; 9032e5b6d6dSopenharmony_ci 9042e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 9052e5b6d6dSopenharmony_ci // preserve index and previous 9062e5b6d6dSopenharmony_ci Line::copyArray(lines, backupLines, size); 9072e5b6d6dSopenharmony_ci for(j = 0; j < size; j++) { 9082e5b6d6dSopenharmony_ci u_strcpy(lines[j].name, backupLines[i].name); 9092e5b6d6dSopenharmony_ci u_strcat(lines[j].name, backupLines[j].name); 9102e5b6d6dSopenharmony_ci lines[j].len = backupLines[i].len+backupLines[j].len; 9112e5b6d6dSopenharmony_ci } 9122e5b6d6dSopenharmony_ci 9132e5b6d6dSopenharmony_ci if((noContractions += analyzeContractions(gLines, size, comparer)) && gDebug) { 9142e5b6d6dSopenharmony_ci if(gLinesBackup == NULL) { 9152e5b6d6dSopenharmony_ci gLinesBackup = new Line*[size]; 
9162e5b6d6dSopenharmony_ci } 9172e5b6d6dSopenharmony_ci // Show the sorted doubles, for debugging 9182e5b6d6dSopenharmony_ci setArray(gLinesBackup, lines, size); 9192e5b6d6dSopenharmony_ci qsort(gLinesBackup, size, sizeof(Line *), comparer); 9202e5b6d6dSopenharmony_ci //setIndexes(gLinesBackup, size); 9212e5b6d6dSopenharmony_ci analyzeStrength(gLinesBackup, size, comparer); 9222e5b6d6dSopenharmony_ci printOrdering(gLinesBackup, size, log); 9232e5b6d6dSopenharmony_ci } 9242e5b6d6dSopenharmony_ci if(!gQuiet) { 9252e5b6d6dSopenharmony_ci u_fprintf(log, "."); 9262e5b6d6dSopenharmony_ci } 9272e5b6d6dSopenharmony_ci } 9282e5b6d6dSopenharmony_ci if(!gQuiet) { 9292e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 9302e5b6d6dSopenharmony_ci } 9312e5b6d6dSopenharmony_ci delete[] backupLines; 9322e5b6d6dSopenharmony_ci if(gLinesBackup) { 9332e5b6d6dSopenharmony_ci delete[] gLinesBackup; 9342e5b6d6dSopenharmony_ci } 9352e5b6d6dSopenharmony_ci return noContractions; 9362e5b6d6dSopenharmony_ci} 9372e5b6d6dSopenharmony_ci 9382e5b6d6dSopenharmony_ci// gLines in this function is an array of sorted pointers. 9392e5b6d6dSopenharmony_ci// Contractions are already included. 
9402e5b6d6dSopenharmony_ciint32_t 9412e5b6d6dSopenharmony_cidetectExpansions(Line **gLines, int32_t size, CompareFn comparer) { 9422e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 9432e5b6d6dSopenharmony_ci // detect expansions 9442e5b6d6dSopenharmony_ci 9452e5b6d6dSopenharmony_ci UColAttributeValue startStrength = UCOL_OFF, endStrength = UCOL_OFF, 9462e5b6d6dSopenharmony_ci strength = UCOL_OFF, previousStrength = UCOL_OFF; 9472e5b6d6dSopenharmony_ci Line start, end, src; 9482e5b6d6dSopenharmony_ci Line *startP = &start, *endP = &end, *srcP = &src; 9492e5b6d6dSopenharmony_ci Line *current = NULL; 9502e5b6d6dSopenharmony_ci memset(startP, 0, sizeof(Line)); 9512e5b6d6dSopenharmony_ci memset(endP, 0, sizeof(Line)); 9522e5b6d6dSopenharmony_ci memset(srcP, 0, sizeof(Line)); 9532e5b6d6dSopenharmony_ci int32_t srcLen; 9542e5b6d6dSopenharmony_ci int32_t i = 0, j = 0, k = 0; 9552e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 9562e5b6d6dSopenharmony_ci u_strcpy(start.name, (*(gLines+i))->name); 9572e5b6d6dSopenharmony_ci u_strcpy(end.name, (*(gLines+i))->name); 9582e5b6d6dSopenharmony_ci srcLen = (*(gLines+i))->len; 9592e5b6d6dSopenharmony_ci u_strcpy(start.name+srcLen, (*(gLines))->name); 9602e5b6d6dSopenharmony_ci start.len = srcLen + (*(gLines))->len; 9612e5b6d6dSopenharmony_ci u_strcpy(end.name+srcLen, (*(gLines+size-1))->name); 9622e5b6d6dSopenharmony_ci end.len = srcLen + (*(gLines+size-1))->len; 9632e5b6d6dSopenharmony_ci 9642e5b6d6dSopenharmony_ci for(k = 0; k < size; k++) { // k is index of a thing that is not doubled 9652e5b6d6dSopenharmony_ci current = *(gLines+k); 9662e5b6d6dSopenharmony_ci // see if we have moved to front 9672e5b6d6dSopenharmony_ci // has it moved to the very beginning 9682e5b6d6dSopenharmony_ci if((startStrength = probeStrength((gLines+k), &startP, comparer)) != UCOL_OFF) { 9692e5b6d6dSopenharmony_ci continue; // this one is in the front 9702e5b6d6dSopenharmony_ci } 9712e5b6d6dSopenharmony_ci // has it moved to the very end? 
9722e5b6d6dSopenharmony_ci if((endStrength = probeStrength(&endP, (gLines+k), comparer)) != UCOL_OFF) { 9732e5b6d6dSopenharmony_ci continue; // this one is in the back 9742e5b6d6dSopenharmony_ci } 9752e5b6d6dSopenharmony_ci // Potential Expansion 9762e5b6d6dSopenharmony_ci if(gDebug) { //gVerbose 9772e5b6d6dSopenharmony_ci u_fprintf(log, "Possible expansion: "); 9782e5b6d6dSopenharmony_ci escapeALine(*(gLines+k), log); 9792e5b6d6dSopenharmony_ci u_fprintf(log, " "); 9802e5b6d6dSopenharmony_ci } 9812e5b6d6dSopenharmony_ci // Now we have to make sure that this is really an expansion 9822e5b6d6dSopenharmony_ci // First, we have to find it 9832e5b6d6dSopenharmony_ci u_strcpy(src.name, (*(gLines+i))->name); 9842e5b6d6dSopenharmony_ci for(j = 0; j < size; j++) { 9852e5b6d6dSopenharmony_ci u_strcpy(src.name+srcLen, (*(gLines+j))->name); 9862e5b6d6dSopenharmony_ci src.len = srcLen + (*(gLines+j))->len; 9872e5b6d6dSopenharmony_ci if((strength = probeStrength(&srcP, (gLines+k), comparer)) == UCOL_OFF) { 9882e5b6d6dSopenharmony_ci strength = probeStrength((gLines+k), &srcP, comparer); 9892e5b6d6dSopenharmony_ci // we found it *(gLines+j-1) is the element that is interesting 9902e5b6d6dSopenharmony_ci // since gLines+j-1 < gLines+k < gLines+j 9912e5b6d6dSopenharmony_ci if(gDebug) { //gVerbose 9922e5b6d6dSopenharmony_ci u_fprintf(log, "i = %i, k = %i, j = %i ", i, k, j); 9932e5b6d6dSopenharmony_ci escapeALine(*(gLines+i), log); 9942e5b6d6dSopenharmony_ci escapeALine(*(gLines+j-1), log); 9952e5b6d6dSopenharmony_ci printStrength(previousStrength, log); 9962e5b6d6dSopenharmony_ci escapeALine(current, log); 9972e5b6d6dSopenharmony_ci printStrength(strength, log); 9982e5b6d6dSopenharmony_ci escapeALine(*(gLines+i), log); 9992e5b6d6dSopenharmony_ci escapeALine(*(gLines+j), log); 10002e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 10012e5b6d6dSopenharmony_ci } 10022e5b6d6dSopenharmony_ci // check whether it is a contraction that is the same as an expansion 10032e5b6d6dSopenharmony_ci // 
or a multi character that doesn't do anything 10042e5b6d6dSopenharmony_ci current->addExpansionHit(i, j); 10052e5b6d6dSopenharmony_ci current->isExpansion = true; 10062e5b6d6dSopenharmony_ci current->expIndex = k; 10072e5b6d6dSopenharmony_ci // cache expansion 10082e5b6d6dSopenharmony_ci gExpansions.put(UnicodeString(current->name, current->len), current, status); //new Line(*current) 10092e5b6d6dSopenharmony_ci break; 10102e5b6d6dSopenharmony_ci } 10112e5b6d6dSopenharmony_ci previousStrength = strength; 10122e5b6d6dSopenharmony_ci } 10132e5b6d6dSopenharmony_ci } 10142e5b6d6dSopenharmony_ci if(!gQuiet) { 10152e5b6d6dSopenharmony_ci u_fprintf(log, "."); 10162e5b6d6dSopenharmony_ci } 10172e5b6d6dSopenharmony_ci } 10182e5b6d6dSopenharmony_ci if(!gQuiet) { 10192e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 10202e5b6d6dSopenharmony_ci } 10212e5b6d6dSopenharmony_ci // now we have identified possible expansions. We need to find out how do they expand. 10222e5b6d6dSopenharmony_ci // Let's iterate over expansions cache - it's easier. 10232e5b6d6dSopenharmony_ci const UHashElement *el = NULL; 10242e5b6d6dSopenharmony_ci int32_t hashIndex = -1; 10252e5b6d6dSopenharmony_ci Line *doubles = new Line[size*10]; 10262e5b6d6dSopenharmony_ci Line **sorter = new Line*[size*10]; 10272e5b6d6dSopenharmony_ci int32_t currSize = 0; 10282e5b6d6dSopenharmony_ci int32_t newSize = 0; 10292e5b6d6dSopenharmony_ci Line *prev = NULL; 10302e5b6d6dSopenharmony_ci Line *next = NULL; 10312e5b6d6dSopenharmony_ci Line *origin = NULL; 10322e5b6d6dSopenharmony_ci int result = 0; 10332e5b6d6dSopenharmony_ci // Make a list of things in the vincinity of expansion candidate 10342e5b6d6dSopenharmony_ci // in expansionPrefixes and expansionAfter we have stored the 10352e5b6d6dSopenharmony_ci // prefixes of stuff that caused the detection of an expansion 10362e5b6d6dSopenharmony_ci // and a position where the expansion was. 
10372e5b6d6dSopenharmony_ci // For example (icu, de__PHONEBOOK), we had: 10382e5b6d6dSopenharmony_ci // aE <<< \u00E4 < af 10392e5b6d6dSopenharmony_ci // AD < \u00E4 <<< Ae 10402e5b6d6dSopenharmony_ci // From that we will construct the following sequence: 10412e5b6d6dSopenharmony_ci // AD < aE <<< \u00E4/ <<< Ae < af 10422e5b6d6dSopenharmony_ci // then we will take the vincinity of \u00E4: 10432e5b6d6dSopenharmony_ci // aE <<< \u00E4/ <<< Ae 10442e5b6d6dSopenharmony_ci // then we will choose the smallest expansion to be the expansion 10452e5b6d6dSopenharmony_ci // part: 'e'. 10462e5b6d6dSopenharmony_ci // if there is equality, we choose the equal part: 10472e5b6d6dSopenharmony_ci // (win32, de__PHONEBOOK): 10482e5b6d6dSopenharmony_ci // AD < \u00E4/ = ae <<< aE <<< Ae 10492e5b6d6dSopenharmony_ci // we choose 'e'. 10502e5b6d6dSopenharmony_ci 10512e5b6d6dSopenharmony_ci while((el = gExpansions.nextElement(hashIndex)) != NULL) { 10522e5b6d6dSopenharmony_ci newSize = 0; 10532e5b6d6dSopenharmony_ci current = (Line *)el->value.pointer; 10542e5b6d6dSopenharmony_ci currSize = size*current->expansionPrefixesSize; 10552e5b6d6dSopenharmony_ci if(gDebug) { 10562e5b6d6dSopenharmony_ci escapeALine(current, log); 10572e5b6d6dSopenharmony_ci u_fprintf(log, " Number: %i\n", current->expansionPrefixesSize); 10582e5b6d6dSopenharmony_ci } 10592e5b6d6dSopenharmony_ci // construct the doubles 10602e5b6d6dSopenharmony_ci for(i = 0; i < current->expansionPrefixesSize; i++) { 10612e5b6d6dSopenharmony_ci doubles[newSize].suffix = current->expansionAfter[i]-1; 10622e5b6d6dSopenharmony_ci doubles[newSize++].setToConcat(*(gLines+current->expansionPrefixes[i]), *(gLines+current->expansionAfter[i]-1)); 10632e5b6d6dSopenharmony_ci doubles[newSize].suffix = current->expansionAfter[i]; 10642e5b6d6dSopenharmony_ci doubles[newSize++].setToConcat(*(gLines+current->expansionPrefixes[i]), *(gLines+current->expansionAfter[i])); 10652e5b6d6dSopenharmony_ci } 10662e5b6d6dSopenharmony_ci // add the 
expansion we're observing 10672e5b6d6dSopenharmony_ci doubles[newSize++] = *current; 10682e5b6d6dSopenharmony_ci setArray(sorter, doubles, newSize); 10692e5b6d6dSopenharmony_ci qsort(sorter, newSize, sizeof(Line*), comparer); 10702e5b6d6dSopenharmony_ci analyzeStrength(sorter, newSize, comparer); 10712e5b6d6dSopenharmony_ci if(gDebug) { 10722e5b6d6dSopenharmony_ci printOrdering(sorter, newSize, log); 10732e5b6d6dSopenharmony_ci } 10742e5b6d6dSopenharmony_ci i = 0; 10752e5b6d6dSopenharmony_ci while(**(sorter+i) != *current) { 10762e5b6d6dSopenharmony_ci i++; 10772e5b6d6dSopenharmony_ci } 10782e5b6d6dSopenharmony_ci // find the two additions 10792e5b6d6dSopenharmony_ci if((*(sorter+i))->strength == UCOL_IDENTICAL) { 10802e5b6d6dSopenharmony_ci // if we ae id 10812e5b6d6dSopenharmony_ci origin = *(gLines+((*(sorter+i-1))->suffix)); 10822e5b6d6dSopenharmony_ci u_strcpy(current->expansionString, origin->name); 10832e5b6d6dSopenharmony_ci current->expLen = origin->len; 10842e5b6d6dSopenharmony_ci } else if(i < newSize-1 && (*(sorter+i+1))->strength == UCOL_IDENTICAL) { 10852e5b6d6dSopenharmony_ci origin = *(gLines+((*(sorter+i+1))->suffix)); 10862e5b6d6dSopenharmony_ci u_strcpy(current->expansionString, origin->name); 10872e5b6d6dSopenharmony_ci current->expLen = origin->len; 10882e5b6d6dSopenharmony_ci } else { 10892e5b6d6dSopenharmony_ci if(i > 0) { 10902e5b6d6dSopenharmony_ci prev = *(gLines+(*(sorter+i-1))->suffix); 10912e5b6d6dSopenharmony_ci if(i < newSize-1) { 10922e5b6d6dSopenharmony_ci next = *(gLines+(*(sorter+i+1))->suffix); 10932e5b6d6dSopenharmony_ci result = comparer(&prev, &next); 10942e5b6d6dSopenharmony_ci if(result <= 0) { 10952e5b6d6dSopenharmony_ci u_strcpy(current->expansionString, prev->name); 10962e5b6d6dSopenharmony_ci current->expLen = prev->len; 10972e5b6d6dSopenharmony_ci } else { 10982e5b6d6dSopenharmony_ci u_strcpy(current->expansionString, next->name); 10992e5b6d6dSopenharmony_ci current->expLen = next->len; 11002e5b6d6dSopenharmony_ci } 
11012e5b6d6dSopenharmony_ci } 11022e5b6d6dSopenharmony_ci } 11032e5b6d6dSopenharmony_ci if(0) { //if(gDebug) 11042e5b6d6dSopenharmony_ci u_fprintf(log, "Expansion is: "); 11052e5b6d6dSopenharmony_ci escapeALine(current, log); 11062e5b6d6dSopenharmony_ci u_fprintf(log, "/"); 11072e5b6d6dSopenharmony_ci escapeExpansion(current, log); 11082e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 11092e5b6d6dSopenharmony_ci } 11102e5b6d6dSopenharmony_ci } 11112e5b6d6dSopenharmony_ci noteExpansion(current); 11122e5b6d6dSopenharmony_ci //noteExpansion(gLines, current, size, comparer); 11132e5b6d6dSopenharmony_ci if(!gQuiet) { 11142e5b6d6dSopenharmony_ci u_fprintf(log, "."); 11152e5b6d6dSopenharmony_ci } 11162e5b6d6dSopenharmony_ci } 11172e5b6d6dSopenharmony_ci if(!gQuiet) { 11182e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 11192e5b6d6dSopenharmony_ci } 11202e5b6d6dSopenharmony_ci delete[] doubles; 11212e5b6d6dSopenharmony_ci delete[] sorter; 11222e5b6d6dSopenharmony_ci return gExpansions.count(); 11232e5b6d6dSopenharmony_ci} 11242e5b6d6dSopenharmony_ci 11252e5b6d6dSopenharmony_ciUBool 11262e5b6d6dSopenharmony_ciisTailored(Line *line, UErrorCode &status) { 11272e5b6d6dSopenharmony_ci UBool result = false; 11282e5b6d6dSopenharmony_ci UCollationElements *tailoring = ucol_openElements(gCol, line->name, line->len, &status); 11292e5b6d6dSopenharmony_ci UCollationElements *uca = ucol_openElements(gUCA, line->name, line->len, &status); 11302e5b6d6dSopenharmony_ci 11312e5b6d6dSopenharmony_ci int32_t tailElement = UCOL_NULLORDER; 11322e5b6d6dSopenharmony_ci int32_t ucaElement = UCOL_NULLORDER; 11332e5b6d6dSopenharmony_ci 11342e5b6d6dSopenharmony_ci do { 11352e5b6d6dSopenharmony_ci do { 11362e5b6d6dSopenharmony_ci tailElement = ucol_next(tailoring, &status); 11372e5b6d6dSopenharmony_ci } while(tailElement == 0); 11382e5b6d6dSopenharmony_ci do { 11392e5b6d6dSopenharmony_ci ucaElement = ucol_next(uca, &status); 11402e5b6d6dSopenharmony_ci } while(ucaElement == 0); 11412e5b6d6dSopenharmony_ci 
if(tailElement != ucaElement) { 11422e5b6d6dSopenharmony_ci result = true; 11432e5b6d6dSopenharmony_ci break; 11442e5b6d6dSopenharmony_ci } 11452e5b6d6dSopenharmony_ci } while (tailElement != UCOL_NULLORDER && ucaElement != UCOL_NULLORDER); 11462e5b6d6dSopenharmony_ci 11472e5b6d6dSopenharmony_ci ucol_closeElements(tailoring); 11482e5b6d6dSopenharmony_ci ucol_closeElements(uca); 11492e5b6d6dSopenharmony_ci return result; 11502e5b6d6dSopenharmony_ci} 11512e5b6d6dSopenharmony_ci 11522e5b6d6dSopenharmony_civoid 11532e5b6d6dSopenharmony_cireduceUntailored(Line **gLines, int32_t size){ 11542e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 11552e5b6d6dSopenharmony_ci Line *current = *(gLines); 11562e5b6d6dSopenharmony_ci Line *previous = NULL; 11572e5b6d6dSopenharmony_ci while(current) { 11582e5b6d6dSopenharmony_ci // if the current line is not tailored according to the UCA 11592e5b6d6dSopenharmony_ci if(!isTailored(current, status)) { 11602e5b6d6dSopenharmony_ci // we remove it 11612e5b6d6dSopenharmony_ci current->isRemoved = true; 11622e5b6d6dSopenharmony_ci } else { 11632e5b6d6dSopenharmony_ci // if it's tailored 11642e5b6d6dSopenharmony_ci if(current->previous && current->previous->isRemoved == true) { 11652e5b6d6dSopenharmony_ci previous = current->previous; 11662e5b6d6dSopenharmony_ci while(previous && (previous->strength > current->strength || previous->isExpansion || previous->isContraction) && previous->isRemoved) { 11672e5b6d6dSopenharmony_ci if(previous->previous && previous->previous->isRemoved) { 11682e5b6d6dSopenharmony_ci previous = previous->previous; 11692e5b6d6dSopenharmony_ci } else { 11702e5b6d6dSopenharmony_ci break; 11712e5b6d6dSopenharmony_ci } 11722e5b6d6dSopenharmony_ci } 11732e5b6d6dSopenharmony_ci if(previous) { 11742e5b6d6dSopenharmony_ci previous->isReset = true; 11752e5b6d6dSopenharmony_ci } else { 11762e5b6d6dSopenharmony_ci (*(gLines))->isReset = true; 11772e5b6d6dSopenharmony_ci } 11782e5b6d6dSopenharmony_ci } 
11792e5b6d6dSopenharmony_ci } 11802e5b6d6dSopenharmony_ci current = current->next; 11812e5b6d6dSopenharmony_ci } 11822e5b6d6dSopenharmony_ci} 11832e5b6d6dSopenharmony_ci 11842e5b6d6dSopenharmony_civoid 11852e5b6d6dSopenharmony_ciconstructAndAnalyze(Line **gLines, Line *lines, int32_t size, CompareFn comparer) { 11862e5b6d6dSopenharmony_ci int32_t i = 0, j = 0, k = 0; 11872e5b6d6dSopenharmony_ci // setup our compare arrays to point to single set. 11882e5b6d6dSopenharmony_ci 11892e5b6d6dSopenharmony_ci // For contractions we need a block of data 11902e5b6d6dSopenharmony_ci setArray(gLines, lines, size); 11912e5b6d6dSopenharmony_ci //size = setArray(gLines); 11922e5b6d6dSopenharmony_ci 11932e5b6d6dSopenharmony_ci qsort(gLines, size, sizeof(Line *), comparer); 11942e5b6d6dSopenharmony_ci 11952e5b6d6dSopenharmony_ci // Establish who is previous according to the sort order 11962e5b6d6dSopenharmony_ci //setIndexes(gLines, size); 11972e5b6d6dSopenharmony_ci 11982e5b6d6dSopenharmony_ci analyzeStrength(gLines, size, comparer); 11992e5b6d6dSopenharmony_ci if(gVerbose) { 12002e5b6d6dSopenharmony_ci u_fprintf(log, "Ordering:\n"); 12012e5b6d6dSopenharmony_ci printOrdering(gLines, size, log); 12022e5b6d6dSopenharmony_ci } 12032e5b6d6dSopenharmony_ci 12042e5b6d6dSopenharmony_ci //showDifferences(exemplarSetSize); 12052e5b6d6dSopenharmony_ci //dumpData(exemplarSetSize); 12062e5b6d6dSopenharmony_ci 12072e5b6d6dSopenharmony_ci if(!gQuiet) { 12082e5b6d6dSopenharmony_ci u_fprintf(log, "Detecting contractions?\n"); 12092e5b6d6dSopenharmony_ci } 12102e5b6d6dSopenharmony_ci int32_t noContractions = 0; 12112e5b6d6dSopenharmony_ci noContractions = detectContractions(gLines, lines, size, comparer); 12122e5b6d6dSopenharmony_ci if(!gQuiet) { 12132e5b6d6dSopenharmony_ci u_fprintf(log, "Detected %i contractions\n", noContractions); 12142e5b6d6dSopenharmony_ci } 12152e5b6d6dSopenharmony_ci 12162e5b6d6dSopenharmony_ci // now we have suspected contractions in the table 12172e5b6d6dSopenharmony_ci 
// we have to re-sort the things 12182e5b6d6dSopenharmony_ci size = setArray(gLines); 12192e5b6d6dSopenharmony_ci qsort(gLines, size, sizeof(Line *), comparer); 12202e5b6d6dSopenharmony_ci analyzeStrength(gLines, size, comparer); 12212e5b6d6dSopenharmony_ci 12222e5b6d6dSopenharmony_ci if(!gQuiet) { 12232e5b6d6dSopenharmony_ci u_fprintf(log, "Detecting expansions\n"); 12242e5b6d6dSopenharmony_ci } 12252e5b6d6dSopenharmony_ci int32_t noExpansions = detectExpansions(gLines, size, comparer); 12262e5b6d6dSopenharmony_ci if(!gQuiet) { 12272e5b6d6dSopenharmony_ci u_fprintf(log, "Detected %i expansions\n", noExpansions); 12282e5b6d6dSopenharmony_ci } 12292e5b6d6dSopenharmony_ci 12302e5b6d6dSopenharmony_ci positionExpansions(gLines, size, comparer); 12312e5b6d6dSopenharmony_ci 12322e5b6d6dSopenharmony_ci if(gVerbose) { 12332e5b6d6dSopenharmony_ci u_fprintf(log, "After positioning expansions:\n"); 12342e5b6d6dSopenharmony_ci printOrdering(gLines, size, log, true); 12352e5b6d6dSopenharmony_ci } 12362e5b6d6dSopenharmony_ci //reduceUntailored(gLines, size); 12372e5b6d6dSopenharmony_ci if(!gQuiet) { 12382e5b6d6dSopenharmony_ci u_fprintf(out, "Final result\n"); 12392e5b6d6dSopenharmony_ci } 12402e5b6d6dSopenharmony_ci printOrdering(gLines, size, out, true); 12412e5b6d6dSopenharmony_ci printOrdering(gLines, size, log, true); 12422e5b6d6dSopenharmony_ci} 12432e5b6d6dSopenharmony_ci 12442e5b6d6dSopenharmony_ci// Check whether upper case comes before lower case or vice-versa 12452e5b6d6dSopenharmony_ciint32_t 12462e5b6d6dSopenharmony_cicheckCaseOrdering(void) { 12472e5b6d6dSopenharmony_ci UChar stuff[][3] = { 12482e5b6d6dSopenharmony_ci { 0x0061, separatorChar, 0x0061}, //"aa", 12492e5b6d6dSopenharmony_ci { 0x0061, separatorChar, 0x0041 }, //"a\\u00E0", 12502e5b6d6dSopenharmony_ci { 0x0041, separatorChar, 0x0061 }, //"\\u00E0a", 12512e5b6d6dSopenharmony_ci { 0x0041, separatorChar, 0x0041 }, //"\\u00E0a", 12522e5b6d6dSopenharmony_ci //{ 0x00E0, separatorChar, 0x00E0 } 
//"\\u00E0\\u00E0" 12532e5b6d6dSopenharmony_ci }; 12542e5b6d6dSopenharmony_ci const int32_t size = sizeof(stuff)/sizeof(stuff[0]); 12552e5b6d6dSopenharmony_ci 12562e5b6d6dSopenharmony_ci Line **sortedLines = new Line*[size]; 12572e5b6d6dSopenharmony_ci Line lines[size]; 12582e5b6d6dSopenharmony_ci 12592e5b6d6dSopenharmony_ci int32_t i = 0; 12602e5b6d6dSopenharmony_ci int32_t ordered = 0, reversed = 0; 12612e5b6d6dSopenharmony_ci 12622e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 12632e5b6d6dSopenharmony_ci lines[i].setName(stuff[i], 3); 12642e5b6d6dSopenharmony_ci } 12652e5b6d6dSopenharmony_ci setArray(sortedLines, lines, size); 12662e5b6d6dSopenharmony_ci qsort(sortedLines, size, sizeof(Line*), gComparer); 12672e5b6d6dSopenharmony_ci 12682e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 12692e5b6d6dSopenharmony_ci if(*(sortedLines+i) == &lines[i]) { 12702e5b6d6dSopenharmony_ci ordered++; 12712e5b6d6dSopenharmony_ci } 12722e5b6d6dSopenharmony_ci if(*(sortedLines+i) == &lines[size-i-1]) { 12732e5b6d6dSopenharmony_ci reversed++; 12742e5b6d6dSopenharmony_ci } 12752e5b6d6dSopenharmony_ci } 12762e5b6d6dSopenharmony_ci 12772e5b6d6dSopenharmony_ci delete[] sortedLines; 12782e5b6d6dSopenharmony_ci if(ordered == size) { 12792e5b6d6dSopenharmony_ci return 0; // in normal order 12802e5b6d6dSopenharmony_ci } else if(reversed == size) { 12812e5b6d6dSopenharmony_ci return 1; // in reversed order 12822e5b6d6dSopenharmony_ci } else { 12832e5b6d6dSopenharmony_ci return -1; // unknown order 12842e5b6d6dSopenharmony_ci } 12852e5b6d6dSopenharmony_ci} 12862e5b6d6dSopenharmony_ci 12872e5b6d6dSopenharmony_ci 12882e5b6d6dSopenharmony_ci// Check whether the secondaries are in the straight or reversed order 12892e5b6d6dSopenharmony_ciint32_t 12902e5b6d6dSopenharmony_cicheckSecondaryOrdering(void) { 12912e5b6d6dSopenharmony_ci UChar stuff[][5] = { 12922e5b6d6dSopenharmony_ci { 0x0061, separatorChar, 0x0061, separatorChar, 0x00E0 }, //"aa", 12932e5b6d6dSopenharmony_ci { 0x0061, 
separatorChar, 0x00E0, separatorChar, 0x0061 }, //"a\\u00E0", 12942e5b6d6dSopenharmony_ci { 0x00E0, separatorChar, 0x0061, separatorChar, 0x0061 }, //"\\u00E0a", 12952e5b6d6dSopenharmony_ci //{ 0x00E0, separatorChar, 0x00E0 } //"\\u00E0\\u00E0" 12962e5b6d6dSopenharmony_ci }; 12972e5b6d6dSopenharmony_ci const int32_t size = sizeof(stuff)/sizeof(stuff[0]); 12982e5b6d6dSopenharmony_ci 12992e5b6d6dSopenharmony_ci Line **sortedLines = new Line*[size]; 13002e5b6d6dSopenharmony_ci Line lines[size]; 13012e5b6d6dSopenharmony_ci 13022e5b6d6dSopenharmony_ci int32_t i = 0; 13032e5b6d6dSopenharmony_ci int32_t ordered = 0, reversed = 0; 13042e5b6d6dSopenharmony_ci 13052e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 13062e5b6d6dSopenharmony_ci lines[i].setName(stuff[i], 5); 13072e5b6d6dSopenharmony_ci } 13082e5b6d6dSopenharmony_ci setArray(sortedLines, lines, size); 13092e5b6d6dSopenharmony_ci qsort(sortedLines, size, sizeof(Line*), gComparer); 13102e5b6d6dSopenharmony_ci 13112e5b6d6dSopenharmony_ci for(i = 0; i < size; i++) { 13122e5b6d6dSopenharmony_ci if(*(sortedLines+i) == &lines[i]) { 13132e5b6d6dSopenharmony_ci ordered++; 13142e5b6d6dSopenharmony_ci } 13152e5b6d6dSopenharmony_ci if(*(sortedLines+i) == &lines[size-i-1]) { 13162e5b6d6dSopenharmony_ci reversed++; 13172e5b6d6dSopenharmony_ci } 13182e5b6d6dSopenharmony_ci } 13192e5b6d6dSopenharmony_ci 13202e5b6d6dSopenharmony_ci delete[] sortedLines; 13212e5b6d6dSopenharmony_ci if(ordered == size) { 13222e5b6d6dSopenharmony_ci return 0; // in normal order 13232e5b6d6dSopenharmony_ci } else if(reversed == size) { 13242e5b6d6dSopenharmony_ci return 1; // in reversed order 13252e5b6d6dSopenharmony_ci } else { 13262e5b6d6dSopenharmony_ci return -1; // unknown order 13272e5b6d6dSopenharmony_ci } 13282e5b6d6dSopenharmony_ci} 13292e5b6d6dSopenharmony_ci 13302e5b6d6dSopenharmony_ci// We have to remove ignorable characters from the exemplar set, 13312e5b6d6dSopenharmony_ci// otherwise, we get messed up results 
13322e5b6d6dSopenharmony_civoid removeIgnorableChars(UnicodeSet &exemplarUSet, CompareFn comparer, UErrorCode &status) { 13332e5b6d6dSopenharmony_ci UnicodeSet ignorables, primaryIgnorables; 13342e5b6d6dSopenharmony_ci UnicodeSetIterator exemplarUSetIter(exemplarUSet); 13352e5b6d6dSopenharmony_ci exemplarUSetIter.reset(); 13362e5b6d6dSopenharmony_ci Line empty; 13372e5b6d6dSopenharmony_ci Line *emptyP = ∅ 13382e5b6d6dSopenharmony_ci Line current; 13392e5b6d6dSopenharmony_ci Line *currLine = ¤t; 13402e5b6d6dSopenharmony_ci UColAttributeValue strength = UCOL_OFF; 13412e5b6d6dSopenharmony_ci 13422e5b6d6dSopenharmony_ci 13432e5b6d6dSopenharmony_ci while(exemplarUSetIter.next()) { 13442e5b6d6dSopenharmony_ci if(exemplarUSetIter.isString()) { // process a string 13452e5b6d6dSopenharmony_ci u_memcpy(currLine->name, exemplarUSetIter.getString().getBuffer(), exemplarUSetIter.getString().length()); 13462e5b6d6dSopenharmony_ci currLine->len = exemplarUSetIter.getString().length(); 13472e5b6d6dSopenharmony_ci strength = probeStrength(&emptyP, &currLine, comparer); 13482e5b6d6dSopenharmony_ci if(strength == UCOL_IDENTICAL) { 13492e5b6d6dSopenharmony_ci ignorables.add(exemplarUSetIter.getString()); 13502e5b6d6dSopenharmony_ci } else if(strength > UCOL_PRIMARY) { 13512e5b6d6dSopenharmony_ci primaryIgnorables.add(exemplarUSetIter.getString()); 13522e5b6d6dSopenharmony_ci } 13532e5b6d6dSopenharmony_ci } else { // process code point 13542e5b6d6dSopenharmony_ci UBool isError = false; 13552e5b6d6dSopenharmony_ci UChar32 codePoint = exemplarUSetIter.getCodepoint(); 13562e5b6d6dSopenharmony_ci currLine->len = 0; 13572e5b6d6dSopenharmony_ci U16_APPEND(currLine->name, currLine->len, 25, codePoint, isError); 13582e5b6d6dSopenharmony_ci strength = probeStrength(&emptyP, &currLine, comparer); 13592e5b6d6dSopenharmony_ci if(strength == UCOL_IDENTICAL) { 13602e5b6d6dSopenharmony_ci ignorables.add(codePoint); 13612e5b6d6dSopenharmony_ci } else if(strength > UCOL_PRIMARY) { 
13622e5b6d6dSopenharmony_ci primaryIgnorables.add(codePoint); 13632e5b6d6dSopenharmony_ci } 13642e5b6d6dSopenharmony_ci } 13652e5b6d6dSopenharmony_ci } 13662e5b6d6dSopenharmony_ci 13672e5b6d6dSopenharmony_ci 13682e5b6d6dSopenharmony_ci 13692e5b6d6dSopenharmony_ci exemplarUSet.removeAll(ignorables); 13702e5b6d6dSopenharmony_ci exemplarUSet.removeAll(primaryIgnorables); 13712e5b6d6dSopenharmony_ci 13722e5b6d6dSopenharmony_ci UnicodeString removedPattern; 13732e5b6d6dSopenharmony_ci if(ignorables.size()) { 13742e5b6d6dSopenharmony_ci u_fprintf(log, "Ignorables:\n"); 13752e5b6d6dSopenharmony_ci ignorables.toPattern(removedPattern, true); 13762e5b6d6dSopenharmony_ci removedPattern.setCharAt(removedPattern.length(), 0); 13772e5b6d6dSopenharmony_ci escapeString(removedPattern.getBuffer(), removedPattern.length(), log); 13782e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 13792e5b6d6dSopenharmony_ci } 13802e5b6d6dSopenharmony_ci if(primaryIgnorables.size()) { 13812e5b6d6dSopenharmony_ci u_fprintf(log, "Primary ignorables:\n"); 13822e5b6d6dSopenharmony_ci primaryIgnorables.toPattern(removedPattern, true); 13832e5b6d6dSopenharmony_ci removedPattern.setCharAt(removedPattern.length(), 0); 13842e5b6d6dSopenharmony_ci escapeString(removedPattern.getBuffer(), removedPattern.length(), log); 13852e5b6d6dSopenharmony_ci u_fprintf(log, "\n"); 13862e5b6d6dSopenharmony_ci } 13872e5b6d6dSopenharmony_ci 13882e5b6d6dSopenharmony_ci} 13892e5b6d6dSopenharmony_ci 13902e5b6d6dSopenharmony_ci// TODO: develop logic for choosing boundary characters - right now it is hardcoded 13912e5b6d6dSopenharmony_ci// It should be a function of used scripts. Also, check whether we need to save 13922e5b6d6dSopenharmony_ci// used script names 13932e5b6d6dSopenharmony_civoid addUtilityChars(UnicodeSet &exemplarUSet, UErrorCode &status) { 13942e5b6d6dSopenharmony_ci 13952e5b6d6dSopenharmony_ci // in order to get nice rules, we need to add some characters to the 13962e5b6d6dSopenharmony_ci // starting set. 
These are mostly parts of compatibility composed characters, 13972e5b6d6dSopenharmony_ci // such as L-middle dot (middle dot is 0x00B7). If we don't add these, we would 13982e5b6d6dSopenharmony_ci // get a reset at a funky character, such as L-middle dot. This list will probably 13992e5b6d6dSopenharmony_ci // grow. 14002e5b6d6dSopenharmony_ci exemplarUSet.add(0x00B7); 14012e5b6d6dSopenharmony_ci 14022e5b6d6dSopenharmony_ci // these things represent a script before the target script and 14032e5b6d6dSopenharmony_ci // a script after. More logic should be added so that these characters are 14042e5b6d6dSopenharmony_ci // chosen automatically 14052e5b6d6dSopenharmony_ci 14062e5b6d6dSopenharmony_ci exemplarUSet.add(0x0038); 14072e5b6d6dSopenharmony_ci exemplarUSet.add(0x0039); 14082e5b6d6dSopenharmony_ci 14092e5b6d6dSopenharmony_ci //exemplarUSet.add(0x0433); 14102e5b6d6dSopenharmony_ci //exemplarUSet.add(0x0436); 14112e5b6d6dSopenharmony_ci exemplarUSet.add(0xfa29); 14122e5b6d6dSopenharmony_ci exemplarUSet.add(0xfa28); 14132e5b6d6dSopenharmony_ci} 14142e5b6d6dSopenharmony_ci 14152e5b6d6dSopenharmony_civoid 14162e5b6d6dSopenharmony_cigetExemplars(const char *locale, UnicodeSet &exemplars, UErrorCode &status) { 14172e5b6d6dSopenharmony_ci // first we fill out structures with exemplar characters. 
14182e5b6d6dSopenharmony_ci UResourceBundle *res = ures_open(NULL, locale, &status); 14192e5b6d6dSopenharmony_ci int32_t exemplarLength = 0; 14202e5b6d6dSopenharmony_ci UnicodeString exemplarString = ures_getUnicodeStringByKey(res, "ExemplarCharacters", &status); 14212e5b6d6dSopenharmony_ci exemplars.clear(); 14222e5b6d6dSopenharmony_ci exemplars.applyPattern(exemplarString, status); 14232e5b6d6dSopenharmony_ci ures_close(res); 14242e5b6d6dSopenharmony_ci} 14252e5b6d6dSopenharmony_ci 14262e5b6d6dSopenharmony_civoid 14272e5b6d6dSopenharmony_ciprepareStartingSet(UnicodeSet &exemplarUSet, CompareFn comparer, UErrorCode &status) { 14282e5b6d6dSopenharmony_ci int32_t i = 0; 14292e5b6d6dSopenharmony_ci UnicodeString exemplarString; 14302e5b6d6dSopenharmony_ci exemplarUSet.toPattern(exemplarString); 14312e5b6d6dSopenharmony_ci // Produce case closure of exemplar characters 14322e5b6d6dSopenharmony_ci // Then we want to figure out what is the script of the exemplar characters 14332e5b6d6dSopenharmony_ci // just pick several and see their script 14342e5b6d6dSopenharmony_ci const char* usedScriptNames[USCRIPT_CODE_LIMIT]; 14352e5b6d6dSopenharmony_ci int32_t numberOfUsedScripts = 0; 14362e5b6d6dSopenharmony_ci char scriptSetPattern[256]; 14372e5b6d6dSopenharmony_ci UnicodeString pattern; // for debugging 14382e5b6d6dSopenharmony_ci UChar32 exChar = -1; 14392e5b6d6dSopenharmony_ci while(exemplarUSet.size() != 0 && (exChar = exemplarUSet.charAt(0)) != -1) { 14402e5b6d6dSopenharmony_ci int32_t scriptNo = u_getIntPropertyValue(exChar, UCHAR_SCRIPT); 14412e5b6d6dSopenharmony_ci usedScriptNames[numberOfUsedScripts] = u_getPropertyValueName(UCHAR_SCRIPT, scriptNo, U_SHORT_PROPERTY_NAME); 14422e5b6d6dSopenharmony_ci sprintf(scriptSetPattern, "[:%s:]", usedScriptNames[numberOfUsedScripts]); 14432e5b6d6dSopenharmony_ci numberOfUsedScripts++; 14442e5b6d6dSopenharmony_ci UnicodeSet scriptSet(UnicodeString(scriptSetPattern, ""), status); 14452e5b6d6dSopenharmony_ci 
exemplarUSet.removeAll(scriptSet); 14462e5b6d6dSopenharmony_ci exemplarUSet.toPattern(pattern, true); 14472e5b6d6dSopenharmony_ci } 14482e5b6d6dSopenharmony_ci exemplarUSet.clear(); 14492e5b6d6dSopenharmony_ci 14502e5b6d6dSopenharmony_ci // always add ASCII 14512e5b6d6dSopenharmony_ci //exemplarUSet.addAll(UnicodeSet(UnicodeString("[\\u0020-\\u007f]", ""), status)); 14522e5b6d6dSopenharmony_ci exemplarUSet.addAll(UnicodeSet(UnicodeString("[\\u0041-\\u005b]", ""), status)); 14532e5b6d6dSopenharmony_ci if(gExemplar) { 14542e5b6d6dSopenharmony_ci exemplarUSet.applyPattern(exemplarString, status); 14552e5b6d6dSopenharmony_ci exemplarUSet.closeOver(USET_CASE); 14562e5b6d6dSopenharmony_ci if(!gQuiet) { 14572e5b6d6dSopenharmony_ci u_fprintf(out, "ICU exemplar characters:\n"); 14582e5b6d6dSopenharmony_ci escapeString(exemplarString.getBuffer(), exemplarString.length(), out); 14592e5b6d6dSopenharmony_ci u_fprintf(out, "\n"); 14602e5b6d6dSopenharmony_ci } 14612e5b6d6dSopenharmony_ci } else { 14622e5b6d6dSopenharmony_ci if(!gQuiet) { 14632e5b6d6dSopenharmony_ci u_fprintf(out, "Using scripts:\n"); 14642e5b6d6dSopenharmony_ci } 14652e5b6d6dSopenharmony_ci // add interesting scripts 14662e5b6d6dSopenharmony_ci for(i = 0; i < numberOfUsedScripts; i++) { 14672e5b6d6dSopenharmony_ci sprintf(scriptSetPattern, "[:%s:]", usedScriptNames[i]); 14682e5b6d6dSopenharmony_ci exemplarUSet.addAll(UnicodeSet(UnicodeString(scriptSetPattern, ""), status)); 14692e5b6d6dSopenharmony_ci if(!gQuiet) { 14702e5b6d6dSopenharmony_ci u_fprintf(out, "%s\n", scriptSetPattern); 14712e5b6d6dSopenharmony_ci } 14722e5b6d6dSopenharmony_ci } 14732e5b6d6dSopenharmony_ci } 14742e5b6d6dSopenharmony_ci 14752e5b6d6dSopenharmony_ci 14762e5b6d6dSopenharmony_ci removeIgnorableChars(exemplarUSet, comparer, status); 14772e5b6d6dSopenharmony_ci 14782e5b6d6dSopenharmony_ci addUtilityChars(exemplarUSet, status); 14792e5b6d6dSopenharmony_ci 14802e5b6d6dSopenharmony_ci/* 14812e5b6d6dSopenharmony_ci // try to check whether 
tailored set and exemplar characters match. 14822e5b6d6dSopenharmony_ci USet *tailored = ucol_getTailoredSet(gCol, &status); 14832e5b6d6dSopenharmony_ci UBool tailoredContained = exemplarUSet.containsAll(*((UnicodeSet *)tailored)); 14842e5b6d6dSopenharmony_ci if(!tailoredContained) { 14852e5b6d6dSopenharmony_ci ((UnicodeSet *)tailored)->removeAll(exemplarUSet); 14862e5b6d6dSopenharmony_ci UnicodeString pattern; 14872e5b6d6dSopenharmony_ci ((UnicodeSet *)tailored)->toPattern(pattern, true); 14882e5b6d6dSopenharmony_ci } 14892e5b6d6dSopenharmony_ci uset_close(tailored); 14902e5b6d6dSopenharmony_ci*/ 14912e5b6d6dSopenharmony_ci 14922e5b6d6dSopenharmony_ci //return exemplarUSet; 14932e5b6d6dSopenharmony_ci} 14942e5b6d6dSopenharmony_ci 14952e5b6d6dSopenharmony_civoid 14962e5b6d6dSopenharmony_cisetOutputFile(const char *name, UErrorCode &status) { 14972e5b6d6dSopenharmony_ci int32_t i = 0; 14982e5b6d6dSopenharmony_ci char filename[256]; 14992e5b6d6dSopenharmony_ci strcpy(filename, name); 15002e5b6d6dSopenharmony_ci for(i = 0; i < gPlatformNo; i++) { 15012e5b6d6dSopenharmony_ci strcat(filename, "_"); 15022e5b6d6dSopenharmony_ci strcat(filename, platforms[gPlatformIndexes[i]].name); 15032e5b6d6dSopenharmony_ci } 15042e5b6d6dSopenharmony_ci if(gExemplar) { 15052e5b6d6dSopenharmony_ci strcat(filename, "_exemplar"); 15062e5b6d6dSopenharmony_ci } else { 15072e5b6d6dSopenharmony_ci strcat(filename, "_script"); 15082e5b6d6dSopenharmony_ci } 15092e5b6d6dSopenharmony_ci strcat(filename, ".utf16.txt"); 15102e5b6d6dSopenharmony_ci out = u_fopen(filename, "wb", "en", "utf-16"); 15112e5b6d6dSopenharmony_ci} 15122e5b6d6dSopenharmony_ci 15132e5b6d6dSopenharmony_civoid 15142e5b6d6dSopenharmony_ciprocessCollator(UCollator *col, UErrorCode &status) { 15152e5b6d6dSopenharmony_ci int32_t i = 0; 15162e5b6d6dSopenharmony_ci gCol = col; 15172e5b6d6dSopenharmony_ci UChar ruleString[16384]; 15182e5b6d6dSopenharmony_ci int32_t ruleStringLength = ucol_getRulesEx(gCol, UCOL_TAILORING_ONLY, 
ruleString, 16384); 15192e5b6d6dSopenharmony_ci if(!gQuiet) { 15202e5b6d6dSopenharmony_ci u_fprintf(out, "ICU rules:\n"); 15212e5b6d6dSopenharmony_ci printRules(ruleString, ruleStringLength, out); 15222e5b6d6dSopenharmony_ci printRules(ruleString, ruleStringLength, log); 15232e5b6d6dSopenharmony_ci //escapeString(ruleString, ruleStringLength, out); 15242e5b6d6dSopenharmony_ci u_fprintf(out, "\n"); 15252e5b6d6dSopenharmony_ci } 15262e5b6d6dSopenharmony_ci const char *locale = ucol_getLocale(gCol, ULOC_REQUESTED_LOCALE, &status); 15272e5b6d6dSopenharmony_ci UnicodeSet exemplarUSet; 15282e5b6d6dSopenharmony_ci if(locale) { 15292e5b6d6dSopenharmony_ci getExemplars(locale, exemplarUSet, status); 15302e5b6d6dSopenharmony_ci } else { 15312e5b6d6dSopenharmony_ci exemplarUSet = *((UnicodeSet *)ucol_getTailoredSet(gCol, &status)); 15322e5b6d6dSopenharmony_ci } 15332e5b6d6dSopenharmony_ci 15342e5b6d6dSopenharmony_ci 15352e5b6d6dSopenharmony_ci for(i = 0; i < gPlatformNo; i++) { 15362e5b6d6dSopenharmony_ci u_fprintf(out, "\nGenerating order for platform: %s\n", platforms[gPlatformIndexes[i]].name); 15372e5b6d6dSopenharmony_ci gComparer = platforms[gPlatformIndexes[i]].comparer; 15382e5b6d6dSopenharmony_ci 15392e5b6d6dSopenharmony_ci prepareStartingSet(exemplarUSet, gComparer, status); 15402e5b6d6dSopenharmony_ci int32_t itemLen = 0; 15412e5b6d6dSopenharmony_ci // get the number of all the items from the set (both codepoints and strings) 15422e5b6d6dSopenharmony_ci int32_t exemplarSetSize = exemplarUSet.size(); 15432e5b6d6dSopenharmony_ci UnicodeSetIterator exemplarUSetIter(exemplarUSet); 15442e5b6d6dSopenharmony_ci 15452e5b6d6dSopenharmony_ci // allocate ICU lines 15462e5b6d6dSopenharmony_ci gICULines = new Line*[exemplarSetSize*5]; 15472e5b6d6dSopenharmony_ci int32_t j = 0; 15482e5b6d6dSopenharmony_ci int32_t linesCount = 0; 15492e5b6d6dSopenharmony_ci Line *lines = new Line[exemplarSetSize]; 15502e5b6d6dSopenharmony_ci 15512e5b6d6dSopenharmony_ci int32_t reversedSecondary = 
checkSecondaryOrdering(); 15522e5b6d6dSopenharmony_ci if(reversedSecondary == 0) { 15532e5b6d6dSopenharmony_ci u_fprintf(out, "Secondaries do not seem to be reversed\n"); 15542e5b6d6dSopenharmony_ci } else if(reversedSecondary == 1) { 15552e5b6d6dSopenharmony_ci u_fprintf(out, "Secondaries are reversed\n"); 15562e5b6d6dSopenharmony_ci if(gComparer == ICUstrcmp) { 15572e5b6d6dSopenharmony_ci ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); 15582e5b6d6dSopenharmony_ci } 15592e5b6d6dSopenharmony_ci } else { 15602e5b6d6dSopenharmony_ci u_fprintf(out, "Cannot conclude if secondaries are reversed\n"); 15612e5b6d6dSopenharmony_ci } 15622e5b6d6dSopenharmony_ci 15632e5b6d6dSopenharmony_ci int32_t reversedCase = checkCaseOrdering(); 15642e5b6d6dSopenharmony_ci if(reversedCase == 0) { 15652e5b6d6dSopenharmony_ci u_fprintf(out, "Case does not seem to be reversed\n"); 15662e5b6d6dSopenharmony_ci } else if(reversedCase == 1) { 15672e5b6d6dSopenharmony_ci u_fprintf(out, "Case is reversed\n"); 15682e5b6d6dSopenharmony_ci if(gComparer == ICUstrcmp) { 15692e5b6d6dSopenharmony_ci ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_OFF, &status); 15702e5b6d6dSopenharmony_ci } 15712e5b6d6dSopenharmony_ci } else { 15722e5b6d6dSopenharmony_ci u_fprintf(out, "Cannot conclude if case is reversed\n"); 15732e5b6d6dSopenharmony_ci } 15742e5b6d6dSopenharmony_ci 15752e5b6d6dSopenharmony_ci exemplarUSetIter.reset(); 15762e5b6d6dSopenharmony_ci gElements.removeAll(); 15772e5b6d6dSopenharmony_ci gExpansions.removeAll(); 15782e5b6d6dSopenharmony_ci linesCount = 0; 15792e5b6d6dSopenharmony_ci 15802e5b6d6dSopenharmony_ci while(exemplarUSetIter.next()) { 15812e5b6d6dSopenharmony_ci Line *currLine = lines+linesCount; 15822e5b6d6dSopenharmony_ci if(exemplarUSetIter.isString()) { // process a string 15832e5b6d6dSopenharmony_ci u_memcpy(currLine->name, exemplarUSetIter.getString().getBuffer(), exemplarUSetIter.getString().length()); 15842e5b6d6dSopenharmony_ci currLine->len = 
exemplarUSetIter.getString().length(); 15852e5b6d6dSopenharmony_ci } else { // process code point 15862e5b6d6dSopenharmony_ci UBool isError = false; 15872e5b6d6dSopenharmony_ci currLine->len = 0; 15882e5b6d6dSopenharmony_ci U16_APPEND(currLine->name, currLine->len, 25, exemplarUSetIter.getCodepoint(), isError); 15892e5b6d6dSopenharmony_ci } 15902e5b6d6dSopenharmony_ci currLine->name[currLine->len] = 0; // zero terminate, for our evil ways 15912e5b6d6dSopenharmony_ci currLine->index = linesCount; 15922e5b6d6dSopenharmony_ci linesCount++; 15932e5b6d6dSopenharmony_ci noteElement(currLine); 15942e5b6d6dSopenharmony_ci } 15952e5b6d6dSopenharmony_ci constructAndAnalyze(gICULines, lines, exemplarSetSize, gComparer); 15962e5b6d6dSopenharmony_ci 15972e5b6d6dSopenharmony_ci delete[] lines; 15982e5b6d6dSopenharmony_ci } 15992e5b6d6dSopenharmony_ci 16002e5b6d6dSopenharmony_ci 16012e5b6d6dSopenharmony_ci // cleanup globals 16022e5b6d6dSopenharmony_ci delete[] gICULines; 16032e5b6d6dSopenharmony_ci u_fflush(out); 16042e5b6d6dSopenharmony_ci u_fclose(out); 16052e5b6d6dSopenharmony_ci ucol_close(gCol); 16062e5b6d6dSopenharmony_ci} 16072e5b6d6dSopenharmony_ci 16082e5b6d6dSopenharmony_civoid 16092e5b6d6dSopenharmony_ciprocessLocale(const char *locale, UErrorCode &status) { 16102e5b6d6dSopenharmony_ci gWinLCID = uloc_getLCID(locale); 16112e5b6d6dSopenharmony_ci 16122e5b6d6dSopenharmony_ci UCollator *col = ucol_open(locale, &status); 16132e5b6d6dSopenharmony_ci 16142e5b6d6dSopenharmony_ci setOutputFile(locale, status); 16152e5b6d6dSopenharmony_ci 16162e5b6d6dSopenharmony_ci u_fprintf(out, "Locale %s (LCID:%06X)\n", locale, gWinLCID); 16172e5b6d6dSopenharmony_ci 16182e5b6d6dSopenharmony_ci processCollator(col, status); 16192e5b6d6dSopenharmony_ci} 16202e5b6d6dSopenharmony_ci 16212e5b6d6dSopenharmony_ciUBool 16222e5b6d6dSopenharmony_cihasCollationElements(const char *locName) { 16232e5b6d6dSopenharmony_ci 16242e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 
16252e5b6d6dSopenharmony_ci UResourceBundle *ColEl = NULL; 16262e5b6d6dSopenharmony_ci 16272e5b6d6dSopenharmony_ci UResourceBundle *loc = ures_open(NULL, locName, &status);; 16282e5b6d6dSopenharmony_ci 16292e5b6d6dSopenharmony_ci if(U_SUCCESS(status)) { 16302e5b6d6dSopenharmony_ci status = U_ZERO_ERROR; 16312e5b6d6dSopenharmony_ci ColEl = ures_getByKey(loc, "CollationElements", ColEl, &status); 16322e5b6d6dSopenharmony_ci if(status == U_ZERO_ERROR) { /* do the test - there are real elements */ 16332e5b6d6dSopenharmony_ci ures_close(ColEl); 16342e5b6d6dSopenharmony_ci ures_close(loc); 16352e5b6d6dSopenharmony_ci return true; 16362e5b6d6dSopenharmony_ci } 16372e5b6d6dSopenharmony_ci ures_close(ColEl); 16382e5b6d6dSopenharmony_ci ures_close(loc); 16392e5b6d6dSopenharmony_ci } 16402e5b6d6dSopenharmony_ci return false; 16412e5b6d6dSopenharmony_ci} 16422e5b6d6dSopenharmony_ci 16432e5b6d6dSopenharmony_ciint 16442e5b6d6dSopenharmony_cimain(int argc, 16452e5b6d6dSopenharmony_ci char* argv[]) 16462e5b6d6dSopenharmony_ci{ 16472e5b6d6dSopenharmony_ci UErrorCode status = U_ZERO_ERROR; 16482e5b6d6dSopenharmony_ci err = u_finit(stderr, "en", "latin-1"); 16492e5b6d6dSopenharmony_ci log = u_finit(stdout, "en", "latin-1"); 16502e5b6d6dSopenharmony_ci 16512e5b6d6dSopenharmony_ci/* 16522e5b6d6dSopenharmony_ci USet *wsp = uprv_openRuleWhiteSpaceSet(&status); 16532e5b6d6dSopenharmony_ci uset_add(wsp, 0x0041); 16542e5b6d6dSopenharmony_ci uset_remove(wsp, 0x0041); 16552e5b6d6dSopenharmony_ci UnicodeString pat; 16562e5b6d6dSopenharmony_ci ((UnicodeSet *)wsp)->toPattern(pat, true); 16572e5b6d6dSopenharmony_ci pat.setCharAt(pat.length(), 0); 16582e5b6d6dSopenharmony_ci escapeString(pat.getBuffer(), pat.length(), log); 16592e5b6d6dSopenharmony_ci u_fflush(log); 16602e5b6d6dSopenharmony_ci*/ 16612e5b6d6dSopenharmony_ci 16622e5b6d6dSopenharmony_ci UTransliterator *anyHex = utrans_open("[^\\u000a\\u0020-\\u007f] Any-Hex/Java", UTRANS_FORWARD, NULL, 0, NULL, &status); 16632e5b6d6dSopenharmony_ci 
u_fsettransliterator(log, U_WRITE, anyHex, &status); 16642e5b6d6dSopenharmony_ci 16652e5b6d6dSopenharmony_ci processArgs(argc, argv, status); 16662e5b6d6dSopenharmony_ci int32_t i = 0; 16672e5b6d6dSopenharmony_ci 16682e5b6d6dSopenharmony_ci 16692e5b6d6dSopenharmony_ci gElements.setValueDeleter(deleteLineElement); 16702e5b6d6dSopenharmony_ci 16712e5b6d6dSopenharmony_ci 16722e5b6d6dSopenharmony_ci if(U_FAILURE(status) || gPlatformNo == 0) { 16732e5b6d6dSopenharmony_ci return -1; 16742e5b6d6dSopenharmony_ci } 16752e5b6d6dSopenharmony_ci 16762e5b6d6dSopenharmony_ci gUCA = ucol_open("root", &status); 16772e5b6d6dSopenharmony_ci 16782e5b6d6dSopenharmony_ci if(gRulesStdin) { 16792e5b6d6dSopenharmony_ci char buffer[1024]; 16802e5b6d6dSopenharmony_ci UChar ruleBuffer[16384]; 16812e5b6d6dSopenharmony_ci UChar *rules = ruleBuffer; 16822e5b6d6dSopenharmony_ci int32_t maxRuleLen = 16384; 16832e5b6d6dSopenharmony_ci int32_t rLen = 0; 16842e5b6d6dSopenharmony_ci while(gets(buffer)) { 16852e5b6d6dSopenharmony_ci if(buffer[0] != '/' && buffer[1] != '/') { 16862e5b6d6dSopenharmony_ci rLen = u_unescape(buffer, rules, maxRuleLen); 16872e5b6d6dSopenharmony_ci rules += rLen; 16882e5b6d6dSopenharmony_ci maxRuleLen -= rLen; 16892e5b6d6dSopenharmony_ci } 16902e5b6d6dSopenharmony_ci } 16912e5b6d6dSopenharmony_ci UParseError parseError; 16922e5b6d6dSopenharmony_ci //escapeString(ruleBuffer, rules-ruleBuffer, log);// 16932e5b6d6dSopenharmony_ci u_fprintf(log, "%U\n", ruleBuffer); 16942e5b6d6dSopenharmony_ci 16952e5b6d6dSopenharmony_ci UCollator *col = ucol_openRules(ruleBuffer, rules-ruleBuffer, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status); 16962e5b6d6dSopenharmony_ci if(U_SUCCESS(status)) { 16972e5b6d6dSopenharmony_ci setOutputFile("stdinRules", status); 16982e5b6d6dSopenharmony_ci processCollator(col, status); 16992e5b6d6dSopenharmony_ci } else { 17002e5b6d6dSopenharmony_ci u_fprintf(err, "Error %s\n", u_errorName(status)); 17012e5b6d6dSopenharmony_ci } 17022e5b6d6dSopenharmony_ci } 
else { 17032e5b6d6dSopenharmony_ci 17042e5b6d6dSopenharmony_ci if(gLocale) { 17052e5b6d6dSopenharmony_ci processLocale(gLocale, status); 17062e5b6d6dSopenharmony_ci } else if(gLocaleNo) { 17072e5b6d6dSopenharmony_ci for(i = 0; i < gLocaleNo; i++) { 17082e5b6d6dSopenharmony_ci processLocale(gLocales[i], status); 17092e5b6d6dSopenharmony_ci } 17102e5b6d6dSopenharmony_ci } else { // do the loop through all the locales 17112e5b6d6dSopenharmony_ci int32_t noOfLoc = uloc_countAvailable(); 17122e5b6d6dSopenharmony_ci const char *locName = NULL; 17132e5b6d6dSopenharmony_ci for(i = 0; i<noOfLoc; i++) { 17142e5b6d6dSopenharmony_ci status = U_ZERO_ERROR; 17152e5b6d6dSopenharmony_ci locName = uloc_getAvailable(i); 17162e5b6d6dSopenharmony_ci if(hasCollationElements(locName)) { 17172e5b6d6dSopenharmony_ci processLocale(locName, status); 17182e5b6d6dSopenharmony_ci } 17192e5b6d6dSopenharmony_ci } 17202e5b6d6dSopenharmony_ci } 17212e5b6d6dSopenharmony_ci } 17222e5b6d6dSopenharmony_ci 17232e5b6d6dSopenharmony_ci 17242e5b6d6dSopenharmony_ci ucol_close(gUCA); 17252e5b6d6dSopenharmony_ci 17262e5b6d6dSopenharmony_ci u_fflush(log); 17272e5b6d6dSopenharmony_ci u_fclose(log); 17282e5b6d6dSopenharmony_ci u_fflush(err); 17292e5b6d6dSopenharmony_ci u_fclose(err); 17302e5b6d6dSopenharmony_ci 17312e5b6d6dSopenharmony_ci return 0; 17322e5b6d6dSopenharmony_ci}