12e5b6d6dSopenharmony_ci// © 2017 and later: Unicode, Inc. and others.
22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
32e5b6d6dSopenharmony_ci/*
42e5b6d6dSopenharmony_ci*******************************************************************************
52e5b6d6dSopenharmony_ci*
62e5b6d6dSopenharmony_ci*   Copyright (C) 2003, International Business Machines
72e5b6d6dSopenharmony_ci*   Corporation and others.  All Rights Reserved.
82e5b6d6dSopenharmony_ci*
92e5b6d6dSopenharmony_ci*******************************************************************************
102e5b6d6dSopenharmony_ci*
112e5b6d6dSopenharmony_ci* File colprobe.cpp
122e5b6d6dSopenharmony_ci*
132e5b6d6dSopenharmony_ci* Modification History:
142e5b6d6dSopenharmony_ci*
152e5b6d6dSopenharmony_ci*   Date        Name        Description
162e5b6d6dSopenharmony_ci*   03/18/2003  weiv        Creation.
172e5b6d6dSopenharmony_ci*******************************************************************************
182e5b6d6dSopenharmony_ci*/
192e5b6d6dSopenharmony_ci
202e5b6d6dSopenharmony_ci#include "uoptions.h"
212e5b6d6dSopenharmony_ci#include "unicode/ucol.h"
222e5b6d6dSopenharmony_ci#include "unicode/ucoleitr.h"
232e5b6d6dSopenharmony_ci#include "unicode/ures.h"
242e5b6d6dSopenharmony_ci#include "unicode/uniset.h"
252e5b6d6dSopenharmony_ci#include "unicode/usetiter.h"
262e5b6d6dSopenharmony_ci#include "unicode/ustring.h"
272e5b6d6dSopenharmony_ci#include "unicode/uchar.h"
282e5b6d6dSopenharmony_ci#include "unicode/uscript.h"
292e5b6d6dSopenharmony_ci#include "unicode/locid.h"
302e5b6d6dSopenharmony_ci#include "unicode/ucnv.h"
312e5b6d6dSopenharmony_ci#include "uprops.h"
322e5b6d6dSopenharmony_ci#include "hash.h"
332e5b6d6dSopenharmony_ci#include "ucol_imp.h"
342e5b6d6dSopenharmony_ci
352e5b6d6dSopenharmony_ci#include "unicode/ustdio.h"
362e5b6d6dSopenharmony_ci#include "unicode/utrans.h"
372e5b6d6dSopenharmony_ci
382e5b6d6dSopenharmony_ci#include <stdio.h>
392e5b6d6dSopenharmony_ci#include <stdlib.h>
402e5b6d6dSopenharmony_ci#include <string.h>
412e5b6d6dSopenharmony_ci#include <fcntl.h>
422e5b6d6dSopenharmony_ci
432e5b6d6dSopenharmony_ci// unix tolower
442e5b6d6dSopenharmony_ci#include <ctype.h>
452e5b6d6dSopenharmony_ci// unix setlocale
462e5b6d6dSopenharmony_ci#include <locale.h>
472e5b6d6dSopenharmony_ci
482e5b6d6dSopenharmony_ci#include "colprobe.h"
492e5b6d6dSopenharmony_ci
502e5b6d6dSopenharmony_ci#include "line.h"
512e5b6d6dSopenharmony_ci#include "sortedlines.h"
522e5b6d6dSopenharmony_ci#include "strengthprobe.h"
532e5b6d6dSopenharmony_ci
// Forward declaration only; the definition of testWin is not in this part of
// the file. It takes the strength probe and an ICU error code by reference.
void testWin(StrengthProbe &probe, UErrorCode &status) ;
552e5b6d6dSopenharmony_ci
562e5b6d6dSopenharmony_ci#if defined WIN32
572e5b6d6dSopenharmony_ci#include <io.h>
582e5b6d6dSopenharmony_ci#include <windows.h>
592e5b6d6dSopenharmony_ci#include <sys/types.h>
602e5b6d6dSopenharmony_ci#include <sys/stat.h>
612e5b6d6dSopenharmony_ci#include <direct.h>
622e5b6d6dSopenharmony_ci
632e5b6d6dSopenharmony_ciint createDir(const char* dirName) {
642e5b6d6dSopenharmony_ci  struct _stat myStat;
652e5b6d6dSopenharmony_ci  int result = _stat(dirName, &myStat);
662e5b6d6dSopenharmony_ci
672e5b6d6dSopenharmony_ci  if(result == -1) {
682e5b6d6dSopenharmony_ci    result = _mkdir(dirName);
692e5b6d6dSopenharmony_ci    return result;
702e5b6d6dSopenharmony_ci  } else if(myStat.st_mode & _S_IFDIR) {
712e5b6d6dSopenharmony_ci    return 0;
722e5b6d6dSopenharmony_ci  } else {
732e5b6d6dSopenharmony_ci    return 1;
742e5b6d6dSopenharmony_ci  }
752e5b6d6dSopenharmony_ci}
762e5b6d6dSopenharmony_ci
772e5b6d6dSopenharmony_ci//#elif defined POSIX
782e5b6d6dSopenharmony_ci#else
792e5b6d6dSopenharmony_ci#include <sys/stat.h>
802e5b6d6dSopenharmony_ci#include <unistd.h>
812e5b6d6dSopenharmony_ci
// POSIX flavour: make sure that a directory called dirName exists.
// Returns 0 when the directory already exists, the result of mkdir() when
// the path could not be stat'ed (so 0 on successful creation), and 1 when the
// path exists but is not a directory.
int createDir(const char* dirName) {
  struct stat info;

  if(stat(dirName, &info) != -1) {
    return S_ISDIR(info.st_mode) ? 0 : 1;
  }

  // Nothing could be stat'ed under that name: try to create it with
  // read/write/search permission for user, group and others (subject to umask).
  const mode_t allAccess = S_IRWXU | S_IRWXG | S_IRWXO;
  return mkdir(dirName, allAccess);
}
952e5b6d6dSopenharmony_ci//
962e5b6d6dSopenharmony_ci//  Stubs for Windows API functions when building on UNIXes.
972e5b6d6dSopenharmony_ci//
982e5b6d6dSopenharmony_citypedef int DWORD;
992e5b6d6dSopenharmony_ciinline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;};
1002e5b6d6dSopenharmony_ci//#else
1012e5b6d6dSopenharmony_ci//#error "Not POSIX or Windows. Won't work."
1022e5b6d6dSopenharmony_ci#endif
1032e5b6d6dSopenharmony_ci
1042e5b6d6dSopenharmony_ci#include "line.h"
1052e5b6d6dSopenharmony_ci
// Flags set from the command line by processArgs().
static UBool gVerbose = false;   // --verbose (also switched on by --debug)
static UBool gDebug = false;     // --debug
static UBool gQuiet = false;     // --quiet
static UBool gExemplar = false;  // --exemplar

// State shared by the comparison / sort key callbacks.
DWORD          gWinLCID;   // LCID for the current locale, set by setLocale()/setReference()
int            gCount;     // incremented on every call of a *strcmp comparison callback
UCollator     *gCol;       // collator opened by setLocale()/setReference()
UCollator     *gUCA;       // not referenced in the part of the file shown here
UConverter    *utf8cnv;    // converter used by the UNIX callbacks to produce the strcoll/strxfrm input
CompareFn gComparer;       // comparison callback handed to qsort()
int       gRefNum;         // index into platforms[] of the reference platform (--ref), 0 by default
UnicodeSet gExcludeSet;    // built from the --excludeset pattern
UnicodeSet gRepertoire;    // built from the --repertoire pattern

// U+0030 (digit zero), placed between the two letters of the probe strings
// built in checkCaseOrdering().
const UChar separatorChar = 0x0030;

// Output sinks. tailoringBundle, referenceBundle, bundle and the two dump
// files are opened by setFiles(); logger and debug are set up elsewhere.
UPrinter *logger;
UPrinter *debug;
UPrinter *tailoringBundle;
UPrinter *referenceBundle;
UPrinter *bundle;
FILE     *fTailoringDump;
FILE     *fDefaultDump;

const char *progName = "colprobe";

// Selections made by processArgs().
const char *gLocale = NULL;       // locale to probe (--locale or first positional argument)
int32_t platformIndex = -1;       // not referenced in the part of the file shown here
int32_t gPlatformNo = 0;          // number of valid entries in gPlatformIndexes
int32_t gPlatformIndexes[10];     // indexes into platforms[] chosen through addPlatform()
int32_t gLocaleNo = 0;            // only referenced from commented-out code in processArgs()
const char* gLocales[100];        // only referenced from commented-out code in processArgs()
UBool gRulesStdin = false;        // --rulesstdin
const char *outputFormat = "HTML"; // --output value
const char *outExtension = "html"; // file extension derived from outputFormat
1422e5b6d6dSopenharmony_ci
// Symbolic indexes into options[]. The enumerators are listed in exactly the
// same order as the entries of that array, so each value is the position of
// the corresponding command line option.
enum {
  HELP1,        //  0
  HELP2,        //  1
  VERBOSE,      //  2
  QUIET,        //  3
  VERSION,      //  4
  ICUDATADIR,   //  5
  COPYRIGHT,    //  6
  LOCALE,       //  7
  PLATFORM,     //  8
  DEBUG,        //  9
  EXEMPLAR,     // 10
  RULESSTDIN,   // 11
  REFERENCE,    // 12
  EXCLUDESET,   // 13
  REPERTOIRE,   // 14
  INTERACTIVE,  // 15
  PRINTREF,     // 16
  DIFF,         // 17
  OUTPUT        // 18
};
1642e5b6d6dSopenharmony_ci
// Command line option table handed to u_parseArgs() in processArgs().
// The position of each entry (see the /*n*/ markers) has to stay in step with
// the option index enum above, which is used to look entries up by name.
UOption options[]={
  /*0*/ UOPTION_HELP_H,
  /*1*/ UOPTION_HELP_QUESTION_MARK,
  /*2*/ UOPTION_VERBOSE,
  /*3*/ UOPTION_QUIET,
  /*4*/ UOPTION_VERSION,
  /*5*/ UOPTION_ICUDATADIR,
  /*6*/ UOPTION_COPYRIGHT,
  /*7*/ UOPTION_DEF("locale", 'l', UOPT_REQUIRES_ARG),
  /*8*/ UOPTION_DEF("platform", 'p', UOPT_REQUIRES_ARG),
  /*9*/ UOPTION_DEF("debug", 'D', UOPT_NO_ARG),
  /*10*/ UOPTION_DEF("exemplar", 'E', UOPT_NO_ARG),
  /*11*/ UOPTION_DEF("rulesstdin", 'R', UOPT_NO_ARG),
  /*12*/ UOPTION_DEF("ref", 'c', UOPT_REQUIRES_ARG),
  /*13*/ UOPTION_DEF("excludeset", 'x', UOPT_REQUIRES_ARG),
  /*14*/ UOPTION_DEF("repertoire", 't', UOPT_REQUIRES_ARG),
  /*15*/ UOPTION_DEF("interactive", 'I', UOPT_NO_ARG),
  /*16*/ UOPTION_DEF("printref", 0, UOPT_NO_ARG),
  /*17*/ UOPTION_DEF("diff", 0, UOPT_NO_ARG),
  /*18*/ UOPTION_DEF("output", 0, UOPT_REQUIRES_ARG)
};
1862e5b6d6dSopenharmony_ci
// Scratch buffers shared by the platform comparison / sort key callbacks.
// compA/compB receive the NFC-normalized UTF-16 text of the two operands and
// compALen/compBLen the lengths reported by unorm_normalize().
UChar compA[256];
UChar compB[256];
int32_t compALen = 0;
int32_t compBLen = 0;

// UTF-8 renditions of compA/compB, filled through utf8cnv and passed to
// strcoll()/strxfrm() by the UNIX callbacks.
char compUTF8A[256];
char compUTF8B[256];
int32_t compUTF8ALen = 0;
int32_t compUTF8BLen = 0;
1962e5b6d6dSopenharmony_ci
1972e5b6d6dSopenharmony_ciint UNIXstrcmp(const void *a, const void *b) {
1982e5b6d6dSopenharmony_ci  UErrorCode status = U_ZERO_ERROR;
1992e5b6d6dSopenharmony_ci    gCount++;
2002e5b6d6dSopenharmony_ci    int t;
2012e5b6d6dSopenharmony_ci    compALen = unorm_normalize((*(Line **)a)->name, (*(Line **)a)->len, UNORM_NFC, 0, compA, 256, &status);
2022e5b6d6dSopenharmony_ci    compBLen = unorm_normalize((*(Line **)b)->name, (*(Line **)b)->len, UNORM_NFC, 0, compB, 256, &status);
2032e5b6d6dSopenharmony_ci    compUTF8ALen = ucnv_fromUChars(utf8cnv, compUTF8A, 256, compA, compALen, &status);
2042e5b6d6dSopenharmony_ci    compUTF8A[compUTF8ALen] = 0;
2052e5b6d6dSopenharmony_ci    compUTF8BLen = ucnv_fromUChars(utf8cnv, compUTF8B, 256, compB, compBLen, &status);
2062e5b6d6dSopenharmony_ci    compUTF8B[compUTF8BLen] = 0;
2072e5b6d6dSopenharmony_ci    t = strcoll(compUTF8A, compUTF8B);
2082e5b6d6dSopenharmony_ci    return t;
2092e5b6d6dSopenharmony_ci}
2102e5b6d6dSopenharmony_ci
2112e5b6d6dSopenharmony_ciint UNIXgetSortKey(const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity) {
2122e5b6d6dSopenharmony_ci  UErrorCode status = U_ZERO_ERROR;
2132e5b6d6dSopenharmony_ci  compALen = unorm_normalize(string, len, UNORM_NFC, 0, compA, 256, &status);
2142e5b6d6dSopenharmony_ci  compUTF8ALen = ucnv_fromUChars(utf8cnv, compUTF8A, 256, compA, compALen, &status);
2152e5b6d6dSopenharmony_ci  compUTF8A[compUTF8ALen] = 0;
2162e5b6d6dSopenharmony_ci  return (strxfrm((char *)buffer, compUTF8A, buffCapacity)+1);
2172e5b6d6dSopenharmony_ci}
2182e5b6d6dSopenharmony_ci
2192e5b6d6dSopenharmony_ci#ifdef WIN32
2202e5b6d6dSopenharmony_ciint Winstrcmp(const void *a, const void *b) {
2212e5b6d6dSopenharmony_ci  UErrorCode status = U_ZERO_ERROR;
2222e5b6d6dSopenharmony_ci    gCount++;
2232e5b6d6dSopenharmony_ci    int t;
2242e5b6d6dSopenharmony_ci    //compALen = unorm_compose(compA, 256, (*(Line **)a)->name, (*(Line **)a)->len, false, 0, &status);
2252e5b6d6dSopenharmony_ci    //compBLen = unorm_compose(compB, 256, (*(Line **)b)->name, (*(Line **)b)->len, false, 0, &status);
2262e5b6d6dSopenharmony_ci    compALen = unorm_normalize((*(Line **)a)->name, (*(Line **)a)->len, UNORM_NFC, 0, compA, 256, &status);
2272e5b6d6dSopenharmony_ci    compBLen = unorm_normalize((*(Line **)b)->name, (*(Line **)b)->len, UNORM_NFC, 0, compB, 256, &status);
2282e5b6d6dSopenharmony_ci    t = CompareStringW(gWinLCID,  SORT_STRINGSORT, //0,
2292e5b6d6dSopenharmony_ci      compA, compALen,
2302e5b6d6dSopenharmony_ci      compB, compBLen);
2312e5b6d6dSopenharmony_ci
2322e5b6d6dSopenharmony_ci/*
2332e5b6d6dSopenharmony_ci    t = CompareStringW(gWinLCID, 0,
2342e5b6d6dSopenharmony_ci      (*(Line **)a)->name, (*(Line **)a)->len,
2352e5b6d6dSopenharmony_ci      (*(Line **)b)->name, (*(Line **)b)->len);
2362e5b6d6dSopenharmony_ci*/
2372e5b6d6dSopenharmony_ci    return t-2;
2382e5b6d6dSopenharmony_ci}
2392e5b6d6dSopenharmony_ci
2402e5b6d6dSopenharmony_ciint WingetSortKey(const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity) {
2412e5b6d6dSopenharmony_ci  UErrorCode status = U_ZERO_ERROR;
2422e5b6d6dSopenharmony_ci  compALen = unorm_normalize(string, len, UNORM_NFC, 0, compA, 256, &status);
2432e5b6d6dSopenharmony_ci  return LCMapStringW(gWinLCID, LCMAP_SORTKEY | SORT_STRINGSORT, compA, compALen, (unsigned short *)buffer, buffCapacity);
2442e5b6d6dSopenharmony_ci}
2452e5b6d6dSopenharmony_ci
// Disabled alternative implementation of Winstrcmp that compares the sort
// keys produced by WingetSortKey() instead of calling CompareStringW().
// NOTE(review): if this is ever re-enabled, b1[b1Len] / b2[b2Len] can index
// one past the 256-byte buffers when a key fills them completely, and
// strcmp() stops at the first zero byte of a key.
#if 0
int Winstrcmp(const void *a, const void *b) {
  UErrorCode status = U_ZERO_ERROR;
  uint8_t b1[256], b2[256];
  int32_t b1Len, b2Len;
  b1Len = WingetSortKey((*(Line **)a)->name, (*(Line **)a)->len, b1, 256);
  b2Len = WingetSortKey((*(Line **)b)->name, (*(Line **)b)->len, b2, 256);

  b1[b1Len] = 0;
  b2[b2Len] = 0;

  return strcmp((const char *)b1, (const char *)b2);
}
#endif
2602e5b6d6dSopenharmony_ci
2612e5b6d6dSopenharmony_ci#else
// Placeholder used when not building for Windows: there is no Windows
// collation to call, so every pair of operands compares as equal (0).
int Winstrcmp(const void *a, const void *b) {
  // The parameters are intentionally unused; the casts say so explicitly
  // instead of relying on an if statement with an empty body.
  (void)a;
  (void)b;
  return 0;
}
2662e5b6d6dSopenharmony_ciint WingetSortKey(const UChar *, int32_t , uint8_t *, int32_t ) {
2672e5b6d6dSopenharmony_ci  return 0;
2682e5b6d6dSopenharmony_ci}
2692e5b6d6dSopenharmony_ci#endif
2702e5b6d6dSopenharmony_ci
2712e5b6d6dSopenharmony_ciint ICUstrcmp(const void *a, const void *b) {
2722e5b6d6dSopenharmony_ci    gCount++;
2732e5b6d6dSopenharmony_ci    UCollationResult t;
2742e5b6d6dSopenharmony_ci    t = ucol_strcoll(gCol,
2752e5b6d6dSopenharmony_ci      (*(Line **)a)->name, (*(Line **)a)->len,
2762e5b6d6dSopenharmony_ci      (*(Line **)b)->name, (*(Line **)b)->len);
2772e5b6d6dSopenharmony_ci    if (t == UCOL_LESS) return -1;
2782e5b6d6dSopenharmony_ci    if (t == UCOL_GREATER) return +1;
2792e5b6d6dSopenharmony_ci    return 0;
2802e5b6d6dSopenharmony_ci}
2812e5b6d6dSopenharmony_ci
2822e5b6d6dSopenharmony_ciint ICUgetSortKey(const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity) {
2832e5b6d6dSopenharmony_ci  return ucol_getSortKey(gCol, string, len, buffer, buffCapacity);
2842e5b6d6dSopenharmony_ci}
2852e5b6d6dSopenharmony_ci
// Table of the platforms that can be probed. Each entry pairs the name
// accepted by --platform / --ref (also used as the output directory name by
// getFileNames()/setFiles()) with the comparison and sort key callbacks for
// that platform. gRefNum and gPlatformIndexes[] hold indexes into this table.
struct {
  const char* name;
  CompareFn comparer;
  GetSortKeyFn skgetter;
} platforms[] = {
  { "icu", ICUstrcmp, ICUgetSortKey },
  { "w2k", Winstrcmp, WingetSortKey},
  { "winxp", Winstrcmp, WingetSortKey},
  { "aix", UNIXstrcmp, UNIXgetSortKey},
  { "linux", UNIXstrcmp, UNIXgetSortKey}
};
2972e5b6d6dSopenharmony_ci
2982e5b6d6dSopenharmony_ci
// Lowercases a NUL-terminated string in place using tolower() from <ctype.h>
// (current C locale). A NULL pointer is accepted and ignored.
void stringToLower(char *string) {
  if(string == NULL) {
    return;
  }
  // Walk to the terminator once instead of calling strlen() on every
  // iteration. The cast to unsigned char keeps bytes above 0x7F from being
  // passed to tolower() as negative values, which is undefined behaviour.
  for(char *p = string; *p != '\0'; p++) {
    *p = (char)tolower((unsigned char)*p);
  }
}
3052e5b6d6dSopenharmony_ci
3062e5b6d6dSopenharmony_civoid usage(const char *name) {
3072e5b6d6dSopenharmony_ci  logger->log("Usage: %s --locale loc_name --platform platform\n", name);
3082e5b6d6dSopenharmony_ci}
3092e5b6d6dSopenharmony_ci
3102e5b6d6dSopenharmony_civoid listKnownPlatforms() {
3112e5b6d6dSopenharmony_ci  uint32_t i = 0;
3122e5b6d6dSopenharmony_ci  logger->log("Known platforms:\n");
3132e5b6d6dSopenharmony_ci  for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) {
3142e5b6d6dSopenharmony_ci    logger->log("\t%s\n", platforms[i]);
3152e5b6d6dSopenharmony_ci  }
3162e5b6d6dSopenharmony_ci}
3172e5b6d6dSopenharmony_ci
3182e5b6d6dSopenharmony_civoid addPlatform(const char *platform) {
3192e5b6d6dSopenharmony_ci  uint32_t i;
3202e5b6d6dSopenharmony_ci  //stringToLower(platform);
3212e5b6d6dSopenharmony_ci  int32_t oldPlatformNo = gPlatformNo;
3222e5b6d6dSopenharmony_ci
3232e5b6d6dSopenharmony_ci  for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) {
3242e5b6d6dSopenharmony_ci    if(strcmp(platform, platforms[i].name) == 0) {
3252e5b6d6dSopenharmony_ci      gPlatformIndexes[gPlatformNo++] = i;
3262e5b6d6dSopenharmony_ci    }
3272e5b6d6dSopenharmony_ci  }
3282e5b6d6dSopenharmony_ci  if(gPlatformNo == oldPlatformNo) {
3292e5b6d6dSopenharmony_ci    logger->log("Unknown platform %s\n", platform);
3302e5b6d6dSopenharmony_ci    listKnownPlatforms();
3312e5b6d6dSopenharmony_ci  }
3322e5b6d6dSopenharmony_ci}
3332e5b6d6dSopenharmony_ci
3342e5b6d6dSopenharmony_civoid processArgs(int argc, char* argv[], UErrorCode &status)
3352e5b6d6dSopenharmony_ci{
3362e5b6d6dSopenharmony_ci  int32_t i = 0;
3372e5b6d6dSopenharmony_ci  U_MAIN_INIT_ARGS(argc, argv);
3382e5b6d6dSopenharmony_ci
3392e5b6d6dSopenharmony_ci  argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options);
3402e5b6d6dSopenharmony_ci
3412e5b6d6dSopenharmony_ci  if(argc < 0) {
3422e5b6d6dSopenharmony_ci    logger->log("Unknown option: %s\n", argv[-argc]);
3432e5b6d6dSopenharmony_ci    usage(progName);
3442e5b6d6dSopenharmony_ci    return;
3452e5b6d6dSopenharmony_ci  }
3462e5b6d6dSopenharmony_ci
3472e5b6d6dSopenharmony_ci  if(options[0].doesOccur || options[1].doesOccur) {
3482e5b6d6dSopenharmony_ci    usage(progName);
3492e5b6d6dSopenharmony_ci    return;
3502e5b6d6dSopenharmony_ci  }
3512e5b6d6dSopenharmony_ci  if(options[VERBOSE].doesOccur) {
3522e5b6d6dSopenharmony_ci    gVerbose = true;
3532e5b6d6dSopenharmony_ci  }
3542e5b6d6dSopenharmony_ci  if(options[DEBUG].doesOccur) {
3552e5b6d6dSopenharmony_ci    gDebug = true;
3562e5b6d6dSopenharmony_ci    gVerbose = true;
3572e5b6d6dSopenharmony_ci  }
3582e5b6d6dSopenharmony_ci  if(options[EXEMPLAR].doesOccur) {
3592e5b6d6dSopenharmony_ci    gExemplar = true;
3602e5b6d6dSopenharmony_ci  }
3612e5b6d6dSopenharmony_ci  if(options[QUIET].doesOccur) {
3622e5b6d6dSopenharmony_ci    gQuiet = true;
3632e5b6d6dSopenharmony_ci  }
3642e5b6d6dSopenharmony_ci
3652e5b6d6dSopenharmony_ci  // ASCII based options specified on the command line
3662e5b6d6dSopenharmony_ci  // this is for testing purposes, will allow to load
3672e5b6d6dSopenharmony_ci  // up ICU rules and then poke through them.
3682e5b6d6dSopenharmony_ci  // In that case, we test only ICU and don't need
3692e5b6d6dSopenharmony_ci  // a locale.
3702e5b6d6dSopenharmony_ci  if(options[RULESSTDIN].doesOccur) {
3712e5b6d6dSopenharmony_ci    gRulesStdin = true;
3722e5b6d6dSopenharmony_ci    addPlatform("icu");
3732e5b6d6dSopenharmony_ci    return;
3742e5b6d6dSopenharmony_ci  }
3752e5b6d6dSopenharmony_ci
3762e5b6d6dSopenharmony_ci  if(options[LOCALE].doesOccur) {
3772e5b6d6dSopenharmony_ci    gLocale = options[LOCALE].value;
3782e5b6d6dSopenharmony_ci  } else {
3792e5b6d6dSopenharmony_ci    gLocale = argv[1];
3802e5b6d6dSopenharmony_ci    //for(i = 1; i < argc; i++) {
3812e5b6d6dSopenharmony_ci    //gLocales[gLocaleNo++] = argv[i];
3822e5b6d6dSopenharmony_ci    //}
3832e5b6d6dSopenharmony_ci  }
3842e5b6d6dSopenharmony_ci
3852e5b6d6dSopenharmony_ci  if(options[PLATFORM].doesOccur) {
3862e5b6d6dSopenharmony_ci    addPlatform(options[PLATFORM].value);
3872e5b6d6dSopenharmony_ci  } else { // there is a list of platforms
3882e5b6d6dSopenharmony_ci    addPlatform("icu");
3892e5b6d6dSopenharmony_ci  }
3902e5b6d6dSopenharmony_ci
3912e5b6d6dSopenharmony_ci  if(options[REFERENCE].doesOccur) {
3922e5b6d6dSopenharmony_ci    for(i = 0; i < (int32_t)(sizeof(platforms)/sizeof(platforms[0])); i++) {
3932e5b6d6dSopenharmony_ci      if(strcmp(options[REFERENCE].value, platforms[i].name) == 0) {
3942e5b6d6dSopenharmony_ci        gRefNum = i;
3952e5b6d6dSopenharmony_ci        break;
3962e5b6d6dSopenharmony_ci      }
3972e5b6d6dSopenharmony_ci    }
3982e5b6d6dSopenharmony_ci    if(i == sizeof(platforms)/sizeof(platforms[0])) {
3992e5b6d6dSopenharmony_ci      logger->log("Unknown reference %s!\n", options[REFERENCE].value);
4002e5b6d6dSopenharmony_ci      status = U_ILLEGAL_ARGUMENT_ERROR;
4012e5b6d6dSopenharmony_ci      return;
4022e5b6d6dSopenharmony_ci    }
4032e5b6d6dSopenharmony_ci  } else {
4042e5b6d6dSopenharmony_ci    gRefNum = 0;
4052e5b6d6dSopenharmony_ci  }
4062e5b6d6dSopenharmony_ci
4072e5b6d6dSopenharmony_ci  if(options[EXCLUDESET].doesOccur) {
4082e5b6d6dSopenharmony_ci    gExcludeSet.applyPattern(UnicodeString(options[EXCLUDESET].value), status);
4092e5b6d6dSopenharmony_ci    if(U_FAILURE(status)) {
4102e5b6d6dSopenharmony_ci      logger->log("Cannot construct exclude set from argument %s. Error %s\n", options[EXCLUDESET].value, u_errorName(status));
4112e5b6d6dSopenharmony_ci      return;
4122e5b6d6dSopenharmony_ci    } else {
4132e5b6d6dSopenharmony_ci      UnicodeString pattern;
4142e5b6d6dSopenharmony_ci      logger->log(gExcludeSet.toPattern(pattern, true), true);
4152e5b6d6dSopenharmony_ci    }
4162e5b6d6dSopenharmony_ci  }
4172e5b6d6dSopenharmony_ci
4182e5b6d6dSopenharmony_ci  if(options[REPERTOIRE].doesOccur)  {
4192e5b6d6dSopenharmony_ci    gRepertoire.applyPattern(UnicodeString(options[REPERTOIRE].value), status);
4202e5b6d6dSopenharmony_ci    if(U_FAILURE(status)) {
4212e5b6d6dSopenharmony_ci      logger->log("Cannot construct repertoire from argument %s. Error %s\n", options[REPERTOIRE].value, u_errorName(status));
4222e5b6d6dSopenharmony_ci      return;
4232e5b6d6dSopenharmony_ci    }
4242e5b6d6dSopenharmony_ci  }
4252e5b6d6dSopenharmony_ci
4262e5b6d6dSopenharmony_ci  if(options[OUTPUT].doesOccur) {
4272e5b6d6dSopenharmony_ci    outputFormat = options[OUTPUT].value;
4282e5b6d6dSopenharmony_ci    if(strcmp(outputFormat, "HTML") == 0) {
4292e5b6d6dSopenharmony_ci      outExtension = "html";
4302e5b6d6dSopenharmony_ci    } else if(strcmp(outputFormat, "XML") == 0) {
4312e5b6d6dSopenharmony_ci      outExtension = "xml";
4322e5b6d6dSopenharmony_ci    } else {
4332e5b6d6dSopenharmony_ci      outExtension = "txt";
4342e5b6d6dSopenharmony_ci    }
4352e5b6d6dSopenharmony_ci  }
4362e5b6d6dSopenharmony_ci
4372e5b6d6dSopenharmony_ci}
4382e5b6d6dSopenharmony_ci
4392e5b6d6dSopenharmony_ci// Check whether upper case comes before lower case or vice-versa
4402e5b6d6dSopenharmony_ciint32_t
4412e5b6d6dSopenharmony_cicheckCaseOrdering(void) {
4422e5b6d6dSopenharmony_ci  UChar stuff[][3] = {
4432e5b6d6dSopenharmony_ci    { 0x0061, separatorChar, 0x0061}, //"aa",
4442e5b6d6dSopenharmony_ci    { 0x0061, separatorChar, 0x0041 }, //"a\\u00E0",
4452e5b6d6dSopenharmony_ci    { 0x0041, separatorChar, 0x0061 }, //"\\u00E0a",
4462e5b6d6dSopenharmony_ci    { 0x0041, separatorChar, 0x0041 }, //"\\u00E0a",
4472e5b6d6dSopenharmony_ci    //{ 0x00E0, separatorChar, 0x00E0 }  //"\\u00E0\\u00E0"
4482e5b6d6dSopenharmony_ci  };
4492e5b6d6dSopenharmony_ci  const int32_t size = sizeof(stuff)/sizeof(stuff[0]);
4502e5b6d6dSopenharmony_ci
4512e5b6d6dSopenharmony_ci  Line **sortedLines = new Line*[size];
4522e5b6d6dSopenharmony_ci  Line lines[size];
4532e5b6d6dSopenharmony_ci
4542e5b6d6dSopenharmony_ci  int32_t i = 0;
4552e5b6d6dSopenharmony_ci  int32_t ordered = 0, reversed = 0;
4562e5b6d6dSopenharmony_ci
4572e5b6d6dSopenharmony_ci  for(i = 0; i < size; i++) {
4582e5b6d6dSopenharmony_ci    lines[i].setName(stuff[i], 3);
4592e5b6d6dSopenharmony_ci  }
4602e5b6d6dSopenharmony_ci  //setArray(sortedLines, lines, size);
4612e5b6d6dSopenharmony_ci  qsort(sortedLines, size, sizeof(Line*), gComparer);
4622e5b6d6dSopenharmony_ci
4632e5b6d6dSopenharmony_ci  for(i = 0; i < size; i++) {
4642e5b6d6dSopenharmony_ci    if(*(sortedLines+i) == &lines[i]) {
4652e5b6d6dSopenharmony_ci      ordered++;
4662e5b6d6dSopenharmony_ci    }
4672e5b6d6dSopenharmony_ci    if(*(sortedLines+i) == &lines[size-i-1]) {
4682e5b6d6dSopenharmony_ci      reversed++;
4692e5b6d6dSopenharmony_ci    }
4702e5b6d6dSopenharmony_ci  }
4712e5b6d6dSopenharmony_ci
4722e5b6d6dSopenharmony_ci  delete[] sortedLines;
4732e5b6d6dSopenharmony_ci  if(ordered == size) {
4742e5b6d6dSopenharmony_ci    return 0; // in normal order
4752e5b6d6dSopenharmony_ci  } else if(reversed == size) {
4762e5b6d6dSopenharmony_ci    return 1; // in reversed order
4772e5b6d6dSopenharmony_ci  } else {
4782e5b6d6dSopenharmony_ci    return -1; // unknown order
4792e5b6d6dSopenharmony_ci  }
4802e5b6d6dSopenharmony_ci}
4812e5b6d6dSopenharmony_ci
4822e5b6d6dSopenharmony_civoid
4832e5b6d6dSopenharmony_cigetExemplars(const char *locale, UnicodeSet &exemplars, UErrorCode &status) {
4842e5b6d6dSopenharmony_ci  // first we fill out structures with exemplar characters.
4852e5b6d6dSopenharmony_ci  UResourceBundle *res = ures_open(NULL, locale, &status);
4862e5b6d6dSopenharmony_ci  UnicodeString exemplarString = ures_getUnicodeStringByKey(res, "ExemplarCharacters", &status);
4872e5b6d6dSopenharmony_ci  exemplars.clear();
4882e5b6d6dSopenharmony_ci  exemplars.applyPattern(exemplarString, status);
4892e5b6d6dSopenharmony_ci  ures_close(res);
4902e5b6d6dSopenharmony_ci}
4912e5b6d6dSopenharmony_ci
4922e5b6d6dSopenharmony_ci
4932e5b6d6dSopenharmony_civoid
4942e5b6d6dSopenharmony_cigetFileNames(const char *name, char *tailoringName, char *tailoringDumpName, char *defaultName, char *defaultDumpName, char *diffName) {
4952e5b6d6dSopenharmony_ci  if(tailoringName) {
4962e5b6d6dSopenharmony_ci    strcpy(tailoringName, platforms[gPlatformIndexes[0]].name);
4972e5b6d6dSopenharmony_ci    strcat(tailoringName, "/");
4982e5b6d6dSopenharmony_ci    strcat(tailoringName, name);
4992e5b6d6dSopenharmony_ci    strcat(tailoringName, "_raw.");
5002e5b6d6dSopenharmony_ci    strcat(tailoringName, outExtension);
5012e5b6d6dSopenharmony_ci  }
5022e5b6d6dSopenharmony_ci  if(tailoringDumpName) {
5032e5b6d6dSopenharmony_ci    strcpy(tailoringDumpName, platforms[gPlatformIndexes[0]].name);
5042e5b6d6dSopenharmony_ci    strcat(tailoringDumpName, "/");
5052e5b6d6dSopenharmony_ci    strcat(tailoringDumpName, name);
5062e5b6d6dSopenharmony_ci    strcat(tailoringDumpName, ".dump");
5072e5b6d6dSopenharmony_ci  }
5082e5b6d6dSopenharmony_ci
5092e5b6d6dSopenharmony_ci  if(diffName) {
5102e5b6d6dSopenharmony_ci    strcpy(diffName, platforms[gPlatformIndexes[0]].name);
5112e5b6d6dSopenharmony_ci    strcat(diffName, "/");
5122e5b6d6dSopenharmony_ci    strcat(diffName, name);
5132e5b6d6dSopenharmony_ci    strcat(diffName, "_collation.");
5142e5b6d6dSopenharmony_ci    strcat(diffName, outExtension);
5152e5b6d6dSopenharmony_ci  }
5162e5b6d6dSopenharmony_ci
5172e5b6d6dSopenharmony_ci  if(defaultName) {
5182e5b6d6dSopenharmony_ci    strcpy(defaultName, platforms[gRefNum].name);
5192e5b6d6dSopenharmony_ci    strcat(defaultName, "/");
5202e5b6d6dSopenharmony_ci    strcat(defaultName, name);
5212e5b6d6dSopenharmony_ci    strcat(defaultName, "_default_raw.");
5222e5b6d6dSopenharmony_ci    strcat(defaultName, outExtension);
5232e5b6d6dSopenharmony_ci  }
5242e5b6d6dSopenharmony_ci
5252e5b6d6dSopenharmony_ci  if(defaultDumpName) {
5262e5b6d6dSopenharmony_ci    strcpy(defaultDumpName, platforms[gRefNum].name);
5272e5b6d6dSopenharmony_ci    strcat(defaultDumpName, "/");
5282e5b6d6dSopenharmony_ci    strcat(defaultDumpName, name);
5292e5b6d6dSopenharmony_ci    strcat(defaultDumpName, "_default.dump");
5302e5b6d6dSopenharmony_ci  }
5312e5b6d6dSopenharmony_ci}
5322e5b6d6dSopenharmony_ci
5332e5b6d6dSopenharmony_civoid
5342e5b6d6dSopenharmony_cisetFiles(const char *name, UErrorCode &status) {
5352e5b6d6dSopenharmony_ci  if(U_FAILURE(status)) {
5362e5b6d6dSopenharmony_ci    return;
5372e5b6d6dSopenharmony_ci  }
5382e5b6d6dSopenharmony_ci  int32_t i = 0;
5392e5b6d6dSopenharmony_ci  char tailoringName[256];
5402e5b6d6dSopenharmony_ci  char tailoringDumpName[256];
5412e5b6d6dSopenharmony_ci  char defaultName[256];
5422e5b6d6dSopenharmony_ci  char defaultDumpName[256];
5432e5b6d6dSopenharmony_ci  char diffName[256];
5442e5b6d6dSopenharmony_ci
5452e5b6d6dSopenharmony_ci  getFileNames(name, tailoringName, tailoringDumpName, defaultName, defaultDumpName, diffName);
5462e5b6d6dSopenharmony_ci  if(options[PLATFORM].doesOccur && !options[DIFF].doesOccur) {
5472e5b6d6dSopenharmony_ci    if(createDir(platforms[gPlatformIndexes[0]].name) == 0) {
5482e5b6d6dSopenharmony_ci      tailoringBundle = new UPrinter(tailoringName, "en", "utf-8", NULL, false);
5492e5b6d6dSopenharmony_ci      fTailoringDump = fopen(tailoringDumpName, "wb");
5502e5b6d6dSopenharmony_ci    } else {
5512e5b6d6dSopenharmony_ci      status = U_FILE_ACCESS_ERROR;
5522e5b6d6dSopenharmony_ci      return;
5532e5b6d6dSopenharmony_ci    }
5542e5b6d6dSopenharmony_ci  }
5552e5b6d6dSopenharmony_ci
5562e5b6d6dSopenharmony_ci  if(options[REFERENCE].doesOccur && !options[DIFF].doesOccur) {
5572e5b6d6dSopenharmony_ci    if(createDir(platforms[gRefNum].name) == 0) {
5582e5b6d6dSopenharmony_ci      referenceBundle = new UPrinter(defaultName, "en", "utf-8", NULL, false);
5592e5b6d6dSopenharmony_ci      fDefaultDump = fopen(defaultDumpName, "wb");
5602e5b6d6dSopenharmony_ci    } else {
5612e5b6d6dSopenharmony_ci      status = U_FILE_ACCESS_ERROR;
5622e5b6d6dSopenharmony_ci      return;
5632e5b6d6dSopenharmony_ci    }
5642e5b6d6dSopenharmony_ci  }
5652e5b6d6dSopenharmony_ci
5662e5b6d6dSopenharmony_ci  if((options[PLATFORM].doesOccur && options[REFERENCE].doesOccur) || options[DIFF].doesOccur) {
5672e5b6d6dSopenharmony_ci    if(createDir(platforms[gPlatformIndexes[0]].name) == 0) {
5682e5b6d6dSopenharmony_ci      bundle = new UPrinter(diffName, "en", "utf-8", NULL, false);
5692e5b6d6dSopenharmony_ci    }
5702e5b6d6dSopenharmony_ci  }
5712e5b6d6dSopenharmony_ci  if(options[DIFF].doesOccur) {
5722e5b6d6dSopenharmony_ci    fTailoringDump = fopen(tailoringDumpName, "rb");
5732e5b6d6dSopenharmony_ci    fDefaultDump = fopen(defaultDumpName, "rb");
5742e5b6d6dSopenharmony_ci  }
5752e5b6d6dSopenharmony_ci}
5762e5b6d6dSopenharmony_ci
5772e5b6d6dSopenharmony_ci
// File-level error code used while constructing the static sets below; a
// pattern that fails to parse leaves its error here. Functions in this file
// that take a `status` parameter shadow this global.
UErrorCode status = U_ZERO_ERROR;
// Character classes built once at start-up from UnicodeSet patterns.
static UnicodeSet UNASSIGNED(UnicodeString("[:Cn:]"), status);
static UnicodeSet GENERAL_ACCENTS(UnicodeString("[[:block=Combining Diacritical Marks:]-[:Cn:]]"), status);
//static UnicodeSet ASCII_BASE(UnicodeString("[[:ASCII:]-[:L:]-[:N:]]"), status);
static UnicodeSet ASCII_BASE(UnicodeString("[[:ASCII:]]"), status);
static UnicodeSet ALPHABETIC(UnicodeString("[:alphabetic:]"), status);
//static UnicodeSet CONTROL(UnicodeString("[[:control:][\\u0000-\\u002F]]"), status);
static UnicodeSet BMP(UnicodeString("[\\u0000-\\uFFFF]"), status);

static UnicodeSet CONTROL(UnicodeString("[:control:]"), status);
5882e5b6d6dSopenharmony_ci
5892e5b6d6dSopenharmony_ciUCollator *
5902e5b6d6dSopenharmony_cisetLocale(const char* locale, UErrorCode &status)
5912e5b6d6dSopenharmony_ci{
5922e5b6d6dSopenharmony_ci  gWinLCID = uloc_getLCID(locale);
5932e5b6d6dSopenharmony_ci  setlocale(LC_COLLATE, locale);
5942e5b6d6dSopenharmony_ci
5952e5b6d6dSopenharmony_ci  if(gCol) {
5962e5b6d6dSopenharmony_ci    ucol_close(gCol);
5972e5b6d6dSopenharmony_ci  }
5982e5b6d6dSopenharmony_ci  gCol = ucol_open(locale, &status);
5992e5b6d6dSopenharmony_ci  ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
6002e5b6d6dSopenharmony_ci  //ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
6012e5b6d6dSopenharmony_ci  //ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
6022e5b6d6dSopenharmony_ci
6032e5b6d6dSopenharmony_ci  return gCol;
6042e5b6d6dSopenharmony_ci}
6052e5b6d6dSopenharmony_ci
6062e5b6d6dSopenharmony_ci
6072e5b6d6dSopenharmony_ci
6082e5b6d6dSopenharmony_ciUCollator *
6092e5b6d6dSopenharmony_cisetReference(UErrorCode &status)
6102e5b6d6dSopenharmony_ci{
6112e5b6d6dSopenharmony_ci  gWinLCID = uloc_getLCID("en");
6122e5b6d6dSopenharmony_ci  setlocale(LC_COLLATE, "en_US.UTF-8");
6132e5b6d6dSopenharmony_ci  if(gCol) {
6142e5b6d6dSopenharmony_ci    ucol_close(gCol);
6152e5b6d6dSopenharmony_ci  }
6162e5b6d6dSopenharmony_ci  gCol = ucol_open("root", &status);
6172e5b6d6dSopenharmony_ci  ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
6182e5b6d6dSopenharmony_ci  return gCol;
6192e5b6d6dSopenharmony_ci}
6202e5b6d6dSopenharmony_ci
/**
 * Interactive mode placeholder: reads stdin line by line (at most 255
 * characters per read) until end of input and discards everything read.
 */
void
processInteractive() {
  char command[256];
  while(fgets(command, (int)sizeof(command), stdin) != NULL) {
    // No commands are implemented; the input is simply consumed.
  }
}
6282e5b6d6dSopenharmony_ci
6292e5b6d6dSopenharmony_ciUChar probeChars[][4] = {
6302e5b6d6dSopenharmony_ci  { 0x0061, 0x0062, 0x00E1, 0x0041 }, // latin with a-grave
6312e5b6d6dSopenharmony_ci  { 0x0041, 0x0042, 0x00C1, 0x0061 }, // upper first
6322e5b6d6dSopenharmony_ci  { 0x006E, 0x006F, 0x00F1, 0x004E }, // latin with n-tilda
6332e5b6d6dSopenharmony_ci  { 0x004E, 0x004F, 0x00D1, 0x006E }, // upper first
6342e5b6d6dSopenharmony_ci  { 0x0433, 0x0493, 0x0491, 0x0413 }, // Cyrillic
6352e5b6d6dSopenharmony_ci  { 0x0413, 0x0492, 0x0490, 0x0433 }, // upper first
6362e5b6d6dSopenharmony_ci  { 0x3045, 0x3047, 0x3094, 0x3046 }  // Hiragana/Katakana (last resort)
6372e5b6d6dSopenharmony_ci
6382e5b6d6dSopenharmony_ci};
6392e5b6d6dSopenharmony_ci
6402e5b6d6dSopenharmony_civoid
6412e5b6d6dSopenharmony_ciprocessCollator(UCollator *col, UErrorCode &status) {
6422e5b6d6dSopenharmony_ci  int32_t i = 0;
6432e5b6d6dSopenharmony_ci  uint32_t j = 0;
6442e5b6d6dSopenharmony_ci  gCol = col;
6452e5b6d6dSopenharmony_ci  UChar ruleString[16384];
6462e5b6d6dSopenharmony_ci  char myLoc[256];
6472e5b6d6dSopenharmony_ci
6482e5b6d6dSopenharmony_ci  int32_t ruleStringLength = ucol_getRulesEx(gCol, UCOL_TAILORING_ONLY, ruleString, 16384);
6492e5b6d6dSopenharmony_ci  logger->log(UnicodeString(ruleString, ruleStringLength), true);
6502e5b6d6dSopenharmony_ci  const char *locale = ucol_getLocale(gCol, ULOC_REQUESTED_LOCALE, &status);
6512e5b6d6dSopenharmony_ci  if(locale == NULL) {
6522e5b6d6dSopenharmony_ci    locale = "en";
6532e5b6d6dSopenharmony_ci  }
6542e5b6d6dSopenharmony_ci  strcpy(myLoc, locale);
6552e5b6d6dSopenharmony_ci  UnicodeSet exemplarUSet;
6562e5b6d6dSopenharmony_ci  UnicodeSet RefRepertoire;
6572e5b6d6dSopenharmony_ci
6582e5b6d6dSopenharmony_ci  UnicodeSet tailored;
6592e5b6d6dSopenharmony_ci
6602e5b6d6dSopenharmony_ci  tailored = *((UnicodeSet *)ucol_getTailoredSet(gCol, &status));
6612e5b6d6dSopenharmony_ci  tailored.removeAll(CONTROL);
6622e5b6d6dSopenharmony_ci
6632e5b6d6dSopenharmony_ci
6642e5b6d6dSopenharmony_ci  UnicodeString pattern;
6652e5b6d6dSopenharmony_ci  int sanityResult;
6662e5b6d6dSopenharmony_ci
6672e5b6d6dSopenharmony_ci  UnicodeSet hanSet;
6682e5b6d6dSopenharmony_ci  UBool hanAppears = false;
6692e5b6d6dSopenharmony_ci
6702e5b6d6dSopenharmony_ci  debug->log("\nGenerating order for platform: %s\n", platforms[gPlatformIndexes[0]].name);
6712e5b6d6dSopenharmony_ci  gComparer = platforms[gPlatformIndexes[0]].comparer;
6722e5b6d6dSopenharmony_ci
6732e5b6d6dSopenharmony_ci  StrengthProbe probe(platforms[gPlatformIndexes[0]].comparer, platforms[gPlatformIndexes[0]].skgetter, 0x0030, probeChars[0][0], probeChars[0][1], probeChars[0][2], probeChars[0][3]);
6742e5b6d6dSopenharmony_ci  sanityResult = probe.checkSanity();
6752e5b6d6dSopenharmony_ci  j = 0;
6762e5b6d6dSopenharmony_ci  while(sanityResult && j+1 < sizeof(probeChars)/sizeof(probeChars[0])) {
6772e5b6d6dSopenharmony_ci   j++;
6782e5b6d6dSopenharmony_ci   sanityResult =  probe.setProbeChars(probeChars[j][0], probeChars[j][1], probeChars[j][2], probeChars[j][3]);
6792e5b6d6dSopenharmony_ci  }
6802e5b6d6dSopenharmony_ci  if(sanityResult) {
6812e5b6d6dSopenharmony_ci    logger->log("Bad choice of probe characters! Sanity returned %i. Exiting\n", sanityResult, sanityResult);
6822e5b6d6dSopenharmony_ci    return;
6832e5b6d6dSopenharmony_ci  }
6842e5b6d6dSopenharmony_ci  logger->log("Probe chars: %C, %C, %C, %C\n", probeChars[j][0], probeChars[j][1], probeChars[j][2], probeChars[j][3]);
6852e5b6d6dSopenharmony_ci
6862e5b6d6dSopenharmony_ci  debug->off();
6872e5b6d6dSopenharmony_ci
6882e5b6d6dSopenharmony_ci  if(gRepertoire.size()) {
6892e5b6d6dSopenharmony_ci    exemplarUSet = gRepertoire;
6902e5b6d6dSopenharmony_ci  } else {
6912e5b6d6dSopenharmony_ci    generateRepertoire(locale, exemplarUSet, hanAppears, status);
6922e5b6d6dSopenharmony_ci  }
6932e5b6d6dSopenharmony_ci  exemplarUSet.addAll(tailored);
6942e5b6d6dSopenharmony_ci  hanSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_HAN, status);
6952e5b6d6dSopenharmony_ci  exemplarUSet.removeAll(hanSet);
6962e5b6d6dSopenharmony_ci
6972e5b6d6dSopenharmony_ci  logger->log(exemplarUSet.toPattern(pattern, true), true);
6982e5b6d6dSopenharmony_ci
6992e5b6d6dSopenharmony_ci  exemplarUSet = flatten(exemplarUSet, status);
7002e5b6d6dSopenharmony_ci  logger->log(exemplarUSet.toPattern(pattern, true), true);
7012e5b6d6dSopenharmony_ci
7022e5b6d6dSopenharmony_ci  if(!options[PRINTREF].doesOccur) {
7032e5b6d6dSopenharmony_ci
7042e5b6d6dSopenharmony_ci    logger->log("\n*** Detecting ordering for the locale\n\n");
7052e5b6d6dSopenharmony_ci
7062e5b6d6dSopenharmony_ci    debug->on();
7072e5b6d6dSopenharmony_ci    SortedLines lines(exemplarUSet, gExcludeSet, probe, logger, debug);
7082e5b6d6dSopenharmony_ci    lines.analyse(status);
7092e5b6d6dSopenharmony_ci    lines.calculateSortKeys();
7102e5b6d6dSopenharmony_ci    debug->log("\n*** Final order\n\n");
7112e5b6d6dSopenharmony_ci    debug->log(lines.toPrettyString(true, true), true);
7122e5b6d6dSopenharmony_ci    lines.toFile(fTailoringDump, true, status);
7132e5b6d6dSopenharmony_ci    tailoringBundle->log(lines.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, NULL, true, true, hanAppears), true);
7142e5b6d6dSopenharmony_ci    //debug->off();
7152e5b6d6dSopenharmony_ci
7162e5b6d6dSopenharmony_ci    if(options[REFERENCE].doesOccur) {
7172e5b6d6dSopenharmony_ci      status = U_ZERO_ERROR;
7182e5b6d6dSopenharmony_ci      lines.getRepertoire(RefRepertoire);
7192e5b6d6dSopenharmony_ci      setReference(status);
7202e5b6d6dSopenharmony_ci
7212e5b6d6dSopenharmony_ci      logger->log(exemplarUSet.toPattern(pattern, true), true);
7222e5b6d6dSopenharmony_ci      logger->log(RefRepertoire.toPattern(pattern, true), true);
7232e5b6d6dSopenharmony_ci
7242e5b6d6dSopenharmony_ci      StrengthProbe RefProbe(platforms[gRefNum].comparer, platforms[gRefNum].skgetter);
7252e5b6d6dSopenharmony_ci      logger->log("\n*** Detecting ordering for reference\n\n");
7262e5b6d6dSopenharmony_ci      SortedLines RefLines(exemplarUSet, gExcludeSet, RefProbe, logger, debug);
7272e5b6d6dSopenharmony_ci      RefLines.analyse(status);
7282e5b6d6dSopenharmony_ci      referenceBundle->log(RefLines.toOutput(outputFormat, myLoc, platforms[gRefNum].name, NULL, true, true, false), true);
7292e5b6d6dSopenharmony_ci      RefLines.toFile(fDefaultDump, true, status);
7302e5b6d6dSopenharmony_ci
7312e5b6d6dSopenharmony_ci      lines.reduceDifference(RefLines);
7322e5b6d6dSopenharmony_ci      logger->log("\n*** Final rules\n\n");
7332e5b6d6dSopenharmony_ci      logger->log(lines.toPrettyString(true), true);
7342e5b6d6dSopenharmony_ci      bundle->log(lines.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name, true, true, hanAppears), true);
7352e5b6d6dSopenharmony_ci    }
7362e5b6d6dSopenharmony_ci  } else {
7372e5b6d6dSopenharmony_ci    setReference(status);
7382e5b6d6dSopenharmony_ci    StrengthProbe RefProbe(platforms[gRefNum].comparer, platforms[gRefNum].skgetter);
7392e5b6d6dSopenharmony_ci    logger->log("\n*** Detecting ordering for reference\n\n");
7402e5b6d6dSopenharmony_ci    SortedLines RefLines(exemplarUSet, gExcludeSet, RefProbe, logger, debug);
7412e5b6d6dSopenharmony_ci    RefLines.analyse(status);
7422e5b6d6dSopenharmony_ci    logger->log(RefLines.toPrettyString(true), true);
7432e5b6d6dSopenharmony_ci    referenceBundle->log(RefLines.toOutput(outputFormat, myLoc, platforms[gRefNum].name, NULL, true, true, false), true);
7442e5b6d6dSopenharmony_ci  }
7452e5b6d6dSopenharmony_ci  if(hanAppears) {
7462e5b6d6dSopenharmony_ci    // there are Han characters. This is a huge block. The best we can do is to just sort it, compare to empty
7472e5b6d6dSopenharmony_ci    // and spit it out. Anything else would be a suicide (actually is - kernel just kills you :)
7482e5b6d6dSopenharmony_ci    logger->log("\n*** Detecting order for Han\n");
7492e5b6d6dSopenharmony_ci    debug->off();
7502e5b6d6dSopenharmony_ci    setLocale(gLocale, status);
7512e5b6d6dSopenharmony_ci    exemplarUSet.clear();
7522e5b6d6dSopenharmony_ci    exemplarUSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_HAN, status);
7532e5b6d6dSopenharmony_ci    exemplarUSet = flatten(exemplarUSet, status);
7542e5b6d6dSopenharmony_ci    SortedLines han(exemplarUSet, gExcludeSet, probe, logger, debug);
7552e5b6d6dSopenharmony_ci    han.sort(true, true);
7562e5b6d6dSopenharmony_ci    han.classifyRepertoire();
7572e5b6d6dSopenharmony_ci    han.getBounds(status);
7582e5b6d6dSopenharmony_ci    tailoringBundle->log("Han ordering:<br>\n");
7592e5b6d6dSopenharmony_ci    tailoringBundle->log(han.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, NULL, true, false, false), true);
7602e5b6d6dSopenharmony_ci    bundle->log(han.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, NULL, true, false, false), true);
7612e5b6d6dSopenharmony_ci  }
7622e5b6d6dSopenharmony_ci  ucol_close(gCol);
7632e5b6d6dSopenharmony_ci}
7642e5b6d6dSopenharmony_ci
7652e5b6d6dSopenharmony_civoid
7662e5b6d6dSopenharmony_ciprocessLocale(const char *locale, UErrorCode &status) {
7672e5b6d6dSopenharmony_ci  setLocale(locale, status);
7682e5b6d6dSopenharmony_ci  setFiles(locale, status);
7692e5b6d6dSopenharmony_ci  if(U_FAILURE(status)) {
7702e5b6d6dSopenharmony_ci    return;
7712e5b6d6dSopenharmony_ci  }
7722e5b6d6dSopenharmony_ci
7732e5b6d6dSopenharmony_ci  debug->log("Locale %s (LCID:%06X, unix:%s)\n", locale, gWinLCID, setlocale(LC_COLLATE, NULL));
7742e5b6d6dSopenharmony_ci  tailoringBundle->log("// Ordering for locale %s (LCID:%06X, unix:%s), platform %s reference %s<br>\n",
7752e5b6d6dSopenharmony_ci    locale, gWinLCID, setlocale(LC_COLLATE, NULL),
7762e5b6d6dSopenharmony_ci    platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name);
7772e5b6d6dSopenharmony_ci  if(options[REFERENCE].doesOccur) {
7782e5b6d6dSopenharmony_ci    referenceBundle->log("// Reference for locale %s (LCID:%06X, unix:%s), platform %s reference %s<br>\n",
7792e5b6d6dSopenharmony_ci      locale, gWinLCID, setlocale(LC_COLLATE, NULL),
7802e5b6d6dSopenharmony_ci      platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name);
7812e5b6d6dSopenharmony_ci  }
7822e5b6d6dSopenharmony_ci
7832e5b6d6dSopenharmony_ci
7842e5b6d6dSopenharmony_ci  processCollator(gCol, status);
7852e5b6d6dSopenharmony_ci}
7862e5b6d6dSopenharmony_ci
7872e5b6d6dSopenharmony_ci
7882e5b6d6dSopenharmony_ci
7892e5b6d6dSopenharmony_ciUBool
7902e5b6d6dSopenharmony_cihasCollationElements(const char *locName) {
7912e5b6d6dSopenharmony_ci
7922e5b6d6dSopenharmony_ci  UErrorCode status = U_ZERO_ERROR;
7932e5b6d6dSopenharmony_ci  UResourceBundle *ColEl = NULL;
7942e5b6d6dSopenharmony_ci
7952e5b6d6dSopenharmony_ci  UResourceBundle *loc = ures_open(NULL, locName, &status);;
7962e5b6d6dSopenharmony_ci
7972e5b6d6dSopenharmony_ci  if(U_SUCCESS(status)) {
7982e5b6d6dSopenharmony_ci    status = U_ZERO_ERROR;
7992e5b6d6dSopenharmony_ci    ColEl = ures_getByKey(loc, "CollationElements", ColEl, &status);
8002e5b6d6dSopenharmony_ci    if(status == U_ZERO_ERROR) { /* do the test - there are real elements */
8012e5b6d6dSopenharmony_ci      ures_close(ColEl);
8022e5b6d6dSopenharmony_ci      ures_close(loc);
8032e5b6d6dSopenharmony_ci      return true;
8042e5b6d6dSopenharmony_ci    }
8052e5b6d6dSopenharmony_ci    ures_close(ColEl);
8062e5b6d6dSopenharmony_ci    ures_close(loc);
8072e5b6d6dSopenharmony_ci  }
8082e5b6d6dSopenharmony_ci  return false;
8092e5b6d6dSopenharmony_ci}
8102e5b6d6dSopenharmony_ci
8112e5b6d6dSopenharmony_ciint
8122e5b6d6dSopenharmony_cimain(int argc,
8132e5b6d6dSopenharmony_ci     char* argv[])
8142e5b6d6dSopenharmony_ci{
8152e5b6d6dSopenharmony_ci  UErrorCode status = U_ZERO_ERROR;
8162e5b6d6dSopenharmony_ci  logger = new UPrinter(stdout, "en", "latin-1");
8172e5b6d6dSopenharmony_ci  debug =  new UPrinter(stderr, "en", "latin-1");
8182e5b6d6dSopenharmony_ci
8192e5b6d6dSopenharmony_ci/*
8202e5b6d6dSopenharmony_ci  USet *wsp = uprv_openRuleWhiteSpaceSet(&status);
8212e5b6d6dSopenharmony_ci  uset_add(wsp, 0x0041);
8222e5b6d6dSopenharmony_ci  uset_remove(wsp, 0x0041);
8232e5b6d6dSopenharmony_ci  UnicodeString pat;
8242e5b6d6dSopenharmony_ci  ((UnicodeSet *)wsp)->toPattern(pat, true);
8252e5b6d6dSopenharmony_ci  pat.setCharAt(pat.length(), 0);
8262e5b6d6dSopenharmony_ci  escapeString(pat.getBuffer(), pat.length(), log);
8272e5b6d6dSopenharmony_ci  u_fflush(log);
8282e5b6d6dSopenharmony_ci*/
8292e5b6d6dSopenharmony_ci
8302e5b6d6dSopenharmony_ci  processArgs(argc, argv, status);
8312e5b6d6dSopenharmony_ci  int32_t i = 0;
8322e5b6d6dSopenharmony_ci
8332e5b6d6dSopenharmony_ci
8342e5b6d6dSopenharmony_ci
8352e5b6d6dSopenharmony_ci  if(U_FAILURE(status) || gPlatformNo == 0) {
8362e5b6d6dSopenharmony_ci    return -1;
8372e5b6d6dSopenharmony_ci  }
8382e5b6d6dSopenharmony_ci
8392e5b6d6dSopenharmony_ci  utf8cnv = ucnv_open("utf-8", &status);    // we are just doing UTF-8 locales for now.
8402e5b6d6dSopenharmony_ci  gUCA = ucol_open("root", &status);
8412e5b6d6dSopenharmony_ci
8422e5b6d6dSopenharmony_ci  if(options[INTERACTIVE].doesOccur) {
8432e5b6d6dSopenharmony_ci    processInteractive();
8442e5b6d6dSopenharmony_ci  } else {
8452e5b6d6dSopenharmony_ci    if(gRulesStdin) {
8462e5b6d6dSopenharmony_ci      char buffer[1024];
8472e5b6d6dSopenharmony_ci      UChar ruleBuffer[16384];
8482e5b6d6dSopenharmony_ci      UChar *rules = ruleBuffer;
8492e5b6d6dSopenharmony_ci      int32_t maxRuleLen = 16384;
8502e5b6d6dSopenharmony_ci      int32_t rLen = 0;
8512e5b6d6dSopenharmony_ci      while(fgets(buffer, 1024, stdin)) {
8522e5b6d6dSopenharmony_ci        if(buffer[0] != '/' && buffer[1] != '/') {
8532e5b6d6dSopenharmony_ci          rLen = u_unescape(buffer, rules, maxRuleLen);
8542e5b6d6dSopenharmony_ci          rules += rLen;
8552e5b6d6dSopenharmony_ci          maxRuleLen -= rLen;
8562e5b6d6dSopenharmony_ci        }
8572e5b6d6dSopenharmony_ci      }
8582e5b6d6dSopenharmony_ci      UParseError parseError;
8592e5b6d6dSopenharmony_ci      //escapeString(ruleBuffer, rules-ruleBuffer, log);//
8602e5b6d6dSopenharmony_ci      debug->log("%U\n", ruleBuffer);
8612e5b6d6dSopenharmony_ci
8622e5b6d6dSopenharmony_ci      UCollator *col = ucol_openRules(ruleBuffer, rules-ruleBuffer, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
8632e5b6d6dSopenharmony_ci      if(U_SUCCESS(status)) {
8642e5b6d6dSopenharmony_ci        setFiles("stdinRules", status);
8652e5b6d6dSopenharmony_ci        processCollator(col, status);
8662e5b6d6dSopenharmony_ci      } else {
8672e5b6d6dSopenharmony_ci        logger->log("Error %s\n", u_errorName(status));
8682e5b6d6dSopenharmony_ci      }
8692e5b6d6dSopenharmony_ci    } else if(options[DIFF].doesOccur) {
8702e5b6d6dSopenharmony_ci      logger->log("Diffing two dumps\n");
8712e5b6d6dSopenharmony_ci      // must have locale, platform and ref in order to be
8722e5b6d6dSopenharmony_ci      // able to find dump files.
8732e5b6d6dSopenharmony_ci      setFiles(gLocale, status);
8742e5b6d6dSopenharmony_ci
8752e5b6d6dSopenharmony_ci      if(fTailoringDump && fDefaultDump) {
8762e5b6d6dSopenharmony_ci	    SortedLines tailoring(fTailoringDump, logger, debug, status);
8772e5b6d6dSopenharmony_ci	    logger->log(tailoring.toString(true), true);
8782e5b6d6dSopenharmony_ci	    SortedLines reference(fDefaultDump, logger, debug, status);
8792e5b6d6dSopenharmony_ci	    logger->log(reference.toString(true), true);
8802e5b6d6dSopenharmony_ci	    tailoring.reduceDifference(reference);
8812e5b6d6dSopenharmony_ci	    logger->log("\n*** Final rules\n\n");
8822e5b6d6dSopenharmony_ci	    logger->log(tailoring.toPrettyString(true), true);
8832e5b6d6dSopenharmony_ci	    //result->log(lines.toPrettyString(true), true);
8842e5b6d6dSopenharmony_ci	    bundle->log(tailoring.toOutput(outputFormat, gLocale, platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name, true, true, false), true);
8852e5b6d6dSopenharmony_ci      }
8862e5b6d6dSopenharmony_ci
8872e5b6d6dSopenharmony_ci    } else {
8882e5b6d6dSopenharmony_ci      if(gLocale) {
8892e5b6d6dSopenharmony_ci        processLocale(gLocale, status);
8902e5b6d6dSopenharmony_ci      } else if(gLocaleNo) {
8912e5b6d6dSopenharmony_ci        for(i = 0; i < gLocaleNo; i++) {
8922e5b6d6dSopenharmony_ci          processLocale(gLocales[i], status);
8932e5b6d6dSopenharmony_ci        }
8942e5b6d6dSopenharmony_ci      } else { // do the loop through all the locales
8952e5b6d6dSopenharmony_ci        int32_t noOfLoc = uloc_countAvailable();
8962e5b6d6dSopenharmony_ci        const char *locName = NULL;
8972e5b6d6dSopenharmony_ci        for(i = 0; i<noOfLoc; i++) {
8982e5b6d6dSopenharmony_ci          status = U_ZERO_ERROR;
8992e5b6d6dSopenharmony_ci          locName = uloc_getAvailable(i);
9002e5b6d6dSopenharmony_ci          if(hasCollationElements(locName)) {
9012e5b6d6dSopenharmony_ci            processLocale(locName, status);
9022e5b6d6dSopenharmony_ci          }
9032e5b6d6dSopenharmony_ci        }
9042e5b6d6dSopenharmony_ci      }
9052e5b6d6dSopenharmony_ci    }
9062e5b6d6dSopenharmony_ci  }
9072e5b6d6dSopenharmony_ci
9082e5b6d6dSopenharmony_ci
9092e5b6d6dSopenharmony_ci  ucol_close(gUCA);
9102e5b6d6dSopenharmony_ci  ucnv_close(utf8cnv);
9112e5b6d6dSopenharmony_ci
9122e5b6d6dSopenharmony_ci  delete logger;
9132e5b6d6dSopenharmony_ci  delete debug;
9142e5b6d6dSopenharmony_ci  if(tailoringBundle) {
9152e5b6d6dSopenharmony_ci    delete tailoringBundle;
9162e5b6d6dSopenharmony_ci  }
9172e5b6d6dSopenharmony_ci  if(referenceBundle) {
9182e5b6d6dSopenharmony_ci    delete referenceBundle;
9192e5b6d6dSopenharmony_ci  }
9202e5b6d6dSopenharmony_ci  if(bundle) {
9212e5b6d6dSopenharmony_ci    delete bundle;
9222e5b6d6dSopenharmony_ci  }
9232e5b6d6dSopenharmony_ci  if(fTailoringDump) {
9242e5b6d6dSopenharmony_ci    fclose(fTailoringDump);
9252e5b6d6dSopenharmony_ci  }
9262e5b6d6dSopenharmony_ci  if(fDefaultDump) {
9272e5b6d6dSopenharmony_ci    fclose(fDefaultDump);
9282e5b6d6dSopenharmony_ci  }
9292e5b6d6dSopenharmony_ci  return 0;
9302e5b6d6dSopenharmony_ci}
9312e5b6d6dSopenharmony_ci
9322e5b6d6dSopenharmony_ci
9332e5b6d6dSopenharmony_ciUnicodeString propertyAndValueName(UProperty prop, int32_t i) {
9342e5b6d6dSopenharmony_ci  UnicodeString result;
9352e5b6d6dSopenharmony_ci  result.append(u_getPropertyName(prop, U_LONG_PROPERTY_NAME));
9362e5b6d6dSopenharmony_ci  result.append("=");
9372e5b6d6dSopenharmony_ci  result.append(u_getPropertyValueName(prop, i, U_LONG_PROPERTY_NAME));
9382e5b6d6dSopenharmony_ci
9392e5b6d6dSopenharmony_ci    //+ "(" + prop + "," + i + ") ";
9402e5b6d6dSopenharmony_ci  return result;
9412e5b6d6dSopenharmony_ci}
9422e5b6d6dSopenharmony_ci
9432e5b6d6dSopenharmony_ci
9442e5b6d6dSopenharmony_civoid generateRepertoire(const char *locale, UnicodeSet &rep, UBool &hanAppears, UErrorCode &status) {
9452e5b6d6dSopenharmony_ci    UnicodeString dispName;
9462e5b6d6dSopenharmony_ci    debug->log("Getting repertoire for %s\n", locale);
9472e5b6d6dSopenharmony_ci    tailoringBundle->log("// Scripts in repertoire: ");
9482e5b6d6dSopenharmony_ci    if(options[REFERENCE].doesOccur) {
9492e5b6d6dSopenharmony_ci      referenceBundle->log("// Scripts in repertoire: ");
9502e5b6d6dSopenharmony_ci    }
9512e5b6d6dSopenharmony_ci	rep.clear();
9522e5b6d6dSopenharmony_ci    UnicodeSet delta;
9532e5b6d6dSopenharmony_ci
9542e5b6d6dSopenharmony_ci    UScriptCode script[256];
9552e5b6d6dSopenharmony_ci    int32_t i = 0;
9562e5b6d6dSopenharmony_ci    // now add the scripts for the locale
9572e5b6d6dSopenharmony_ci    UProperty prop = UCHAR_SCRIPT;
9582e5b6d6dSopenharmony_ci	int32_t scriptLength = uscript_getCode(locale, script, 256, &status);
9592e5b6d6dSopenharmony_ci    if(scriptLength) {
9602e5b6d6dSopenharmony_ci	  for (i = 0; i < scriptLength; ++i) {
9612e5b6d6dSopenharmony_ci        if(script[i] == USCRIPT_HAN) {
9622e5b6d6dSopenharmony_ci          hanAppears = true;
9632e5b6d6dSopenharmony_ci          continue;
9642e5b6d6dSopenharmony_ci        }
9652e5b6d6dSopenharmony_ci        delta.applyIntPropertyValue(prop, script[i], status);
9662e5b6d6dSopenharmony_ci        debug->log("Adding ");
9672e5b6d6dSopenharmony_ci        debug->log(propertyAndValueName(prop, script[i]), true);
9682e5b6d6dSopenharmony_ci        tailoringBundle->log("// ");
9692e5b6d6dSopenharmony_ci        tailoringBundle->log(propertyAndValueName(prop, script[i]), true);
9702e5b6d6dSopenharmony_ci        if(options[REFERENCE].doesOccur) {
9712e5b6d6dSopenharmony_ci          referenceBundle->log("// ");
9722e5b6d6dSopenharmony_ci          referenceBundle->log(propertyAndValueName(prop, script[i]), true);
9732e5b6d6dSopenharmony_ci        }
9742e5b6d6dSopenharmony_ci		rep.addAll(delta);
9752e5b6d6dSopenharmony_ci	  }
9762e5b6d6dSopenharmony_ci    } else {
9772e5b6d6dSopenharmony_ci      delta.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_LATIN, status);
9782e5b6d6dSopenharmony_ci      rep.addAll(delta);
9792e5b6d6dSopenharmony_ci    }
9802e5b6d6dSopenharmony_ci
9812e5b6d6dSopenharmony_ci    // now see which blocks those overlap, and add
9822e5b6d6dSopenharmony_ci    prop = UCHAR_BLOCK;
9832e5b6d6dSopenharmony_ci    int32_t min = u_getIntPropertyMinValue(prop);
9842e5b6d6dSopenharmony_ci    int32_t max = u_getIntPropertyMaxValue(prop);
9852e5b6d6dSopenharmony_ci    UnicodeSet checkDelta;
9862e5b6d6dSopenharmony_ci    for (i = min; i <= max; ++i) {
9872e5b6d6dSopenharmony_ci        // skip certain blocks
9882e5b6d6dSopenharmony_ci        const char *name = u_getPropertyValueName(prop, i, U_LONG_PROPERTY_NAME);
9892e5b6d6dSopenharmony_ci        if (strcmp(name, "Superscripts_and_Subscripts") == 0
9902e5b6d6dSopenharmony_ci        || strcmp(name, "Letterlike_Symbols") == 0
9912e5b6d6dSopenharmony_ci        || strcmp(name, "Alphabetic_Presentation_Forms") == 0
9922e5b6d6dSopenharmony_ci        || strcmp(name, "Halfwidth_and_Fullwidth_Forms") == 0) continue;
9932e5b6d6dSopenharmony_ci
9942e5b6d6dSopenharmony_ci        delta.applyIntPropertyValue(prop, i, status).removeAll(UNASSIGNED);
9952e5b6d6dSopenharmony_ci        if (!rep.containsSome(delta)) continue;
9962e5b6d6dSopenharmony_ci        if (rep.containsAll(delta)) continue; // just to see what we are adding
9972e5b6d6dSopenharmony_ci        debug->log("Adding ");
9982e5b6d6dSopenharmony_ci        debug->log(propertyAndValueName(prop, i), true);
9992e5b6d6dSopenharmony_ci        tailoringBundle->log("// ");
10002e5b6d6dSopenharmony_ci        tailoringBundle->log(propertyAndValueName(prop, i), true);
10012e5b6d6dSopenharmony_ci        if(options[REFERENCE].doesOccur) {
10022e5b6d6dSopenharmony_ci          referenceBundle->log("// ");
10032e5b6d6dSopenharmony_ci          referenceBundle->log(propertyAndValueName(prop, i), true);
10042e5b6d6dSopenharmony_ci        }
10052e5b6d6dSopenharmony_ci        rep.addAll(delta);
10062e5b6d6dSopenharmony_ci    }
10072e5b6d6dSopenharmony_ci
10082e5b6d6dSopenharmony_ci    // add ASCII and general accents
10092e5b6d6dSopenharmony_ci    rep.addAll(GENERAL_ACCENTS).addAll(ASCII_BASE);
10102e5b6d6dSopenharmony_ci    rep.removeAll(CONTROL);
10112e5b6d6dSopenharmony_ci    //delta.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_HAN, status);
10122e5b6d6dSopenharmony_ci    //rep.removeAll(delta);
10132e5b6d6dSopenharmony_ci
10142e5b6d6dSopenharmony_ci    // now add the exemplar characters
10152e5b6d6dSopenharmony_ci    // can't get at them from Java right now
10162e5b6d6dSopenharmony_ci    tailoringBundle->log("<br>\n");
10172e5b6d6dSopenharmony_ci    if(options[REFERENCE].doesOccur) {
10182e5b6d6dSopenharmony_ci      referenceBundle->log("<br>\n");
10192e5b6d6dSopenharmony_ci    }
10202e5b6d6dSopenharmony_ci}
10212e5b6d6dSopenharmony_ci
10222e5b6d6dSopenharmony_ciUnicodeSet flatten(const UnicodeSet &source, UErrorCode &status) {
10232e5b6d6dSopenharmony_ci    UnicodeSet result;
10242e5b6d6dSopenharmony_ci    UnicodeSetIterator it(source);
10252e5b6d6dSopenharmony_ci    UnicodeString item, itemNFKD, toNormalize;
10262e5b6d6dSopenharmony_ci    while (it.next()) {
10272e5b6d6dSopenharmony_ci        // would be nicer if UnicodeSetIterator had a getString function
10282e5b6d6dSopenharmony_ci        if (it.isString()) {
10292e5b6d6dSopenharmony_ci          Normalizer::normalize(it.getString(), UNORM_NFD, 0, item, status);
10302e5b6d6dSopenharmony_ci          Normalizer::normalize(it.getString(), UNORM_NFKD, 0, itemNFKD, status);
10312e5b6d6dSopenharmony_ci        } else {
10322e5b6d6dSopenharmony_ci          toNormalize.setTo(it.getCodepoint());
10332e5b6d6dSopenharmony_ci          Normalizer::normalize(toNormalize, UNORM_NFD, 0, item, status);
10342e5b6d6dSopenharmony_ci          Normalizer::normalize(toNormalize, UNORM_NFKD, 0, itemNFKD, status);
10352e5b6d6dSopenharmony_ci        }
10362e5b6d6dSopenharmony_ci        result.addAll(item);
10372e5b6d6dSopenharmony_ci        result.addAll(itemNFKD);
10382e5b6d6dSopenharmony_ci    }
10392e5b6d6dSopenharmony_ci    return result;
10402e5b6d6dSopenharmony_ci}
10412e5b6d6dSopenharmony_ci
10422e5b6d6dSopenharmony_ci
10432e5b6d6dSopenharmony_civoid testWin(StrengthProbe &probe, UErrorCode &status)
10442e5b6d6dSopenharmony_ci{
10452e5b6d6dSopenharmony_ci  UnicodeSet trailings(UnicodeString("[\\uFE7D\\uFE7C\\u30FD\\uFF70\\u30FC\\u309D\\u3032\\u3031\\u3005\\u0651]"), status);
10462e5b6d6dSopenharmony_ci  char intChar[] = "\\uFE7D\\uFE7C\\u30FD\\uFF70\\u30FC\\u309D\\u3032\\u3031\\u3005\\u0651";
10472e5b6d6dSopenharmony_ci  UChar interesting[256];
10482e5b6d6dSopenharmony_ci  int32_t intLen = u_unescape(intChar, interesting, 256);
10492e5b6d6dSopenharmony_ci  UChar i = 0;
10502e5b6d6dSopenharmony_ci  UChar j = 0,  k = 0;
10512e5b6d6dSopenharmony_ci  int32_t count;
10522e5b6d6dSopenharmony_ci  Line myCh, combo, trial, inter, kLine;
10532e5b6d6dSopenharmony_ci  for(i = 0; i < intLen; i++) {
10542e5b6d6dSopenharmony_ci    inter.setTo(interesting[i]);
10552e5b6d6dSopenharmony_ci    logger->log(inter.toString(true), true);
10562e5b6d6dSopenharmony_ci    logger->log("----------------------\n");
10572e5b6d6dSopenharmony_ci    for(j = 0; j < 0xFFFF; j++) {
10582e5b6d6dSopenharmony_ci      myCh.setTo(j);
10592e5b6d6dSopenharmony_ci      if(probe.distanceFromEmptyString(myCh) == UCOL_IDENTICAL) {
10602e5b6d6dSopenharmony_ci        continue;
10612e5b6d6dSopenharmony_ci      }
10622e5b6d6dSopenharmony_ci      logger->log(myCh.toString(true));
10632e5b6d6dSopenharmony_ci      combo.setTo(j);
10642e5b6d6dSopenharmony_ci      combo.append(interesting[i]);
10652e5b6d6dSopenharmony_ci      count = 0;
10662e5b6d6dSopenharmony_ci      for(k = 0; k < 0xFFFF; k++) {
10672e5b6d6dSopenharmony_ci        kLine.setTo(k);
10682e5b6d6dSopenharmony_ci        trial.setTo(j);
10692e5b6d6dSopenharmony_ci        trial.append(k);
10702e5b6d6dSopenharmony_ci        if(probe.compare(kLine, inter) < 0) {
10712e5b6d6dSopenharmony_ci          if(probe.compare(trial, combo) >= 0) {
10722e5b6d6dSopenharmony_ci            count++;
10732e5b6d6dSopenharmony_ci          }
10742e5b6d6dSopenharmony_ci        }
10752e5b6d6dSopenharmony_ci      }
10762e5b6d6dSopenharmony_ci      logger->log("%i %i\n", count, count);
10772e5b6d6dSopenharmony_ci    }
10782e5b6d6dSopenharmony_ci  }
10792e5b6d6dSopenharmony_ci}
10802e5b6d6dSopenharmony_ci
1081