1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6*   Copyright (C) 1999-2015, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9******************************************************************************
10*
11*
12*  ucnv_io.cpp:
13*  initializes global variables and defines functions pertaining to converter
14*  name resolution aspect of the conversion code.
15*
16*   new implementation:
17*
18*   created on: 1999nov22
19*   created by: Markus W. Scherer
20*
21*   Use the binary cnvalias.icu (created from convrtrs.txt) to work
22*   with aliases for converter names.
23*
24*   Date        Name        Description
25*   11/22/1999  markus      Created
26*   06/28/2002  grhoten     Major overhaul of the converter alias design.
27*                           Now an alias can map to different converters
28*                           depending on the specified standard.
29*******************************************************************************
30*/
31
32#include "unicode/utypes.h"
33
34#if !UCONFIG_NO_CONVERSION
35
36#include "unicode/ucnv.h"
37#include "unicode/udata.h"
38
39#include "umutex.h"
40#include "uarrsort.h"
41#include "uassert.h"
42#include "udataswp.h"
43#include "cstring.h"
44#include "cmemory.h"
45#include "ucnv_io.h"
46#include "uenumimp.h"
47#include "ucln_cmn.h"
48
49/* Format of cnvalias.icu -----------------------------------------------------
50 *
51 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
52 * This binary form contains several tables. All indexes are to uint16_t
53 * units, and not to the bytes (uint8_t units). Addressing everything on
54 * 16-bit boundaries allows us to store more information with small index
55 * numbers, which are also 16-bit in size. The majority of the table (except
56 * the string table) are 16-bit numbers.
57 *
 * First there is the size of the Table of Contents (TOC). The TOC
 * entries contain the size of each section. In order to find the offset
 * of a section you just need to sum up the sizes of the sections before it.
 * The TOC length and entries are an array of uint32_t values.
 * The first section after the TOC starts immediately after the TOC.
63 *
64 * 1) This section contains a list of converters. This list contains indexes
65 * into the string table for the converter name. The index of this list is
66 * also used by other sections, which are mentioned later on.
67 * This list is not sorted.
68 *
69 * 2) This section contains a list of tags. This list contains indexes
70 * into the string table for the tag name. The index of this list is
71 * also used by other sections, which are mentioned later on.
72 * This list is in priority order of standards.
73 *
74 * 3) This section contains a list of sorted unique aliases. This
75 * list contains indexes into the string table for the alias name. The
76 * index of this list is also used by other sections, like the 4th section.
77 * The index for the 3rd and 4th section is used to get the
78 * alias -> converter name mapping. Section 3 and 4 form a two column table.
79 * Some of the most significant bits of each index may contain other
80 * information (see findConverter for details).
81 *
82 * 4) This section contains a list of mapped converter names. Consider this
83 * as a table that maps the 3rd section to the 1st section. This list contains
84 * indexes into the 1st section. The index of this list is the same index in
85 * the 3rd section. There is also some extra information in the high bits of
86 * each converter index in this table. Currently it's only used to say that
87 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
88 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
89 * the predigested form of the 5th section so that an alias lookup can be fast.
90 *
 * 5) This section contains a 2D array with indexes to the 6th section. This
 * section is the full form of all alias mappings. The column index is the
 * index into the converter list (column header). The row index is the index
 * into the tag list (row header). This 2D array is the top part of a 3D array.
 * The third dimension is in the 6th section.
 *
 * 6) This is a blob of variable length arrays. Each array starts with a size,
 * and is followed by indexes to alias names in the string table. This is
 * the third dimension of section 5. No other section should be referencing
 * this section.
101 *
102 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
103 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
104 * what type of string normalization is used among other potential things in the
105 * future.
106 *
107 * 8) This is the string table. All strings are indexed on an even address.
108 * There are two reasons for this. First many chip architectures locate strings
109 * faster on even address boundaries. Second, since all indexes are 16-bit
110 * numbers, this string table can be 128KB in size instead of 64KB when we
111 * only have strings starting on an even address.
112 *
113 * 9) When present this is a set of prenormalized strings from section 8. This
114 * table contains normalized strings with the dashes and spaces stripped out,
115 * and all strings lowercased. In the future, the options in section 7 may state
116 * other types of normalization.
117 *
118 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
119 * has a unique alias among all converters. That same alias can
120 * be mentioned in other standards on different converters,
121 * but only one alias per tag can be unique.
122 *
123 *
124 *              Converter Names (Usually in TR22 form)
125 *           -------------------------------------------.
126 *     T    /                                          /|
127 *     a   /                                          / |
128 *     g  /                                          /  |
129 *     s /                                          /   |
130 *      /                                          /    |
131 *      ------------------------------------------/     |
132 *    A |                                         |     |
133 *    l |                                         |     |
134 *    i |                                         |    /
135 *    a |                                         |   /
136 *    s |                                         |  /
137 *    e |                                         | /
138 *    s |                                         |/
139 *      -------------------------------------------
140 *
141 *
142 *
143 * Here is what it really looks like. It's like swiss cheese.
144 * There are holes. Some converters aren't recognized by
145 * a standard, or they are really old converters that the
146 * standard doesn't recognize anymore.
147 *
148 *              Converter Names (Usually in TR22 form)
149 *           -------------------------------------------.
150 *     T    /##########################################/|
151 *     a   /     #            #                       /#
152 *     g  /  #      ##     ##     ### # ### ### ### #/
153 *     s / #             #####  ####        ##  ## #/#
154 *      / ### # # ##  #  #   #          ### # #   #/##
155 *      ------------------------------------------/# #
156 *    A |### # # ##  #  #   #          ### # #   #|# #
157 *    l |# # #    #     #               ## #     #|# #
158 *    i |# # #    #     #                #       #|#
159 *    a |#                                       #|#
160 *    s |                                        #|#
161 *    e
162 *    s
163 *
164 */
165
/**
 * Used by the UEnumeration API.
 * Per-enumeration state for walking one tagged alias list.
 */
typedef struct UAliasContext {
    /* Index into gMainTable.taggedAliasLists of the list being enumerated;
       0 means there is nothing to enumerate. */
    uint32_t listOffset;
    /* Position within that list of the next alias to return. */
    uint32_t listIdx;
} UAliasContext;
173
/* Name and type handed to udata_openChoice() when loading the alias table. */
static const char DATA_NAME[] = "cnvalias";
static const char DATA_TYPE[] = "icu";

/* Handle of the loaded alias data; set by initAliasData() and closed by ucnv_io_cleanup(). */
static UDataMemory *gAliasData=nullptr;
/* Run-once state for initAliasData(); see haveAliasData(). */
static icu::UInitOnce gAliasDataInitOnce {};
179
/*
 * Positions of the uint32_t values at the start of the alias data.
 * Entry 0 holds the TOC length; the entries after it hold the section sizes.
 */
enum {
    tocLengthIndex             = 0,
    converterListIndex         = 1,
    tagListIndex               = 2,
    aliasListIndex             = 3,
    untaggedConvArrayIndex     = 4,
    taggedAliasArrayIndex      = 5,
    taggedAliasListsIndex      = 6,
    tableOptionsIndex          = 7,
    stringTableIndex           = 8,
    normalizedStringTableIndex = 9,
    offsetsCount,               /* length of the swapper's temporary offsets[] */
    minTocLength               = 8 /* min. tocLength in the file, does not count the tocLengthIndex! */
};
194
/* Options used by initAliasData() when the data has no usable options
   section: alias strings are treated as unnormalized. */
static const UConverterAliasOptions defaultTableOptions = {
    UCNV_IO_UNNORMALIZED,
    0 /* containsCnvOptionInfo */
};
/* Section sizes and section pointers of the loaded alias table; filled in by
   initAliasData() and zeroed again by ucnv_io_cleanup(). */
static UConverterAlias gMainTable;
200
/*
 * Turn a string index taken from one of the tables into a C string pointer.
 * The index is added to a 16-bit-unit table pointer, i.e. it counts uint16_t
 * units rather than bytes.
 * The whole expansion is parenthesized so that an operator applied to the
 * macro result (for example GET_STRING(i)[0]) acts on the const char * and
 * not on the uint16_t pointer underneath the cast.
 */
#define GET_STRING(idx) ((const char *)(gMainTable.stringTable + (idx)))
#define GET_NORMALIZED_STRING(idx) ((const char *)(gMainTable.normalizedStringTable + (idx)))
203
204static UBool U_CALLCONV
205isAcceptable(void * /*context*/,
206             const char * /*type*/, const char * /*name*/,
207             const UDataInfo *pInfo) {
208    return (UBool)(
209        pInfo->size>=20 &&
210        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
211        pInfo->charsetFamily==U_CHARSET_FAMILY &&
212        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
213        pInfo->dataFormat[1]==0x76 &&
214        pInfo->dataFormat[2]==0x41 &&
215        pInfo->dataFormat[3]==0x6c &&
216        pInfo->formatVersion[0]==3);
217}
218
219static UBool U_CALLCONV ucnv_io_cleanup()
220{
221    if (gAliasData) {
222        udata_close(gAliasData);
223        gAliasData = nullptr;
224    }
225    gAliasDataInitOnce.reset();
226
227    uprv_memset(&gMainTable, 0, sizeof(gMainTable));
228
229    return true;                   /* Everything was cleaned up */
230}
231
232static void U_CALLCONV initAliasData(UErrorCode &errCode) {
233    UDataMemory *data;
234    const uint16_t *table;
235    const uint32_t *sectionSizes;
236    uint32_t tableStart;
237    uint32_t currOffset;
238
239    ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
240
241    U_ASSERT(gAliasData == nullptr);
242    data = udata_openChoice(nullptr, DATA_TYPE, DATA_NAME, isAcceptable, nullptr, &errCode);
243    if(U_FAILURE(errCode)) {
244        return;
245    }
246
247    sectionSizes = (const uint32_t *)udata_getMemory(data);
248    table = (const uint16_t *)sectionSizes;
249
250    tableStart      = sectionSizes[0];
251    if (tableStart < minTocLength) {
252        errCode = U_INVALID_FORMAT_ERROR;
253        udata_close(data);
254        return;
255    }
256    gAliasData = data;
257
258    gMainTable.converterListSize      = sectionSizes[1];
259    gMainTable.tagListSize            = sectionSizes[2];
260    gMainTable.aliasListSize          = sectionSizes[3];
261    gMainTable.untaggedConvArraySize  = sectionSizes[4];
262    gMainTable.taggedAliasArraySize   = sectionSizes[5];
263    gMainTable.taggedAliasListsSize   = sectionSizes[6];
264    gMainTable.optionTableSize        = sectionSizes[7];
265    gMainTable.stringTableSize        = sectionSizes[8];
266
267    if (tableStart > 8) {
268        gMainTable.normalizedStringTableSize = sectionSizes[9];
269    }
270
271    currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
272    gMainTable.converterList = table + currOffset;
273
274    currOffset += gMainTable.converterListSize;
275    gMainTable.tagList = table + currOffset;
276
277    currOffset += gMainTable.tagListSize;
278    gMainTable.aliasList = table + currOffset;
279
280    currOffset += gMainTable.aliasListSize;
281    gMainTable.untaggedConvArray = table + currOffset;
282
283    currOffset += gMainTable.untaggedConvArraySize;
284    gMainTable.taggedAliasArray = table + currOffset;
285
286    /* aliasLists is a 1's based array, but it has a padding character */
287    currOffset += gMainTable.taggedAliasArraySize;
288    gMainTable.taggedAliasLists = table + currOffset;
289
290    currOffset += gMainTable.taggedAliasListsSize;
291    if (gMainTable.optionTableSize > 0
292        && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
293    {
294        /* Faster table */
295        gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
296    }
297    else {
298        /* Smaller table, or I can't handle this normalization mode!
299        Use the original slower table lookup. */
300        gMainTable.optionTable = &defaultTableOptions;
301    }
302
303    currOffset += gMainTable.optionTableSize;
304    gMainTable.stringTable = table + currOffset;
305
306    currOffset += gMainTable.stringTableSize;
307    gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
308        ? gMainTable.stringTable : (table + currOffset));
309}
310
311
312static UBool
313haveAliasData(UErrorCode *pErrorCode) {
314    umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
315    return U_SUCCESS(*pErrorCode);
316}
317
318static inline UBool
319isAlias(const char *alias, UErrorCode *pErrorCode) {
320    if(alias==nullptr) {
321        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
322        return false;
323    }
324    return (UBool)(*alias!=0);
325}
326
327static uint32_t getTagNumber(const char *tagname) {
328    if (gMainTable.tagList) {
329        uint32_t tagNum;
330        for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
331            if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
332                return tagNum;
333            }
334        }
335    }
336
337    return UINT32_MAX;
338}
339
/*
 * character types relevant for ucnv_compareNames()
 * The type tables below store one of these values per character, or, for
 * letters, the code of the lowercase letter (always >= MINLETTER).
 */
enum {
    UIGNORE,   /* not a letter or digit: skipped when comparing */
    ZERO,      /* the digit zero */
    NONZERO,   /* the digits one through nine */
    MINLETTER /* any values from here on are lowercase letter mappings */
};

/* character types for ASCII 00..7F */
static const uint8_t asciiTypes[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Type of a character under the ASCII table; characters whose value is
 * negative as an int8_t (80..FF) are ignored.
 * The argument is parenthesized at every use so that an expression argument
 * is cast as a whole rather than only its first operand.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)
361
/*
 * character types for EBCDIC 80..FF
 * The table is indexed with the low seven bits of the byte (see
 * GET_EBCDIC_TYPE). Entries are UIGNORE (0), ZERO, NONZERO, or the code of
 * the letter to compare as: rows 0..2 map 0x81..0xa9 to themselves and
 * rows 4..6 (bytes 0xc1..0xe9) map to those same codes. The last row
 * (bytes 0xf0..0xf9) holds the digits.
 */
static const uint8_t ebcdicTypes[128] = {
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
};

/* Type of a character under the EBCDIC table; characters whose value is
   non-negative as an int8_t (00..7F) are ignored. */
#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
375
/* GET_CHAR_TYPE classifies a character with the table that matches the
   charset family this build is compiled for; any other family value is a
   compile-time error. */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
#else
#   error U_CHARSET_FAMILY is not valid
#endif
383
384
385/* @see ucnv_compareNames */
386U_CAPI char * U_CALLCONV
387ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
388    char *dstItr = dst;
389    uint8_t type, nextType;
390    char c1;
391    UBool afterDigit = false;
392
393    while ((c1 = *name++) != 0) {
394        type = GET_ASCII_TYPE(c1);
395        switch (type) {
396        case UIGNORE:
397            afterDigit = false;
398            continue; /* ignore all but letters and digits */
399        case ZERO:
400            if (!afterDigit) {
401                nextType = GET_ASCII_TYPE(*name);
402                if (nextType == ZERO || nextType == NONZERO) {
403                    continue; /* ignore leading zero before another digit */
404                }
405            }
406            break;
407        case NONZERO:
408            afterDigit = true;
409            break;
410        default:
411            c1 = (char)type; /* lowercased letter */
412            afterDigit = false;
413            break;
414        }
415        *dstItr++ = c1;
416    }
417    *dstItr = 0;
418    return dst;
419}
420
421U_CAPI char * U_CALLCONV
422ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
423    char *dstItr = dst;
424    uint8_t type, nextType;
425    char c1;
426    UBool afterDigit = false;
427
428    while ((c1 = *name++) != 0) {
429        type = GET_EBCDIC_TYPE(c1);
430        switch (type) {
431        case UIGNORE:
432            afterDigit = false;
433            continue; /* ignore all but letters and digits */
434        case ZERO:
435            if (!afterDigit) {
436                nextType = GET_EBCDIC_TYPE(*name);
437                if (nextType == ZERO || nextType == NONZERO) {
438                    continue; /* ignore leading zero before another digit */
439                }
440            }
441            break;
442        case NONZERO:
443            afterDigit = true;
444            break;
445        default:
446            c1 = (char)type; /* lowercased letter */
447            afterDigit = false;
448            break;
449        }
450        *dstItr++ = c1;
451    }
452    *dstItr = 0;
453    return dst;
454}
455
456/**
457 * Do a fuzzy compare of two converter/alias names.
458 * The comparison is case-insensitive, ignores leading zeroes if they are not
459 * followed by further digits, and ignores all but letters and digits.
460 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
461 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
462 * at http://www.unicode.org/reports/tr22/
463 *
464 * This is a symmetrical (commutative) operation; order of arguments
465 * is insignificant.  This is an important property for sorting the
466 * list (when the list is preprocessed into binary form) and for
467 * performing binary searches on it at run time.
468 *
469 * @param name1 a converter name or alias, zero-terminated
470 * @param name2 a converter name or alias, zero-terminated
471 * @return 0 if the names match, or a negative value if the name1
472 * lexically precedes name2, or a positive value if the name1
473 * lexically follows name2.
474 *
475 * @see ucnv_io_stripForCompare
476 */
477U_CAPI int U_EXPORT2
478ucnv_compareNames(const char *name1, const char *name2) {
479    int rc;
480    uint8_t type, nextType;
481    char c1, c2;
482    UBool afterDigit1 = false, afterDigit2 = false;
483
484    for (;;) {
485        while ((c1 = *name1++) != 0) {
486            type = GET_CHAR_TYPE(c1);
487            switch (type) {
488            case UIGNORE:
489                afterDigit1 = false;
490                continue; /* ignore all but letters and digits */
491            case ZERO:
492                if (!afterDigit1) {
493                    nextType = GET_CHAR_TYPE(*name1);
494                    if (nextType == ZERO || nextType == NONZERO) {
495                        continue; /* ignore leading zero before another digit */
496                    }
497                }
498                break;
499            case NONZERO:
500                afterDigit1 = true;
501                break;
502            default:
503                c1 = (char)type; /* lowercased letter */
504                afterDigit1 = false;
505                break;
506            }
507            break; /* deliver c1 */
508        }
509        while ((c2 = *name2++) != 0) {
510            type = GET_CHAR_TYPE(c2);
511            switch (type) {
512            case UIGNORE:
513                afterDigit2 = false;
514                continue; /* ignore all but letters and digits */
515            case ZERO:
516                if (!afterDigit2) {
517                    nextType = GET_CHAR_TYPE(*name2);
518                    if (nextType == ZERO || nextType == NONZERO) {
519                        continue; /* ignore leading zero before another digit */
520                    }
521                }
522                break;
523            case NONZERO:
524                afterDigit2 = true;
525                break;
526            default:
527                c2 = (char)type; /* lowercased letter */
528                afterDigit2 = false;
529                break;
530            }
531            break; /* deliver c2 */
532        }
533
534        /* If we reach the ends of both strings then they match */
535        if ((c1|c2)==0) {
536            return 0;
537        }
538
539        /* Case-insensitive comparison */
540        rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
541        if (rc != 0) {
542            return rc;
543        }
544    }
545}
546
547/*
548 * search for an alias
549 * return the converter number index for gConverterList
550 */
551static inline uint32_t
552findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
553    uint32_t mid, start, limit;
554    uint32_t lastMid;
555    int result;
556    int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
557    char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
558
559    if (!isUnnormalized) {
560        if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
561            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
562            return UINT32_MAX;
563        }
564
565        /* Lower case and remove ignoreable characters. */
566        ucnv_io_stripForCompare(strippedName, alias);
567        alias = strippedName;
568    }
569
570    /* do a binary search for the alias */
571    start = 0;
572    limit = gMainTable.untaggedConvArraySize;
573    mid = limit;
574    lastMid = UINT32_MAX;
575
576    for (;;) {
577        mid = (uint32_t)((start + limit) / 2);
578        if (lastMid == mid) {   /* Have we moved? */
579            break;  /* We haven't moved, and it wasn't found. */
580        }
581        lastMid = mid;
582        if (isUnnormalized) {
583            result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
584        }
585        else {
586            result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
587        }
588
589        if (result < 0) {
590            limit = mid;
591        } else if (result > 0) {
592            start = mid;
593        } else {
594            /* Since the gencnval tool folds duplicates into one entry,
595             * this alias in gAliasList is unique, but different standards
596             * may map an alias to different converters.
597             */
598            if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
599                *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
600            }
601            /* State whether the canonical converter name contains an option.
602            This information is contained in this list in order to maintain backward & forward compatibility. */
603            if (containsOption) {
604                UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
605                *containsOption = (UBool)((containsCnvOptionInfo
606                    && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
607                    || !containsCnvOptionInfo);
608            }
609            return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
610        }
611    }
612
613    return UINT32_MAX;
614}
615
616/*
617 * Is this alias in this list?
618 * alias and listOffset should be non-nullptr.
619 */
620static inline UBool
621isAliasInList(const char *alias, uint32_t listOffset) {
622    if (listOffset) {
623        uint32_t currAlias;
624        uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
625        /* +1 to skip listCount */
626        const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
627        for (currAlias = 0; currAlias < listCount; currAlias++) {
628            if (currList[currAlias]
629                && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
630            {
631                return true;
632            }
633        }
634    }
635    return false;
636}
637
638/*
639 * Search for an standard name of an alias (what is the default name
640 * that this standard uses?)
641 * return the listOffset for gTaggedAliasLists. If it's 0,
642 * the it couldn't be found, but the parameters are valid.
643 */
644static uint32_t
645findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
646    uint32_t idx;
647    uint32_t listOffset;
648    uint32_t convNum;
649    UErrorCode myErr = U_ZERO_ERROR;
650    uint32_t tagNum = getTagNumber(standard);
651
652    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
653    convNum = findConverter(alias, nullptr, &myErr);
654    if (myErr != U_ZERO_ERROR) {
655        *pErrorCode = myErr;
656    }
657
658    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
659        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
660        if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
661            return listOffset;
662        }
663        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
664            /* Uh Oh! They used an ambiguous alias.
665               We have to search the whole swiss cheese starting
666               at the highest standard affinity.
667               This may take a while.
668            */
669            for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
670                listOffset = gMainTable.taggedAliasArray[idx];
671                if (listOffset && isAliasInList(alias, listOffset)) {
672                    uint32_t currTagNum = idx/gMainTable.converterListSize;
673                    uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
674                    uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
675                    if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
676                        return tempListOffset;
677                    }
678                    /* else keep on looking */
679                    /* We could speed this up by starting on the next row
680                       because an alias is unique per row, right now.
681                       This would change if alias versioning appears. */
682                }
683            }
684            /* The standard doesn't know about the alias */
685        }
686        /* else no default name */
687        return 0;
688    }
689    /* else converter or tag not found */
690
691    return UINT32_MAX;
692}
693
694/* Return the canonical name */
695static uint32_t
696findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
697    uint32_t idx;
698    uint32_t listOffset;
699    uint32_t convNum;
700    UErrorCode myErr = U_ZERO_ERROR;
701    uint32_t tagNum = getTagNumber(standard);
702
703    /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
704    convNum = findConverter(alias, nullptr, &myErr);
705    if (myErr != U_ZERO_ERROR) {
706        *pErrorCode = myErr;
707    }
708
709    if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
710        listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
711        if (listOffset && isAliasInList(alias, listOffset)) {
712            return convNum;
713        }
714        if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
715            /* Uh Oh! They used an ambiguous alias.
716               We have to search one slice of the swiss cheese.
717               We search only in the requested tag, not the whole thing.
718               This may take a while.
719            */
720            uint32_t convStart = (tagNum)*gMainTable.converterListSize;
721            uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
722            for (idx = convStart; idx < convLimit; idx++) {
723                listOffset = gMainTable.taggedAliasArray[idx];
724                if (listOffset && isAliasInList(alias, listOffset)) {
725                    return idx-convStart;
726                }
727            }
728            /* The standard doesn't know about the alias */
729        }
730        /* else no canonical name */
731    }
732    /* else converter or tag not found */
733
734    return UINT32_MAX;
735}
736
737U_CAPI const char *
738ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
739    const char *aliasTmp = alias;
740    int32_t i = 0;
741    for (i = 0; i < 2; i++) {
742        if (i == 1) {
743            /*
744             * After the first unsuccess converter lookup, check to see if
745             * the name begins with 'x-'. If it does, strip it off and try
746             * again.  This behaviour is similar to how ICU4J does it.
747             */
748            if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
749                aliasTmp = aliasTmp+2;
750            } else {
751                break;
752            }
753        }
754        if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
755            uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
756            if (convNum < gMainTable.converterListSize) {
757                return GET_STRING(gMainTable.converterList[convNum]);
758            }
759            /* else converter not found */
760        } else {
761            break;
762        }
763    }
764
765    return nullptr;
766}
767
768U_CDECL_BEGIN
769
770
771static int32_t U_CALLCONV
772ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
773    int32_t value = 0;
774    UAliasContext *myContext = (UAliasContext *)(enumerator->context);
775    uint32_t listOffset = myContext->listOffset;
776
777    if (listOffset) {
778        value = gMainTable.taggedAliasLists[listOffset];
779    }
780    return value;
781}
782
783static const char * U_CALLCONV
784ucnv_io_nextStandardAliases(UEnumeration *enumerator,
785                            int32_t* resultLength,
786                            UErrorCode * /*pErrorCode*/)
787{
788    UAliasContext *myContext = (UAliasContext *)(enumerator->context);
789    uint32_t listOffset = myContext->listOffset;
790
791    if (listOffset) {
792        uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
793        const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
794
795        if (myContext->listIdx < listCount) {
796            const char *myStr = GET_STRING(currList[myContext->listIdx++]);
797            if (resultLength) {
798                *resultLength = (int32_t)uprv_strlen(myStr);
799            }
800            return myStr;
801        }
802    }
803    /* Either we accessed a zero length list, or we enumerated too far. */
804    if (resultLength) {
805        *resultLength = 0;
806    }
807    return nullptr;
808}
809
810static void U_CALLCONV
811ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
812    ((UAliasContext *)(enumerator->context))->listIdx = 0;
813}
814
815static void U_CALLCONV
816ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
817    uprv_free(enumerator->context);
818    uprv_free(enumerator);
819}
820
821U_CDECL_END
822
/*
 * Enumerate the aliases for the specified converter and standard tag.
 *
 * This is a template: ucnv_openStandardNames() copies it into a freshly
 * allocated UEnumeration and then stores a UAliasContext in its context field.
 * NOTE(review): the initializer order must match the field order of
 * UEnumeration (declared outside this file section) - confirm against
 * uenumimp.h before reordering.
 */
static const UEnumeration gEnumAliases = {
    nullptr,
    nullptr,
    ucnv_io_closeUEnumeration,
    ucnv_io_countStandardAliases,
    uenum_unextDefault,
    ucnv_io_nextStandardAliases,
    ucnv_io_resetStandardAliases
};
833
834U_CAPI UEnumeration * U_EXPORT2
835ucnv_openStandardNames(const char *convName,
836                       const char *standard,
837                       UErrorCode *pErrorCode)
838{
839    UEnumeration *myEnum = nullptr;
840    if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
841        uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
842
843        /* When listOffset == 0, we want to acknowledge that the
844           converter name and standard are okay, but there
845           is nothing to enumerate. */
846        if (listOffset < gMainTable.taggedAliasListsSize) {
847            UAliasContext *myContext;
848
849            myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
850            if (myEnum == nullptr) {
851                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
852                return nullptr;
853            }
854            uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
855            myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
856            if (myContext == nullptr) {
857                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
858                uprv_free(myEnum);
859                return nullptr;
860            }
861            myContext->listOffset = listOffset;
862            myContext->listIdx = 0;
863            myEnum->context = myContext;
864        }
865        /* else converter or tag not found */
866    }
867    return myEnum;
868}
869
870static uint16_t
871ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
872    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
873        uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
874        if (convNum < gMainTable.converterListSize) {
875            /* tagListNum - 1 is the ALL tag */
876            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
877
878            if (listOffset) {
879                return gMainTable.taggedAliasLists[listOffset];
880            }
881            /* else this shouldn't happen. internal program error */
882        }
883        /* else converter not found */
884    }
885    return 0;
886}
887
888static uint16_t
889ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
890    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
891        uint32_t currAlias;
892        uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
893        if (convNum < gMainTable.converterListSize) {
894            /* tagListNum - 1 is the ALL tag */
895            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
896
897            if (listOffset) {
898                uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
899                /* +1 to skip listCount */
900                const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
901
902                for (currAlias = start; currAlias < listCount; currAlias++) {
903                    aliases[currAlias] = GET_STRING(currList[currAlias]);
904                }
905            }
906            /* else this shouldn't happen. internal program error */
907        }
908        /* else converter not found */
909    }
910    return 0;
911}
912
913static const char *
914ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
915    if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
916        uint32_t convNum = findConverter(alias, nullptr, pErrorCode);
917        if (convNum < gMainTable.converterListSize) {
918            /* tagListNum - 1 is the ALL tag */
919            int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
920
921            if (listOffset) {
922                uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
923                /* +1 to skip listCount */
924                const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
925
926                if (n < listCount)  {
927                    return GET_STRING(currList[n]);
928                }
929                *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
930            }
931            /* else this shouldn't happen. internal program error */
932        }
933        /* else converter not found */
934    }
935    return nullptr;
936}
937
938static uint16_t
939ucnv_io_countStandards(UErrorCode *pErrorCode) {
940    if (haveAliasData(pErrorCode)) {
941        /* Don't include the empty list */
942        return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
943    }
944
945    return 0;
946}
947
948U_CAPI const char * U_EXPORT2
949ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
950    if (haveAliasData(pErrorCode)) {
951        if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
952            return GET_STRING(gMainTable.tagList[n]);
953        }
954        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
955    }
956
957    return nullptr;
958}
959
960U_CAPI const char * U_EXPORT2
961ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
962    if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
963        uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
964
965        if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
966            const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
967
968            /* Get the preferred name from this list */
969            if (currList[0]) {
970                return GET_STRING(currList[0]);
971            }
972            /* else someone screwed up the alias table. */
973            /* *pErrorCode = U_INVALID_FORMAT_ERROR */
974        }
975    }
976
977    return nullptr;
978}
979
980U_CAPI uint16_t U_EXPORT2
981ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
982{
983    return ucnv_io_countAliases(alias, pErrorCode);
984}
985
986
987U_CAPI const char* U_EXPORT2
988ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
989{
990    return ucnv_io_getAlias(alias, n, pErrorCode);
991}
992
993U_CAPI void U_EXPORT2
994ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
995{
996    ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
997}
998
999U_CAPI uint16_t U_EXPORT2
1000ucnv_countStandards()
1001{
1002    UErrorCode err = U_ZERO_ERROR;
1003    return ucnv_io_countStandards(&err);
1004}
1005
1006U_CAPI const char * U_EXPORT2
1007ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
1008    if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
1009        uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
1010
1011        if (convNum < gMainTable.converterListSize) {
1012            return GET_STRING(gMainTable.converterList[convNum]);
1013        }
1014    }
1015
1016    return nullptr;
1017}
1018
1019U_CDECL_BEGIN
1020
1021
1022static int32_t U_CALLCONV
1023ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
1024    return gMainTable.converterListSize;
1025}
1026
1027static const char * U_CALLCONV
1028ucnv_io_nextAllConverters(UEnumeration *enumerator,
1029                            int32_t* resultLength,
1030                            UErrorCode * /*pErrorCode*/)
1031{
1032    uint16_t *myContext = (uint16_t *)(enumerator->context);
1033
1034    if (*myContext < gMainTable.converterListSize) {
1035        const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
1036        if (resultLength) {
1037            *resultLength = (int32_t)uprv_strlen(myStr);
1038        }
1039        return myStr;
1040    }
1041    /* Either we accessed a zero length list, or we enumerated too far. */
1042    if (resultLength) {
1043        *resultLength = 0;
1044    }
1045    return nullptr;
1046}
1047
1048static void U_CALLCONV
1049ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
1050    *((uint16_t *)(enumerator->context)) = 0;
1051}
1052U_CDECL_END
/*
 * Template for the enumeration over all converter names.
 *
 * ucnv_openAllNames() copies it into a freshly allocated UEnumeration and then
 * stores a heap-allocated uint16_t cursor in its context field.
 * NOTE(review): the initializer order must match the field order of
 * UEnumeration (declared outside this file section) - confirm against
 * uenumimp.h before reordering.
 */
static const UEnumeration gEnumAllConverters = {
    nullptr,
    nullptr,
    ucnv_io_closeUEnumeration,
    ucnv_io_countAllConverters,
    uenum_unextDefault,
    ucnv_io_nextAllConverters,
    ucnv_io_resetAllConverters
};
1062
1063U_CAPI UEnumeration * U_EXPORT2
1064ucnv_openAllNames(UErrorCode *pErrorCode) {
1065    UEnumeration *myEnum = nullptr;
1066    if (haveAliasData(pErrorCode)) {
1067        uint16_t *myContext;
1068
1069        myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
1070        if (myEnum == nullptr) {
1071            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1072            return nullptr;
1073        }
1074        uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
1075        myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
1076        if (myContext == nullptr) {
1077            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1078            uprv_free(myEnum);
1079            return nullptr;
1080        }
1081        *myContext = 0;
1082        myEnum->context = myContext;
1083    }
1084    return myEnum;
1085}
1086
1087U_CAPI uint16_t
1088ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
1089    if (haveAliasData(pErrorCode)) {
1090        return (uint16_t)gMainTable.converterListSize;
1091    }
1092    return 0;
1093}
1094
1095/* alias table swapping ----------------------------------------------------- */
1096
U_CDECL_BEGIN

/*
 * Signature of the name-stripping functions used when sorting aliases:
 * takes a destination buffer and a name, and returns a char pointer that
 * io_compareRows() passes straight to uprv_strcmp().
 * ucnv_swapAliases() stores either ucnv_io_stripASCIIForCompare or
 * ucnv_io_stripEBCDICForCompare in a pointer of this type.
 */
typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
U_CDECL_END
1101
1102
/*
 * One row of the temporary sorting array used by ucnv_swapAliases().
 *
 * Each row holds a platform-endian charset string index together with the
 * row's original position. After the array has been sorted by the strings,
 * the actual arrays are permuted according to the saved positions.
 */
typedef struct TempRow {
    uint16_t strIndex;   /* string index read from the alias list */
    uint16_t sortIndex;  /* position of this row before sorting */
} TempRow;
1113
/*
 * Working state for re-sorting the alias list in ucnv_swapAliases().
 * A pointer to this struct is passed to io_compareRows() as the sort context.
 */
typedef struct TempAliasTable {
    const char *chars;                  /* base of the string table; rows address it as chars+2*strIndex */
    TempRow *rows;                      /* rows to sort (stack array or heap block) */
    uint16_t *resort;                   /* scratch array used when swapping in place */
    StripForCompareFn *stripForCompare; /* name-stripping function applied before comparing */
} TempAliasTable;
1120
/*
 * Number of rows that ucnv_swapAliases() can sort using its stack arrays;
 * larger alias lists make it allocate the temporary arrays on the heap.
 */
enum {
    STACK_ROW_CAPACITY=500
};
1124
1125static int32_t U_CALLCONV
1126io_compareRows(const void *context, const void *left, const void *right) {
1127    char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1128         strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1129
1130    TempAliasTable *tempTable=(TempAliasTable *)context;
1131    const char *chars=tempTable->chars;
1132
1133    return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1134                                tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1135}
1136
1137U_CAPI int32_t U_EXPORT2
1138ucnv_swapAliases(const UDataSwapper *ds,
1139                 const void *inData, int32_t length, void *outData,
1140                 UErrorCode *pErrorCode) {
1141    const UDataInfo *pInfo;
1142    int32_t headerSize;
1143
1144    const uint16_t *inTable;
1145    const uint32_t *inSectionSizes;
1146    uint32_t toc[offsetsCount];
1147    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
1148    uint32_t i, count, tocLength, topOffset;
1149
1150    TempRow rows[STACK_ROW_CAPACITY];
1151    uint16_t resort[STACK_ROW_CAPACITY];
1152    TempAliasTable tempTable;
1153
1154    /* udata_swapDataHeader checks the arguments */
1155    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1156    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
1157        return 0;
1158    }
1159
1160    /* check data format and format version */
1161    pInfo=(const UDataInfo *)((const char *)inData+4);
1162    if(!(
1163        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
1164        pInfo->dataFormat[1]==0x76 &&
1165        pInfo->dataFormat[2]==0x41 &&
1166        pInfo->dataFormat[3]==0x6c &&
1167        pInfo->formatVersion[0]==3
1168    )) {
1169        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1170                         pInfo->dataFormat[0], pInfo->dataFormat[1],
1171                         pInfo->dataFormat[2], pInfo->dataFormat[3],
1172                         pInfo->formatVersion[0]);
1173        *pErrorCode=U_UNSUPPORTED_ERROR;
1174        return 0;
1175    }
1176
1177    /* an alias table must contain at least the table of contents array */
1178    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
1179        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1180                         length-headerSize);
1181        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1182        return 0;
1183    }
1184
1185    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
1186    inTable=(const uint16_t *)inSectionSizes;
1187    uprv_memset(toc, 0, sizeof(toc));
1188    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
1189    if(tocLength<minTocLength || offsetsCount<=tocLength) {
1190        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
1191        *pErrorCode=U_INVALID_FORMAT_ERROR;
1192        return 0;
1193    }
1194
1195    /* read the known part of the table of contents */
1196    for(i=converterListIndex; i<=tocLength; ++i) {
1197        toc[i]=ds->readUInt32(inSectionSizes[i]);
1198    }
1199
1200    /* compute offsets */
1201    uprv_memset(offsets, 0, sizeof(offsets));
1202    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
1203    for(i=tagListIndex; i<=tocLength; ++i) {
1204        offsets[i]=offsets[i-1]+toc[i-1];
1205    }
1206
1207    /* compute the overall size of the after-header data, in numbers of 16-bit units */
1208    topOffset=offsets[i-1]+toc[i-1];
1209
1210    if(length>=0) {
1211        uint16_t *outTable;
1212        const uint16_t *p, *p2;
1213        uint16_t *q, *q2;
1214        uint16_t oldIndex;
1215
1216        if((length-headerSize)<(2*(int32_t)topOffset)) {
1217            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1218                             length-headerSize);
1219            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1220            return 0;
1221        }
1222
1223        outTable=(uint16_t *)((char *)outData+headerSize);
1224
1225        /* swap the entire table of contents */
1226        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
1227
1228        /* swap unormalized strings & normalized strings */
1229        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
1230                             outTable+offsets[stringTableIndex], pErrorCode);
1231        if(U_FAILURE(*pErrorCode)) {
1232            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
1233            return 0;
1234        }
1235
1236        if(ds->inCharset==ds->outCharset) {
1237            /* no need to sort, just swap all 16-bit values together */
1238            ds->swapArray16(ds,
1239                            inTable+offsets[converterListIndex],
1240                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1241                            outTable+offsets[converterListIndex],
1242                            pErrorCode);
1243        } else {
1244            /* allocate the temporary table for sorting */
1245            count=toc[aliasListIndex];
1246
1247            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
1248
1249            if(count<=STACK_ROW_CAPACITY) {
1250                tempTable.rows=rows;
1251                tempTable.resort=resort;
1252            } else {
1253                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
1254                if(tempTable.rows==nullptr) {
1255                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1256                                     count);
1257                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1258                    return 0;
1259                }
1260                tempTable.resort=(uint16_t *)(tempTable.rows+count);
1261            }
1262
1263            if(ds->outCharset==U_ASCII_FAMILY) {
1264                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1265            } else /* U_EBCDIC_FAMILY */ {
1266                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1267            }
1268
1269            /*
1270             * Sort unique aliases+mapped names.
1271             *
1272             * We need to sort the list again by outCharset strings because they
1273             * sort differently for different charset families.
1274             * First we set up a temporary table with the string indexes and
1275             * sorting indexes and sort that.
1276             * Then we permutate and copy/swap the actual values.
1277             */
1278            p=inTable+offsets[aliasListIndex];
1279            q=outTable+offsets[aliasListIndex];
1280
1281            p2=inTable+offsets[untaggedConvArrayIndex];
1282            q2=outTable+offsets[untaggedConvArrayIndex];
1283
1284            for(i=0; i<count; ++i) {
1285                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1286                tempTable.rows[i].sortIndex=(uint16_t)i;
1287            }
1288
1289            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1290                           io_compareRows, &tempTable,
1291                           false, pErrorCode);
1292
1293            if(U_SUCCESS(*pErrorCode)) {
1294                /* copy/swap/permutate items */
1295                if(p!=q) {
1296                    for(i=0; i<count; ++i) {
1297                        oldIndex=tempTable.rows[i].sortIndex;
1298                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
1299                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
1300                    }
1301                } else {
1302                    /*
1303                     * If we swap in-place, then the permutation must use another
1304                     * temporary array (tempTable.resort)
1305                     * before the results are copied to the outBundle.
1306                     */
1307                    uint16_t *r=tempTable.resort;
1308
1309                    for(i=0; i<count; ++i) {
1310                        oldIndex=tempTable.rows[i].sortIndex;
1311                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
1312                    }
1313                    uprv_memcpy(q, r, 2*(size_t)count);
1314
1315                    for(i=0; i<count; ++i) {
1316                        oldIndex=tempTable.rows[i].sortIndex;
1317                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
1318                    }
1319                    uprv_memcpy(q2, r, 2*(size_t)count);
1320                }
1321            }
1322
1323            if(tempTable.rows!=rows) {
1324                uprv_free(tempTable.rows);
1325            }
1326
1327            if(U_FAILURE(*pErrorCode)) {
1328                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1329                                 count);
1330                return 0;
1331            }
1332
1333            /* swap remaining 16-bit values */
1334            ds->swapArray16(ds,
1335                            inTable+offsets[converterListIndex],
1336                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1337                            outTable+offsets[converterListIndex],
1338                            pErrorCode);
1339            ds->swapArray16(ds,
1340                            inTable+offsets[taggedAliasArrayIndex],
1341                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1342                            outTable+offsets[taggedAliasArrayIndex],
1343                            pErrorCode);
1344        }
1345    }
1346
1347    return headerSize+2*(int32_t)topOffset;
1348}
1349
1350#endif
1351
1352
1353/*
1354 * Hey, Emacs, please set the following:
1355 *
1356 * Local Variables:
1357 * indent-tabs-mode: nil
1358 * End:
1359 *
1360 */
1361