11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci/* 41cb0ef41Sopenharmony_ci******************************************************************************* 51cb0ef41Sopenharmony_ci* 61cb0ef41Sopenharmony_ci* Copyright (C) 2008-2011, International Business Machines 71cb0ef41Sopenharmony_ci* Corporation, Google and others. All Rights Reserved. 81cb0ef41Sopenharmony_ci* 91cb0ef41Sopenharmony_ci******************************************************************************* 101cb0ef41Sopenharmony_ci*/ 111cb0ef41Sopenharmony_ci// Author : eldawy@google.com (Mohamed Eldawy) 121cb0ef41Sopenharmony_ci// ucnvsel.cpp 131cb0ef41Sopenharmony_ci// 141cb0ef41Sopenharmony_ci// Purpose: To generate a list of encodings capable of handling 151cb0ef41Sopenharmony_ci// a given Unicode text 161cb0ef41Sopenharmony_ci// 171cb0ef41Sopenharmony_ci// Started 09-April-2008 181cb0ef41Sopenharmony_ci 191cb0ef41Sopenharmony_ci/** 201cb0ef41Sopenharmony_ci * \file 211cb0ef41Sopenharmony_ci * 221cb0ef41Sopenharmony_ci * This is an implementation of an encoding selector. 231cb0ef41Sopenharmony_ci * The goal is, given a unicode string, find the encodings 241cb0ef41Sopenharmony_ci * this string can be mapped to. To make processing faster 251cb0ef41Sopenharmony_ci * a trie is built when you call ucnvsel_open() that 261cb0ef41Sopenharmony_ci * stores all encodings a codepoint can map to 271cb0ef41Sopenharmony_ci */ 281cb0ef41Sopenharmony_ci 291cb0ef41Sopenharmony_ci#include "unicode/ucnvsel.h" 301cb0ef41Sopenharmony_ci 311cb0ef41Sopenharmony_ci#if !UCONFIG_NO_CONVERSION 321cb0ef41Sopenharmony_ci 331cb0ef41Sopenharmony_ci#include <string.h> 341cb0ef41Sopenharmony_ci 351cb0ef41Sopenharmony_ci#include "unicode/uchar.h" 361cb0ef41Sopenharmony_ci#include "unicode/uniset.h" 371cb0ef41Sopenharmony_ci#include "unicode/ucnv.h" 381cb0ef41Sopenharmony_ci#include "unicode/ustring.h" 391cb0ef41Sopenharmony_ci#include "unicode/uchriter.h" 401cb0ef41Sopenharmony_ci#include "utrie2.h" 411cb0ef41Sopenharmony_ci#include "propsvec.h" 421cb0ef41Sopenharmony_ci#include "uassert.h" 431cb0ef41Sopenharmony_ci#include "ucmndata.h" 441cb0ef41Sopenharmony_ci#include "udataswp.h" 451cb0ef41Sopenharmony_ci#include "uenumimp.h" 461cb0ef41Sopenharmony_ci#include "cmemory.h" 471cb0ef41Sopenharmony_ci#include "cstring.h" 481cb0ef41Sopenharmony_ci 491cb0ef41Sopenharmony_ciU_NAMESPACE_USE 501cb0ef41Sopenharmony_ci 511cb0ef41Sopenharmony_cistruct UConverterSelector { 521cb0ef41Sopenharmony_ci UTrie2 *trie; // 16 bit trie containing offsets into pv 531cb0ef41Sopenharmony_ci uint32_t* pv; // table of bits! 541cb0ef41Sopenharmony_ci int32_t pvCount; 551cb0ef41Sopenharmony_ci char** encodings; // which encodings did user ask to use? 561cb0ef41Sopenharmony_ci int32_t encodingsCount; 571cb0ef41Sopenharmony_ci int32_t encodingStrLength; 581cb0ef41Sopenharmony_ci uint8_t* swapped; 591cb0ef41Sopenharmony_ci UBool ownPv, ownEncodingStrings; 601cb0ef41Sopenharmony_ci}; 611cb0ef41Sopenharmony_ci 621cb0ef41Sopenharmony_cistatic void generateSelectorData(UConverterSelector* result, 631cb0ef41Sopenharmony_ci UPropsVectors *upvec, 641cb0ef41Sopenharmony_ci const USet* excludedCodePoints, 651cb0ef41Sopenharmony_ci const UConverterUnicodeSet whichSet, 661cb0ef41Sopenharmony_ci UErrorCode* status) { 671cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 681cb0ef41Sopenharmony_ci return; 691cb0ef41Sopenharmony_ci } 701cb0ef41Sopenharmony_ci 711cb0ef41Sopenharmony_ci int32_t columns = (result->encodingsCount+31)/32; 721cb0ef41Sopenharmony_ci 731cb0ef41Sopenharmony_ci // set errorValue to all-ones 741cb0ef41Sopenharmony_ci for (int32_t col = 0; col < columns; col++) { 751cb0ef41Sopenharmony_ci upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP, 761cb0ef41Sopenharmony_ci col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status); 771cb0ef41Sopenharmony_ci } 781cb0ef41Sopenharmony_ci 791cb0ef41Sopenharmony_ci for (int32_t i = 0; i < result->encodingsCount; ++i) { 801cb0ef41Sopenharmony_ci uint32_t mask; 811cb0ef41Sopenharmony_ci uint32_t column; 821cb0ef41Sopenharmony_ci int32_t item_count; 831cb0ef41Sopenharmony_ci int32_t j; 841cb0ef41Sopenharmony_ci UConverter* test_converter = ucnv_open(result->encodings[i], status); 851cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 861cb0ef41Sopenharmony_ci return; 871cb0ef41Sopenharmony_ci } 881cb0ef41Sopenharmony_ci USet* unicode_point_set; 891cb0ef41Sopenharmony_ci unicode_point_set = uset_open(1, 0); // empty set 901cb0ef41Sopenharmony_ci 911cb0ef41Sopenharmony_ci ucnv_getUnicodeSet(test_converter, unicode_point_set, 921cb0ef41Sopenharmony_ci whichSet, status); 931cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 941cb0ef41Sopenharmony_ci ucnv_close(test_converter); 951cb0ef41Sopenharmony_ci return; 961cb0ef41Sopenharmony_ci } 971cb0ef41Sopenharmony_ci 981cb0ef41Sopenharmony_ci column = i / 32; 991cb0ef41Sopenharmony_ci mask = 1 << (i%32); 1001cb0ef41Sopenharmony_ci // now iterate over intervals on set i! 1011cb0ef41Sopenharmony_ci item_count = uset_getItemCount(unicode_point_set); 1021cb0ef41Sopenharmony_ci 1031cb0ef41Sopenharmony_ci for (j = 0; j < item_count; ++j) { 1041cb0ef41Sopenharmony_ci UChar32 start_char; 1051cb0ef41Sopenharmony_ci UChar32 end_char; 1061cb0ef41Sopenharmony_ci UErrorCode smallStatus = U_ZERO_ERROR; 1071cb0ef41Sopenharmony_ci uset_getItem(unicode_point_set, j, &start_char, &end_char, nullptr, 0, 1081cb0ef41Sopenharmony_ci &smallStatus); 1091cb0ef41Sopenharmony_ci if (U_FAILURE(smallStatus)) { 1101cb0ef41Sopenharmony_ci // this will be reached for the converters that fill the set with 1111cb0ef41Sopenharmony_ci // strings. Those should be ignored by our system 1121cb0ef41Sopenharmony_ci } else { 1131cb0ef41Sopenharmony_ci upvec_setValue(upvec, start_char, end_char, column, static_cast<uint32_t>(~0), mask, 1141cb0ef41Sopenharmony_ci status); 1151cb0ef41Sopenharmony_ci } 1161cb0ef41Sopenharmony_ci } 1171cb0ef41Sopenharmony_ci ucnv_close(test_converter); 1181cb0ef41Sopenharmony_ci uset_close(unicode_point_set); 1191cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 1201cb0ef41Sopenharmony_ci return; 1211cb0ef41Sopenharmony_ci } 1221cb0ef41Sopenharmony_ci } 1231cb0ef41Sopenharmony_ci 1241cb0ef41Sopenharmony_ci // handle excluded encodings! Simply set their values to all 1's in the upvec 1251cb0ef41Sopenharmony_ci if (excludedCodePoints) { 1261cb0ef41Sopenharmony_ci int32_t item_count = uset_getItemCount(excludedCodePoints); 1271cb0ef41Sopenharmony_ci for (int32_t j = 0; j < item_count; ++j) { 1281cb0ef41Sopenharmony_ci UChar32 start_char; 1291cb0ef41Sopenharmony_ci UChar32 end_char; 1301cb0ef41Sopenharmony_ci 1311cb0ef41Sopenharmony_ci uset_getItem(excludedCodePoints, j, &start_char, &end_char, nullptr, 0, 1321cb0ef41Sopenharmony_ci status); 1331cb0ef41Sopenharmony_ci for (int32_t col = 0; col < columns; col++) { 1341cb0ef41Sopenharmony_ci upvec_setValue(upvec, start_char, end_char, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), 1351cb0ef41Sopenharmony_ci status); 1361cb0ef41Sopenharmony_ci } 1371cb0ef41Sopenharmony_ci } 1381cb0ef41Sopenharmony_ci } 1391cb0ef41Sopenharmony_ci 1401cb0ef41Sopenharmony_ci // alright. Now, let's put things in the same exact form you'd get when you 1411cb0ef41Sopenharmony_ci // unserialize things. 1421cb0ef41Sopenharmony_ci result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status); 1431cb0ef41Sopenharmony_ci result->pv = upvec_cloneArray(upvec, &result->pvCount, nullptr, status); 1441cb0ef41Sopenharmony_ci result->pvCount *= columns; // number of uint32_t = rows * columns 1451cb0ef41Sopenharmony_ci result->ownPv = true; 1461cb0ef41Sopenharmony_ci} 1471cb0ef41Sopenharmony_ci 1481cb0ef41Sopenharmony_ci/* open a selector. If converterListSize is 0, build for all converters. 1491cb0ef41Sopenharmony_ci If excludedCodePoints is nullptr, don't exclude any codepoints */ 1501cb0ef41Sopenharmony_ciU_CAPI UConverterSelector* U_EXPORT2 1511cb0ef41Sopenharmony_ciucnvsel_open(const char* const* converterList, int32_t converterListSize, 1521cb0ef41Sopenharmony_ci const USet* excludedCodePoints, 1531cb0ef41Sopenharmony_ci const UConverterUnicodeSet whichSet, UErrorCode* status) { 1541cb0ef41Sopenharmony_ci // check if already failed 1551cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 1561cb0ef41Sopenharmony_ci return nullptr; 1571cb0ef41Sopenharmony_ci } 1581cb0ef41Sopenharmony_ci // ensure args make sense! 1591cb0ef41Sopenharmony_ci if (converterListSize < 0 || (converterList == nullptr && converterListSize != 0)) { 1601cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 1611cb0ef41Sopenharmony_ci return nullptr; 1621cb0ef41Sopenharmony_ci } 1631cb0ef41Sopenharmony_ci 1641cb0ef41Sopenharmony_ci // allocate a new converter 1651cb0ef41Sopenharmony_ci LocalUConverterSelectorPointer newSelector( 1661cb0ef41Sopenharmony_ci (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector))); 1671cb0ef41Sopenharmony_ci if (newSelector.isNull()) { 1681cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 1691cb0ef41Sopenharmony_ci return nullptr; 1701cb0ef41Sopenharmony_ci } 1711cb0ef41Sopenharmony_ci uprv_memset(newSelector.getAlias(), 0, sizeof(UConverterSelector)); 1721cb0ef41Sopenharmony_ci 1731cb0ef41Sopenharmony_ci if (converterListSize == 0) { 1741cb0ef41Sopenharmony_ci converterList = nullptr; 1751cb0ef41Sopenharmony_ci converterListSize = ucnv_countAvailable(); 1761cb0ef41Sopenharmony_ci } 1771cb0ef41Sopenharmony_ci newSelector->encodings = 1781cb0ef41Sopenharmony_ci (char**)uprv_malloc(converterListSize * sizeof(char*)); 1791cb0ef41Sopenharmony_ci if (!newSelector->encodings) { 1801cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 1811cb0ef41Sopenharmony_ci return nullptr; 1821cb0ef41Sopenharmony_ci } 1831cb0ef41Sopenharmony_ci newSelector->encodings[0] = nullptr; // now we can call ucnvsel_close() 1841cb0ef41Sopenharmony_ci 1851cb0ef41Sopenharmony_ci // make a backup copy of the list of converters 1861cb0ef41Sopenharmony_ci int32_t totalSize = 0; 1871cb0ef41Sopenharmony_ci int32_t i; 1881cb0ef41Sopenharmony_ci for (i = 0; i < converterListSize; i++) { 1891cb0ef41Sopenharmony_ci totalSize += 1901cb0ef41Sopenharmony_ci (int32_t)uprv_strlen(converterList != nullptr ? converterList[i] : ucnv_getAvailableName(i)) + 1; 1911cb0ef41Sopenharmony_ci } 1921cb0ef41Sopenharmony_ci // 4-align the totalSize to 4-align the size of the serialized form 1931cb0ef41Sopenharmony_ci int32_t encodingStrPadding = totalSize & 3; 1941cb0ef41Sopenharmony_ci if (encodingStrPadding != 0) { 1951cb0ef41Sopenharmony_ci encodingStrPadding = 4 - encodingStrPadding; 1961cb0ef41Sopenharmony_ci } 1971cb0ef41Sopenharmony_ci newSelector->encodingStrLength = totalSize += encodingStrPadding; 1981cb0ef41Sopenharmony_ci char* allStrings = (char*) uprv_malloc(totalSize); 1991cb0ef41Sopenharmony_ci if (!allStrings) { 2001cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 2011cb0ef41Sopenharmony_ci return nullptr; 2021cb0ef41Sopenharmony_ci } 2031cb0ef41Sopenharmony_ci 2041cb0ef41Sopenharmony_ci for (i = 0; i < converterListSize; i++) { 2051cb0ef41Sopenharmony_ci newSelector->encodings[i] = allStrings; 2061cb0ef41Sopenharmony_ci uprv_strcpy(newSelector->encodings[i], 2071cb0ef41Sopenharmony_ci converterList != nullptr ? converterList[i] : ucnv_getAvailableName(i)); 2081cb0ef41Sopenharmony_ci allStrings += uprv_strlen(newSelector->encodings[i]) + 1; 2091cb0ef41Sopenharmony_ci } 2101cb0ef41Sopenharmony_ci while (encodingStrPadding > 0) { 2111cb0ef41Sopenharmony_ci *allStrings++ = 0; 2121cb0ef41Sopenharmony_ci --encodingStrPadding; 2131cb0ef41Sopenharmony_ci } 2141cb0ef41Sopenharmony_ci 2151cb0ef41Sopenharmony_ci newSelector->ownEncodingStrings = true; 2161cb0ef41Sopenharmony_ci newSelector->encodingsCount = converterListSize; 2171cb0ef41Sopenharmony_ci UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status); 2181cb0ef41Sopenharmony_ci generateSelectorData(newSelector.getAlias(), upvec, excludedCodePoints, whichSet, status); 2191cb0ef41Sopenharmony_ci upvec_close(upvec); 2201cb0ef41Sopenharmony_ci 2211cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 2221cb0ef41Sopenharmony_ci return nullptr; 2231cb0ef41Sopenharmony_ci } 2241cb0ef41Sopenharmony_ci 2251cb0ef41Sopenharmony_ci return newSelector.orphan(); 2261cb0ef41Sopenharmony_ci} 2271cb0ef41Sopenharmony_ci 2281cb0ef41Sopenharmony_ci/* close opened selector */ 2291cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2 2301cb0ef41Sopenharmony_ciucnvsel_close(UConverterSelector *sel) { 2311cb0ef41Sopenharmony_ci if (!sel) { 2321cb0ef41Sopenharmony_ci return; 2331cb0ef41Sopenharmony_ci } 2341cb0ef41Sopenharmony_ci if (sel->ownEncodingStrings) { 2351cb0ef41Sopenharmony_ci uprv_free(sel->encodings[0]); 2361cb0ef41Sopenharmony_ci } 2371cb0ef41Sopenharmony_ci uprv_free(sel->encodings); 2381cb0ef41Sopenharmony_ci if (sel->ownPv) { 2391cb0ef41Sopenharmony_ci uprv_free(sel->pv); 2401cb0ef41Sopenharmony_ci } 2411cb0ef41Sopenharmony_ci utrie2_close(sel->trie); 2421cb0ef41Sopenharmony_ci uprv_free(sel->swapped); 2431cb0ef41Sopenharmony_ci uprv_free(sel); 2441cb0ef41Sopenharmony_ci} 2451cb0ef41Sopenharmony_ci 2461cb0ef41Sopenharmony_cistatic const UDataInfo dataInfo = { 2471cb0ef41Sopenharmony_ci sizeof(UDataInfo), 2481cb0ef41Sopenharmony_ci 0, 2491cb0ef41Sopenharmony_ci 2501cb0ef41Sopenharmony_ci U_IS_BIG_ENDIAN, 2511cb0ef41Sopenharmony_ci U_CHARSET_FAMILY, 2521cb0ef41Sopenharmony_ci U_SIZEOF_UCHAR, 2531cb0ef41Sopenharmony_ci 0, 2541cb0ef41Sopenharmony_ci 2551cb0ef41Sopenharmony_ci { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */ 2561cb0ef41Sopenharmony_ci { 1, 0, 0, 0 }, /* formatVersion */ 2571cb0ef41Sopenharmony_ci { 0, 0, 0, 0 } /* dataVersion */ 2581cb0ef41Sopenharmony_ci}; 2591cb0ef41Sopenharmony_ci 2601cb0ef41Sopenharmony_cienum { 2611cb0ef41Sopenharmony_ci UCNVSEL_INDEX_TRIE_SIZE, // trie size in bytes 2621cb0ef41Sopenharmony_ci UCNVSEL_INDEX_PV_COUNT, // number of uint32_t in the bit vectors 2631cb0ef41Sopenharmony_ci UCNVSEL_INDEX_NAMES_COUNT, // number of encoding names 2641cb0ef41Sopenharmony_ci UCNVSEL_INDEX_NAMES_LENGTH, // number of encoding name bytes including padding 2651cb0ef41Sopenharmony_ci UCNVSEL_INDEX_SIZE = 15, // bytes following the DataHeader 2661cb0ef41Sopenharmony_ci UCNVSEL_INDEX_COUNT = 16 2671cb0ef41Sopenharmony_ci}; 2681cb0ef41Sopenharmony_ci 2691cb0ef41Sopenharmony_ci/* 2701cb0ef41Sopenharmony_ci * Serialized form of a UConverterSelector, formatVersion 1: 2711cb0ef41Sopenharmony_ci * 2721cb0ef41Sopenharmony_ci * The serialized form begins with a standard ICU DataHeader with a UDataInfo 2731cb0ef41Sopenharmony_ci * as the template above. 2741cb0ef41Sopenharmony_ci * This is followed by: 2751cb0ef41Sopenharmony_ci * int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above 2761cb0ef41Sopenharmony_ci * serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes 2771cb0ef41Sopenharmony_ci * uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors 2781cb0ef41Sopenharmony_ci * char* encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding 2791cb0ef41Sopenharmony_ci */ 2801cb0ef41Sopenharmony_ci 2811cb0ef41Sopenharmony_ci/* serialize a selector */ 2821cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 2831cb0ef41Sopenharmony_ciucnvsel_serialize(const UConverterSelector* sel, 2841cb0ef41Sopenharmony_ci void* buffer, int32_t bufferCapacity, UErrorCode* status) { 2851cb0ef41Sopenharmony_ci // check if already failed 2861cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 2871cb0ef41Sopenharmony_ci return 0; 2881cb0ef41Sopenharmony_ci } 2891cb0ef41Sopenharmony_ci // ensure args make sense! 2901cb0ef41Sopenharmony_ci uint8_t *p = (uint8_t *)buffer; 2911cb0ef41Sopenharmony_ci if (bufferCapacity < 0 || 2921cb0ef41Sopenharmony_ci (bufferCapacity > 0 && (p == nullptr || (U_POINTER_MASK_LSB(p, 3) != 0))) 2931cb0ef41Sopenharmony_ci ) { 2941cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 2951cb0ef41Sopenharmony_ci return 0; 2961cb0ef41Sopenharmony_ci } 2971cb0ef41Sopenharmony_ci // add up the size of the serialized form 2981cb0ef41Sopenharmony_ci int32_t serializedTrieSize = utrie2_serialize(sel->trie, nullptr, 0, status); 2991cb0ef41Sopenharmony_ci if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) { 3001cb0ef41Sopenharmony_ci return 0; 3011cb0ef41Sopenharmony_ci } 3021cb0ef41Sopenharmony_ci *status = U_ZERO_ERROR; 3031cb0ef41Sopenharmony_ci 3041cb0ef41Sopenharmony_ci DataHeader header; 3051cb0ef41Sopenharmony_ci uprv_memset(&header, 0, sizeof(header)); 3061cb0ef41Sopenharmony_ci header.dataHeader.headerSize = (uint16_t)((sizeof(header) + 15) & ~15); 3071cb0ef41Sopenharmony_ci header.dataHeader.magic1 = 0xda; 3081cb0ef41Sopenharmony_ci header.dataHeader.magic2 = 0x27; 3091cb0ef41Sopenharmony_ci uprv_memcpy(&header.info, &dataInfo, sizeof(dataInfo)); 3101cb0ef41Sopenharmony_ci 3111cb0ef41Sopenharmony_ci int32_t indexes[UCNVSEL_INDEX_COUNT] = { 3121cb0ef41Sopenharmony_ci serializedTrieSize, 3131cb0ef41Sopenharmony_ci sel->pvCount, 3141cb0ef41Sopenharmony_ci sel->encodingsCount, 3151cb0ef41Sopenharmony_ci sel->encodingStrLength 3161cb0ef41Sopenharmony_ci }; 3171cb0ef41Sopenharmony_ci 3181cb0ef41Sopenharmony_ci int32_t totalSize = 3191cb0ef41Sopenharmony_ci header.dataHeader.headerSize + 3201cb0ef41Sopenharmony_ci (int32_t)sizeof(indexes) + 3211cb0ef41Sopenharmony_ci serializedTrieSize + 3221cb0ef41Sopenharmony_ci sel->pvCount * 4 + 3231cb0ef41Sopenharmony_ci sel->encodingStrLength; 3241cb0ef41Sopenharmony_ci indexes[UCNVSEL_INDEX_SIZE] = totalSize - header.dataHeader.headerSize; 3251cb0ef41Sopenharmony_ci if (totalSize > bufferCapacity) { 3261cb0ef41Sopenharmony_ci *status = U_BUFFER_OVERFLOW_ERROR; 3271cb0ef41Sopenharmony_ci return totalSize; 3281cb0ef41Sopenharmony_ci } 3291cb0ef41Sopenharmony_ci // ok, save! 3301cb0ef41Sopenharmony_ci int32_t length = header.dataHeader.headerSize; 3311cb0ef41Sopenharmony_ci uprv_memcpy(p, &header, sizeof(header)); 3321cb0ef41Sopenharmony_ci uprv_memset(p + sizeof(header), 0, length - sizeof(header)); 3331cb0ef41Sopenharmony_ci p += length; 3341cb0ef41Sopenharmony_ci 3351cb0ef41Sopenharmony_ci length = (int32_t)sizeof(indexes); 3361cb0ef41Sopenharmony_ci uprv_memcpy(p, indexes, length); 3371cb0ef41Sopenharmony_ci p += length; 3381cb0ef41Sopenharmony_ci 3391cb0ef41Sopenharmony_ci utrie2_serialize(sel->trie, p, serializedTrieSize, status); 3401cb0ef41Sopenharmony_ci p += serializedTrieSize; 3411cb0ef41Sopenharmony_ci 3421cb0ef41Sopenharmony_ci length = sel->pvCount * 4; 3431cb0ef41Sopenharmony_ci uprv_memcpy(p, sel->pv, length); 3441cb0ef41Sopenharmony_ci p += length; 3451cb0ef41Sopenharmony_ci 3461cb0ef41Sopenharmony_ci uprv_memcpy(p, sel->encodings[0], sel->encodingStrLength); 3471cb0ef41Sopenharmony_ci p += sel->encodingStrLength; 3481cb0ef41Sopenharmony_ci 3491cb0ef41Sopenharmony_ci return totalSize; 3501cb0ef41Sopenharmony_ci} 3511cb0ef41Sopenharmony_ci 3521cb0ef41Sopenharmony_ci/** 3531cb0ef41Sopenharmony_ci * swap a selector into the desired Endianness and Asciiness of 3541cb0ef41Sopenharmony_ci * the system. Just as FYI, selectors are always saved in the format 3551cb0ef41Sopenharmony_ci * of the system that created them. They are only converted if used 3561cb0ef41Sopenharmony_ci * on another system. In other words, selectors created on different 3571cb0ef41Sopenharmony_ci * system can be different even if the params are identical (endianness 3581cb0ef41Sopenharmony_ci * and Asciiness differences only) 3591cb0ef41Sopenharmony_ci * 3601cb0ef41Sopenharmony_ci * @param ds pointer to data swapper containing swapping info 3611cb0ef41Sopenharmony_ci * @param inData pointer to incoming data 3621cb0ef41Sopenharmony_ci * @param length length of inData in bytes 3631cb0ef41Sopenharmony_ci * @param outData pointer to output data. Capacity should 3641cb0ef41Sopenharmony_ci * be at least equal to capacity of inData 3651cb0ef41Sopenharmony_ci * @param status an in/out ICU UErrorCode 3661cb0ef41Sopenharmony_ci * @return 0 on failure, number of bytes swapped on success 3671cb0ef41Sopenharmony_ci * number of bytes swapped can be smaller than length 3681cb0ef41Sopenharmony_ci */ 3691cb0ef41Sopenharmony_cistatic int32_t 3701cb0ef41Sopenharmony_ciucnvsel_swap(const UDataSwapper *ds, 3711cb0ef41Sopenharmony_ci const void *inData, int32_t length, 3721cb0ef41Sopenharmony_ci void *outData, UErrorCode *status) { 3731cb0ef41Sopenharmony_ci /* udata_swapDataHeader checks the arguments */ 3741cb0ef41Sopenharmony_ci int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status); 3751cb0ef41Sopenharmony_ci if(U_FAILURE(*status)) { 3761cb0ef41Sopenharmony_ci return 0; 3771cb0ef41Sopenharmony_ci } 3781cb0ef41Sopenharmony_ci 3791cb0ef41Sopenharmony_ci /* check data format and format version */ 3801cb0ef41Sopenharmony_ci const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4); 3811cb0ef41Sopenharmony_ci if(!( 3821cb0ef41Sopenharmony_ci pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */ 3831cb0ef41Sopenharmony_ci pInfo->dataFormat[1] == 0x53 && 3841cb0ef41Sopenharmony_ci pInfo->dataFormat[2] == 0x65 && 3851cb0ef41Sopenharmony_ci pInfo->dataFormat[3] == 0x6c 3861cb0ef41Sopenharmony_ci )) { 3871cb0ef41Sopenharmony_ci udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n", 3881cb0ef41Sopenharmony_ci pInfo->dataFormat[0], pInfo->dataFormat[1], 3891cb0ef41Sopenharmony_ci pInfo->dataFormat[2], pInfo->dataFormat[3]); 3901cb0ef41Sopenharmony_ci *status = U_INVALID_FORMAT_ERROR; 3911cb0ef41Sopenharmony_ci return 0; 3921cb0ef41Sopenharmony_ci } 3931cb0ef41Sopenharmony_ci if(pInfo->formatVersion[0] != 1) { 3941cb0ef41Sopenharmony_ci udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n", 3951cb0ef41Sopenharmony_ci pInfo->formatVersion[0]); 3961cb0ef41Sopenharmony_ci *status = U_UNSUPPORTED_ERROR; 3971cb0ef41Sopenharmony_ci return 0; 3981cb0ef41Sopenharmony_ci } 3991cb0ef41Sopenharmony_ci 4001cb0ef41Sopenharmony_ci if(length >= 0) { 4011cb0ef41Sopenharmony_ci length -= headerSize; 4021cb0ef41Sopenharmony_ci if(length < 16*4) { 4031cb0ef41Sopenharmony_ci udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n", 4041cb0ef41Sopenharmony_ci length); 4051cb0ef41Sopenharmony_ci *status = U_INDEX_OUTOFBOUNDS_ERROR; 4061cb0ef41Sopenharmony_ci return 0; 4071cb0ef41Sopenharmony_ci } 4081cb0ef41Sopenharmony_ci } 4091cb0ef41Sopenharmony_ci 4101cb0ef41Sopenharmony_ci const uint8_t *inBytes = (const uint8_t *)inData + headerSize; 4111cb0ef41Sopenharmony_ci uint8_t *outBytes = (uint8_t *)outData + headerSize; 4121cb0ef41Sopenharmony_ci 4131cb0ef41Sopenharmony_ci /* read the indexes */ 4141cb0ef41Sopenharmony_ci const int32_t *inIndexes = (const int32_t *)inBytes; 4151cb0ef41Sopenharmony_ci int32_t indexes[16]; 4161cb0ef41Sopenharmony_ci int32_t i; 4171cb0ef41Sopenharmony_ci for(i = 0; i < 16; ++i) { 4181cb0ef41Sopenharmony_ci indexes[i] = udata_readInt32(ds, inIndexes[i]); 4191cb0ef41Sopenharmony_ci } 4201cb0ef41Sopenharmony_ci 4211cb0ef41Sopenharmony_ci /* get the total length of the data */ 4221cb0ef41Sopenharmony_ci int32_t size = indexes[UCNVSEL_INDEX_SIZE]; 4231cb0ef41Sopenharmony_ci if(length >= 0) { 4241cb0ef41Sopenharmony_ci if(length < size) { 4251cb0ef41Sopenharmony_ci udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n", 4261cb0ef41Sopenharmony_ci length); 4271cb0ef41Sopenharmony_ci *status = U_INDEX_OUTOFBOUNDS_ERROR; 4281cb0ef41Sopenharmony_ci return 0; 4291cb0ef41Sopenharmony_ci } 4301cb0ef41Sopenharmony_ci 4311cb0ef41Sopenharmony_ci /* copy the data for inaccessible bytes */ 4321cb0ef41Sopenharmony_ci if(inBytes != outBytes) { 4331cb0ef41Sopenharmony_ci uprv_memcpy(outBytes, inBytes, size); 4341cb0ef41Sopenharmony_ci } 4351cb0ef41Sopenharmony_ci 4361cb0ef41Sopenharmony_ci int32_t offset = 0, count; 4371cb0ef41Sopenharmony_ci 4381cb0ef41Sopenharmony_ci /* swap the int32_t indexes[] */ 4391cb0ef41Sopenharmony_ci count = UCNVSEL_INDEX_COUNT*4; 4401cb0ef41Sopenharmony_ci ds->swapArray32(ds, inBytes, count, outBytes, status); 4411cb0ef41Sopenharmony_ci offset += count; 4421cb0ef41Sopenharmony_ci 4431cb0ef41Sopenharmony_ci /* swap the UTrie2 */ 4441cb0ef41Sopenharmony_ci count = indexes[UCNVSEL_INDEX_TRIE_SIZE]; 4451cb0ef41Sopenharmony_ci utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status); 4461cb0ef41Sopenharmony_ci offset += count; 4471cb0ef41Sopenharmony_ci 4481cb0ef41Sopenharmony_ci /* swap the uint32_t pv[] */ 4491cb0ef41Sopenharmony_ci count = indexes[UCNVSEL_INDEX_PV_COUNT]*4; 4501cb0ef41Sopenharmony_ci ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status); 4511cb0ef41Sopenharmony_ci offset += count; 4521cb0ef41Sopenharmony_ci 4531cb0ef41Sopenharmony_ci /* swap the encoding names */ 4541cb0ef41Sopenharmony_ci count = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; 4551cb0ef41Sopenharmony_ci ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status); 4561cb0ef41Sopenharmony_ci offset += count; 4571cb0ef41Sopenharmony_ci 4581cb0ef41Sopenharmony_ci U_ASSERT(offset == size); 4591cb0ef41Sopenharmony_ci } 4601cb0ef41Sopenharmony_ci 4611cb0ef41Sopenharmony_ci return headerSize + size; 4621cb0ef41Sopenharmony_ci} 4631cb0ef41Sopenharmony_ci 4641cb0ef41Sopenharmony_ci/* unserialize a selector */ 4651cb0ef41Sopenharmony_ciU_CAPI UConverterSelector* U_EXPORT2 4661cb0ef41Sopenharmony_ciucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status) { 4671cb0ef41Sopenharmony_ci // check if already failed 4681cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 4691cb0ef41Sopenharmony_ci return nullptr; 4701cb0ef41Sopenharmony_ci } 4711cb0ef41Sopenharmony_ci // ensure args make sense! 4721cb0ef41Sopenharmony_ci const uint8_t *p = (const uint8_t *)buffer; 4731cb0ef41Sopenharmony_ci if (length <= 0 || 4741cb0ef41Sopenharmony_ci (length > 0 && (p == nullptr || (U_POINTER_MASK_LSB(p, 3) != 0))) 4751cb0ef41Sopenharmony_ci ) { 4761cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 4771cb0ef41Sopenharmony_ci return nullptr; 4781cb0ef41Sopenharmony_ci } 4791cb0ef41Sopenharmony_ci // header 4801cb0ef41Sopenharmony_ci if (length < 32) { 4811cb0ef41Sopenharmony_ci // not even enough space for a minimal header 4821cb0ef41Sopenharmony_ci *status = U_INDEX_OUTOFBOUNDS_ERROR; 4831cb0ef41Sopenharmony_ci return nullptr; 4841cb0ef41Sopenharmony_ci } 4851cb0ef41Sopenharmony_ci const DataHeader *pHeader = (const DataHeader *)p; 4861cb0ef41Sopenharmony_ci if (!( 4871cb0ef41Sopenharmony_ci pHeader->dataHeader.magic1==0xda && 4881cb0ef41Sopenharmony_ci pHeader->dataHeader.magic2==0x27 && 4891cb0ef41Sopenharmony_ci pHeader->info.dataFormat[0] == 0x43 && 4901cb0ef41Sopenharmony_ci pHeader->info.dataFormat[1] == 0x53 && 4911cb0ef41Sopenharmony_ci pHeader->info.dataFormat[2] == 0x65 && 4921cb0ef41Sopenharmony_ci pHeader->info.dataFormat[3] == 0x6c 4931cb0ef41Sopenharmony_ci )) { 4941cb0ef41Sopenharmony_ci /* header not valid or dataFormat not recognized */ 4951cb0ef41Sopenharmony_ci *status = U_INVALID_FORMAT_ERROR; 4961cb0ef41Sopenharmony_ci return nullptr; 4971cb0ef41Sopenharmony_ci } 4981cb0ef41Sopenharmony_ci if (pHeader->info.formatVersion[0] != 1) { 4991cb0ef41Sopenharmony_ci *status = U_UNSUPPORTED_ERROR; 5001cb0ef41Sopenharmony_ci return nullptr; 5011cb0ef41Sopenharmony_ci } 5021cb0ef41Sopenharmony_ci uint8_t* swapped = nullptr; 5031cb0ef41Sopenharmony_ci if (pHeader->info.isBigEndian != U_IS_BIG_ENDIAN || 5041cb0ef41Sopenharmony_ci pHeader->info.charsetFamily != U_CHARSET_FAMILY 5051cb0ef41Sopenharmony_ci ) { 5061cb0ef41Sopenharmony_ci // swap the data 5071cb0ef41Sopenharmony_ci UDataSwapper *ds = 5081cb0ef41Sopenharmony_ci udata_openSwapperForInputData(p, length, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, status); 5091cb0ef41Sopenharmony_ci int32_t totalSize = ucnvsel_swap(ds, p, -1, nullptr, status); 5101cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 5111cb0ef41Sopenharmony_ci udata_closeSwapper(ds); 5121cb0ef41Sopenharmony_ci return nullptr; 5131cb0ef41Sopenharmony_ci } 5141cb0ef41Sopenharmony_ci if (length < totalSize) { 5151cb0ef41Sopenharmony_ci udata_closeSwapper(ds); 5161cb0ef41Sopenharmony_ci *status = U_INDEX_OUTOFBOUNDS_ERROR; 5171cb0ef41Sopenharmony_ci return nullptr; 5181cb0ef41Sopenharmony_ci } 5191cb0ef41Sopenharmony_ci swapped = (uint8_t*)uprv_malloc(totalSize); 5201cb0ef41Sopenharmony_ci if (swapped == nullptr) { 5211cb0ef41Sopenharmony_ci udata_closeSwapper(ds); 5221cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 5231cb0ef41Sopenharmony_ci return nullptr; 5241cb0ef41Sopenharmony_ci } 5251cb0ef41Sopenharmony_ci ucnvsel_swap(ds, p, length, swapped, status); 5261cb0ef41Sopenharmony_ci udata_closeSwapper(ds); 5271cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 5281cb0ef41Sopenharmony_ci uprv_free(swapped); 5291cb0ef41Sopenharmony_ci return nullptr; 5301cb0ef41Sopenharmony_ci } 5311cb0ef41Sopenharmony_ci p = swapped; 5321cb0ef41Sopenharmony_ci pHeader = (const DataHeader *)p; 5331cb0ef41Sopenharmony_ci } 5341cb0ef41Sopenharmony_ci if (length < (pHeader->dataHeader.headerSize + 16 * 4)) { 5351cb0ef41Sopenharmony_ci // not even enough space for the header and the indexes 5361cb0ef41Sopenharmony_ci uprv_free(swapped); 5371cb0ef41Sopenharmony_ci *status = U_INDEX_OUTOFBOUNDS_ERROR; 5381cb0ef41Sopenharmony_ci return nullptr; 5391cb0ef41Sopenharmony_ci } 5401cb0ef41Sopenharmony_ci p += pHeader->dataHeader.headerSize; 5411cb0ef41Sopenharmony_ci length -= pHeader->dataHeader.headerSize; 5421cb0ef41Sopenharmony_ci // indexes 5431cb0ef41Sopenharmony_ci const int32_t *indexes = (const int32_t *)p; 5441cb0ef41Sopenharmony_ci if (length < indexes[UCNVSEL_INDEX_SIZE]) { 5451cb0ef41Sopenharmony_ci uprv_free(swapped); 5461cb0ef41Sopenharmony_ci *status = U_INDEX_OUTOFBOUNDS_ERROR; 5471cb0ef41Sopenharmony_ci return nullptr; 5481cb0ef41Sopenharmony_ci } 5491cb0ef41Sopenharmony_ci p += UCNVSEL_INDEX_COUNT * 4; 5501cb0ef41Sopenharmony_ci // create and populate the selector object 5511cb0ef41Sopenharmony_ci UConverterSelector* sel = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector)); 5521cb0ef41Sopenharmony_ci char **encodings = 5531cb0ef41Sopenharmony_ci (char **)uprv_malloc( 5541cb0ef41Sopenharmony_ci indexes[UCNVSEL_INDEX_NAMES_COUNT] * sizeof(char *)); 5551cb0ef41Sopenharmony_ci if (sel == nullptr || encodings == nullptr) { 5561cb0ef41Sopenharmony_ci uprv_free(swapped); 5571cb0ef41Sopenharmony_ci uprv_free(sel); 5581cb0ef41Sopenharmony_ci uprv_free(encodings); 5591cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 5601cb0ef41Sopenharmony_ci return nullptr; 5611cb0ef41Sopenharmony_ci } 5621cb0ef41Sopenharmony_ci uprv_memset(sel, 0, sizeof(UConverterSelector)); 5631cb0ef41Sopenharmony_ci sel->pvCount = indexes[UCNVSEL_INDEX_PV_COUNT]; 5641cb0ef41Sopenharmony_ci sel->encodings = encodings; 5651cb0ef41Sopenharmony_ci sel->encodingsCount = indexes[UCNVSEL_INDEX_NAMES_COUNT]; 5661cb0ef41Sopenharmony_ci sel->encodingStrLength = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; 5671cb0ef41Sopenharmony_ci sel->swapped = swapped; 5681cb0ef41Sopenharmony_ci // trie 5691cb0ef41Sopenharmony_ci sel->trie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 5701cb0ef41Sopenharmony_ci p, indexes[UCNVSEL_INDEX_TRIE_SIZE], nullptr, 5711cb0ef41Sopenharmony_ci status); 5721cb0ef41Sopenharmony_ci p += indexes[UCNVSEL_INDEX_TRIE_SIZE]; 5731cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 5741cb0ef41Sopenharmony_ci ucnvsel_close(sel); 5751cb0ef41Sopenharmony_ci return nullptr; 5761cb0ef41Sopenharmony_ci } 5771cb0ef41Sopenharmony_ci // bit vectors 5781cb0ef41Sopenharmony_ci sel->pv = (uint32_t *)p; 5791cb0ef41Sopenharmony_ci p += sel->pvCount * 4; 5801cb0ef41Sopenharmony_ci // encoding names 5811cb0ef41Sopenharmony_ci char* s = (char*)p; 5821cb0ef41Sopenharmony_ci for (int32_t i = 0; i < sel->encodingsCount; ++i) { 5831cb0ef41Sopenharmony_ci sel->encodings[i] = s; 5841cb0ef41Sopenharmony_ci s += uprv_strlen(s) + 1; 5851cb0ef41Sopenharmony_ci } 5861cb0ef41Sopenharmony_ci p += sel->encodingStrLength; 5871cb0ef41Sopenharmony_ci 5881cb0ef41Sopenharmony_ci return sel; 5891cb0ef41Sopenharmony_ci} 5901cb0ef41Sopenharmony_ci 5911cb0ef41Sopenharmony_ci// a bunch of functions for the enumeration thingie! Nothing fancy here. Just 5921cb0ef41Sopenharmony_ci// iterate over the selected encodings 5931cb0ef41Sopenharmony_cistruct Enumerator { 5941cb0ef41Sopenharmony_ci int16_t* index; 5951cb0ef41Sopenharmony_ci int16_t length; 5961cb0ef41Sopenharmony_ci int16_t cur; 5971cb0ef41Sopenharmony_ci const UConverterSelector* sel; 5981cb0ef41Sopenharmony_ci}; 5991cb0ef41Sopenharmony_ci 6001cb0ef41Sopenharmony_ciU_CDECL_BEGIN 6011cb0ef41Sopenharmony_ci 6021cb0ef41Sopenharmony_cistatic void U_CALLCONV 6031cb0ef41Sopenharmony_ciucnvsel_close_selector_iterator(UEnumeration *enumerator) { 6041cb0ef41Sopenharmony_ci uprv_free(((Enumerator*)(enumerator->context))->index); 6051cb0ef41Sopenharmony_ci uprv_free(enumerator->context); 6061cb0ef41Sopenharmony_ci uprv_free(enumerator); 6071cb0ef41Sopenharmony_ci} 6081cb0ef41Sopenharmony_ci 6091cb0ef41Sopenharmony_ci 6101cb0ef41Sopenharmony_cistatic int32_t U_CALLCONV 6111cb0ef41Sopenharmony_ciucnvsel_count_encodings(UEnumeration *enumerator, UErrorCode *status) { 6121cb0ef41Sopenharmony_ci // check if already failed 6131cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 6141cb0ef41Sopenharmony_ci return 0; 6151cb0ef41Sopenharmony_ci } 6161cb0ef41Sopenharmony_ci return ((Enumerator*)(enumerator->context))->length; 6171cb0ef41Sopenharmony_ci} 6181cb0ef41Sopenharmony_ci 6191cb0ef41Sopenharmony_ci 6201cb0ef41Sopenharmony_cistatic const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator, 6211cb0ef41Sopenharmony_ci int32_t* resultLength, 6221cb0ef41Sopenharmony_ci UErrorCode* status) { 6231cb0ef41Sopenharmony_ci // check if already failed 6241cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 6251cb0ef41Sopenharmony_ci return nullptr; 6261cb0ef41Sopenharmony_ci } 6271cb0ef41Sopenharmony_ci 6281cb0ef41Sopenharmony_ci int16_t cur = ((Enumerator*)(enumerator->context))->cur; 6291cb0ef41Sopenharmony_ci const UConverterSelector* sel; 6301cb0ef41Sopenharmony_ci const char* result; 6311cb0ef41Sopenharmony_ci if (cur >= ((Enumerator*)(enumerator->context))->length) { 6321cb0ef41Sopenharmony_ci return nullptr; 6331cb0ef41Sopenharmony_ci } 6341cb0ef41Sopenharmony_ci sel = ((Enumerator*)(enumerator->context))->sel; 6351cb0ef41Sopenharmony_ci result = sel->encodings[((Enumerator*)(enumerator->context))->index[cur] ]; 6361cb0ef41Sopenharmony_ci ((Enumerator*)(enumerator->context))->cur++; 6371cb0ef41Sopenharmony_ci if (resultLength) { 6381cb0ef41Sopenharmony_ci *resultLength = (int32_t)uprv_strlen(result); 6391cb0ef41Sopenharmony_ci } 6401cb0ef41Sopenharmony_ci return result; 6411cb0ef41Sopenharmony_ci} 6421cb0ef41Sopenharmony_ci 6431cb0ef41Sopenharmony_cistatic void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator, 6441cb0ef41Sopenharmony_ci UErrorCode* status) { 6451cb0ef41Sopenharmony_ci // check if already failed 6461cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 6471cb0ef41Sopenharmony_ci return ; 6481cb0ef41Sopenharmony_ci } 6491cb0ef41Sopenharmony_ci ((Enumerator*)(enumerator->context))->cur = 0; 6501cb0ef41Sopenharmony_ci} 6511cb0ef41Sopenharmony_ci 6521cb0ef41Sopenharmony_ciU_CDECL_END 6531cb0ef41Sopenharmony_ci 6541cb0ef41Sopenharmony_ci 6551cb0ef41Sopenharmony_cistatic const UEnumeration defaultEncodings = { 6561cb0ef41Sopenharmony_ci nullptr, 6571cb0ef41Sopenharmony_ci nullptr, 6581cb0ef41Sopenharmony_ci ucnvsel_close_selector_iterator, 6591cb0ef41Sopenharmony_ci ucnvsel_count_encodings, 6601cb0ef41Sopenharmony_ci uenum_unextDefault, 6611cb0ef41Sopenharmony_ci ucnvsel_next_encoding, 6621cb0ef41Sopenharmony_ci ucnvsel_reset_iterator 6631cb0ef41Sopenharmony_ci}; 6641cb0ef41Sopenharmony_ci 6651cb0ef41Sopenharmony_ci 6661cb0ef41Sopenharmony_ci// internal fn to intersect two sets of masks 6671cb0ef41Sopenharmony_ci// returns whether the mask has reduced to all zeros 6681cb0ef41Sopenharmony_cistatic UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) { 6691cb0ef41Sopenharmony_ci int32_t i; 6701cb0ef41Sopenharmony_ci uint32_t oredDest = 0; 6711cb0ef41Sopenharmony_ci for (i = 0 ; i < len ; ++i) { 6721cb0ef41Sopenharmony_ci oredDest |= (dest[i] &= source1[i]); 6731cb0ef41Sopenharmony_ci } 6741cb0ef41Sopenharmony_ci return oredDest == 0; 6751cb0ef41Sopenharmony_ci} 6761cb0ef41Sopenharmony_ci 6771cb0ef41Sopenharmony_ci// internal fn to count how many 1's are there in a mask 6781cb0ef41Sopenharmony_ci// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html 6791cb0ef41Sopenharmony_cistatic int16_t countOnes(uint32_t* mask, int32_t len) { 6801cb0ef41Sopenharmony_ci int32_t i, totalOnes = 0; 6811cb0ef41Sopenharmony_ci for (i = 0 ; i < len ; ++i) { 6821cb0ef41Sopenharmony_ci uint32_t ent = mask[i]; 6831cb0ef41Sopenharmony_ci for (; ent; totalOnes++) 6841cb0ef41Sopenharmony_ci { 6851cb0ef41Sopenharmony_ci ent &= ent - 1; // clear the least significant bit set 6861cb0ef41Sopenharmony_ci } 6871cb0ef41Sopenharmony_ci } 6881cb0ef41Sopenharmony_ci return static_cast<int16_t>(totalOnes); 6891cb0ef41Sopenharmony_ci} 6901cb0ef41Sopenharmony_ci 6911cb0ef41Sopenharmony_ci 6921cb0ef41Sopenharmony_ci/* internal function! */ 6931cb0ef41Sopenharmony_cistatic UEnumeration *selectForMask(const UConverterSelector* sel, 6941cb0ef41Sopenharmony_ci uint32_t *theMask, UErrorCode *status) { 6951cb0ef41Sopenharmony_ci LocalMemory<uint32_t> mask(theMask); 6961cb0ef41Sopenharmony_ci // this is the context we will use. Store a table of indices to which 6971cb0ef41Sopenharmony_ci // encodings are legit. 6981cb0ef41Sopenharmony_ci LocalMemory<Enumerator> result(static_cast<Enumerator *>(uprv_malloc(sizeof(Enumerator)))); 6991cb0ef41Sopenharmony_ci if (result.isNull()) { 7001cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 7011cb0ef41Sopenharmony_ci return nullptr; 7021cb0ef41Sopenharmony_ci } 7031cb0ef41Sopenharmony_ci result->index = nullptr; // this will be allocated later! 7041cb0ef41Sopenharmony_ci result->length = result->cur = 0; 7051cb0ef41Sopenharmony_ci result->sel = sel; 7061cb0ef41Sopenharmony_ci 7071cb0ef41Sopenharmony_ci LocalMemory<UEnumeration> en(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)))); 7081cb0ef41Sopenharmony_ci if (en.isNull()) { 7091cb0ef41Sopenharmony_ci // TODO(markus): Combine Enumerator and UEnumeration into one struct. 7101cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 7111cb0ef41Sopenharmony_ci return nullptr; 7121cb0ef41Sopenharmony_ci } 7131cb0ef41Sopenharmony_ci memcpy(en.getAlias(), &defaultEncodings, sizeof(UEnumeration)); 7141cb0ef41Sopenharmony_ci 7151cb0ef41Sopenharmony_ci int32_t columns = (sel->encodingsCount+31)/32; 7161cb0ef41Sopenharmony_ci int16_t numOnes = countOnes(mask.getAlias(), columns); 7171cb0ef41Sopenharmony_ci // now, we know the exact space we need for index 7181cb0ef41Sopenharmony_ci if (numOnes > 0) { 7191cb0ef41Sopenharmony_ci result->index = static_cast<int16_t*>(uprv_malloc(numOnes * sizeof(int16_t))); 7201cb0ef41Sopenharmony_ci if (result->index == nullptr) { 7211cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 7221cb0ef41Sopenharmony_ci return nullptr; 7231cb0ef41Sopenharmony_ci } 7241cb0ef41Sopenharmony_ci int32_t i, j; 7251cb0ef41Sopenharmony_ci int16_t k = 0; 7261cb0ef41Sopenharmony_ci for (j = 0 ; j < columns; j++) { 7271cb0ef41Sopenharmony_ci uint32_t v = mask[j]; 7281cb0ef41Sopenharmony_ci for (i = 0 ; i < 32 && k < sel->encodingsCount; i++, k++) { 7291cb0ef41Sopenharmony_ci if ((v & 1) != 0) { 7301cb0ef41Sopenharmony_ci result->index[result->length++] = k; 7311cb0ef41Sopenharmony_ci } 7321cb0ef41Sopenharmony_ci v >>= 1; 7331cb0ef41Sopenharmony_ci } 7341cb0ef41Sopenharmony_ci } 7351cb0ef41Sopenharmony_ci } //otherwise, index will remain nullptr (and will never be touched by 7361cb0ef41Sopenharmony_ci //the enumerator code anyway) 7371cb0ef41Sopenharmony_ci en->context = result.orphan(); 7381cb0ef41Sopenharmony_ci return en.orphan(); 7391cb0ef41Sopenharmony_ci} 7401cb0ef41Sopenharmony_ci 7411cb0ef41Sopenharmony_ci/* check a string against the selector - UTF16 version */ 7421cb0ef41Sopenharmony_ciU_CAPI UEnumeration * U_EXPORT2 7431cb0ef41Sopenharmony_ciucnvsel_selectForString(const UConverterSelector* sel, 7441cb0ef41Sopenharmony_ci const char16_t *s, int32_t length, UErrorCode *status) { 7451cb0ef41Sopenharmony_ci // check if already failed 7461cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 7471cb0ef41Sopenharmony_ci return nullptr; 7481cb0ef41Sopenharmony_ci } 7491cb0ef41Sopenharmony_ci // ensure args make sense! 7501cb0ef41Sopenharmony_ci if (sel == nullptr || (s == nullptr && length != 0)) { 7511cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 7521cb0ef41Sopenharmony_ci return nullptr; 7531cb0ef41Sopenharmony_ci } 7541cb0ef41Sopenharmony_ci 7551cb0ef41Sopenharmony_ci int32_t columns = (sel->encodingsCount+31)/32; 7561cb0ef41Sopenharmony_ci uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4); 7571cb0ef41Sopenharmony_ci if (mask == nullptr) { 7581cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 7591cb0ef41Sopenharmony_ci return nullptr; 7601cb0ef41Sopenharmony_ci } 7611cb0ef41Sopenharmony_ci uprv_memset(mask, ~0, columns *4); 7621cb0ef41Sopenharmony_ci 7631cb0ef41Sopenharmony_ci if(s!=nullptr) { 7641cb0ef41Sopenharmony_ci const char16_t *limit; 7651cb0ef41Sopenharmony_ci if (length >= 0) { 7661cb0ef41Sopenharmony_ci limit = s + length; 7671cb0ef41Sopenharmony_ci } else { 7681cb0ef41Sopenharmony_ci limit = nullptr; 7691cb0ef41Sopenharmony_ci } 7701cb0ef41Sopenharmony_ci 7711cb0ef41Sopenharmony_ci while (limit == nullptr ? *s != 0 : s != limit) { 7721cb0ef41Sopenharmony_ci UChar32 c; 7731cb0ef41Sopenharmony_ci uint16_t pvIndex; 7741cb0ef41Sopenharmony_ci UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex); 7751cb0ef41Sopenharmony_ci if (intersectMasks(mask, sel->pv+pvIndex, columns)) { 7761cb0ef41Sopenharmony_ci break; 7771cb0ef41Sopenharmony_ci } 7781cb0ef41Sopenharmony_ci } 7791cb0ef41Sopenharmony_ci } 7801cb0ef41Sopenharmony_ci return selectForMask(sel, mask, status); 7811cb0ef41Sopenharmony_ci} 7821cb0ef41Sopenharmony_ci 7831cb0ef41Sopenharmony_ci/* check a string against the selector - UTF8 version */ 7841cb0ef41Sopenharmony_ciU_CAPI UEnumeration * U_EXPORT2 7851cb0ef41Sopenharmony_ciucnvsel_selectForUTF8(const UConverterSelector* sel, 7861cb0ef41Sopenharmony_ci const char *s, int32_t length, UErrorCode *status) { 7871cb0ef41Sopenharmony_ci // check if already failed 7881cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 7891cb0ef41Sopenharmony_ci return nullptr; 7901cb0ef41Sopenharmony_ci } 7911cb0ef41Sopenharmony_ci // ensure args make sense! 7921cb0ef41Sopenharmony_ci if (sel == nullptr || (s == nullptr && length != 0)) { 7931cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 7941cb0ef41Sopenharmony_ci return nullptr; 7951cb0ef41Sopenharmony_ci } 7961cb0ef41Sopenharmony_ci 7971cb0ef41Sopenharmony_ci int32_t columns = (sel->encodingsCount+31)/32; 7981cb0ef41Sopenharmony_ci uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4); 7991cb0ef41Sopenharmony_ci if (mask == nullptr) { 8001cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 8011cb0ef41Sopenharmony_ci return nullptr; 8021cb0ef41Sopenharmony_ci } 8031cb0ef41Sopenharmony_ci uprv_memset(mask, ~0, columns *4); 8041cb0ef41Sopenharmony_ci 8051cb0ef41Sopenharmony_ci if (length < 0) { 8061cb0ef41Sopenharmony_ci length = (int32_t)uprv_strlen(s); 8071cb0ef41Sopenharmony_ci } 8081cb0ef41Sopenharmony_ci 8091cb0ef41Sopenharmony_ci if(s!=nullptr) { 8101cb0ef41Sopenharmony_ci const char *limit = s + length; 8111cb0ef41Sopenharmony_ci 8121cb0ef41Sopenharmony_ci while (s != limit) { 8131cb0ef41Sopenharmony_ci uint16_t pvIndex; 8141cb0ef41Sopenharmony_ci UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex); 8151cb0ef41Sopenharmony_ci if (intersectMasks(mask, sel->pv+pvIndex, columns)) { 8161cb0ef41Sopenharmony_ci break; 8171cb0ef41Sopenharmony_ci } 8181cb0ef41Sopenharmony_ci } 8191cb0ef41Sopenharmony_ci } 8201cb0ef41Sopenharmony_ci return selectForMask(sel, mask, status); 8211cb0ef41Sopenharmony_ci} 8221cb0ef41Sopenharmony_ci 8231cb0ef41Sopenharmony_ci#endif // !UCONFIG_NO_CONVERSION 824