11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci/* 41cb0ef41Sopenharmony_ci****************************************************************************** 51cb0ef41Sopenharmony_ci* 61cb0ef41Sopenharmony_ci* Copyright (C) 2000-2016, International Business Machines 71cb0ef41Sopenharmony_ci* Corporation and others. All Rights Reserved. 81cb0ef41Sopenharmony_ci* 91cb0ef41Sopenharmony_ci****************************************************************************** 101cb0ef41Sopenharmony_ci* file name: ucnvscsu.c 111cb0ef41Sopenharmony_ci* encoding: UTF-8 121cb0ef41Sopenharmony_ci* tab size: 8 (not used) 131cb0ef41Sopenharmony_ci* indentation:4 141cb0ef41Sopenharmony_ci* 151cb0ef41Sopenharmony_ci* created on: 2000nov18 161cb0ef41Sopenharmony_ci* created by: Markus W. Scherer 171cb0ef41Sopenharmony_ci* 181cb0ef41Sopenharmony_ci* This is an implementation of the Standard Compression Scheme for Unicode 191cb0ef41Sopenharmony_ci* as defined in https://www.unicode.org/reports/tr6/ . 201cb0ef41Sopenharmony_ci* Reserved commands and window settings are treated as illegal sequences and 211cb0ef41Sopenharmony_ci* will result in callback calls. 
221cb0ef41Sopenharmony_ci*/ 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_ci#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 271cb0ef41Sopenharmony_ci 281cb0ef41Sopenharmony_ci#include "unicode/ucnv.h" 291cb0ef41Sopenharmony_ci#include "unicode/ucnv_cb.h" 301cb0ef41Sopenharmony_ci#include "unicode/utf16.h" 311cb0ef41Sopenharmony_ci#include "ucnv_bld.h" 321cb0ef41Sopenharmony_ci#include "ucnv_cnv.h" 331cb0ef41Sopenharmony_ci#include "cmemory.h" 341cb0ef41Sopenharmony_ci 351cb0ef41Sopenharmony_ci/* SCSU definitions --------------------------------------------------------- */ 361cb0ef41Sopenharmony_ci 371cb0ef41Sopenharmony_ci/* SCSU command byte values */ 381cb0ef41Sopenharmony_cienum { 391cb0ef41Sopenharmony_ci SQ0=0x01, /* Quote from window pair 0 */ 401cb0ef41Sopenharmony_ci SQ7=0x08, /* Quote from window pair 7 */ 411cb0ef41Sopenharmony_ci SDX=0x0B, /* Define a window as extended */ 421cb0ef41Sopenharmony_ci Srs=0x0C, /* reserved */ 431cb0ef41Sopenharmony_ci SQU=0x0E, /* Quote a single Unicode character */ 441cb0ef41Sopenharmony_ci SCU=0x0F, /* Change to Unicode mode */ 451cb0ef41Sopenharmony_ci SC0=0x10, /* Select window 0 */ 461cb0ef41Sopenharmony_ci SC7=0x17, /* Select window 7 */ 471cb0ef41Sopenharmony_ci SD0=0x18, /* Define and select window 0 */ 481cb0ef41Sopenharmony_ci SD7=0x1F, /* Define and select window 7 */ 491cb0ef41Sopenharmony_ci 501cb0ef41Sopenharmony_ci UC0=0xE0, /* Select window 0 */ 511cb0ef41Sopenharmony_ci UC7=0xE7, /* Select window 7 */ 521cb0ef41Sopenharmony_ci UD0=0xE8, /* Define and select window 0 */ 531cb0ef41Sopenharmony_ci UD7=0xEF, /* Define and select window 7 */ 541cb0ef41Sopenharmony_ci UQU=0xF0, /* Quote a single Unicode character */ 551cb0ef41Sopenharmony_ci UDX=0xF1, /* Define a Window as extended */ 561cb0ef41Sopenharmony_ci Urs=0xF2 /* reserved */ 571cb0ef41Sopenharmony_ci}; 581cb0ef41Sopenharmony_ci 591cb0ef41Sopenharmony_cienum { 
601cb0ef41Sopenharmony_ci /* 611cb0ef41Sopenharmony_ci * Unicode code points from 3400 to E000 are not adressible by 621cb0ef41Sopenharmony_ci * dynamic window, since in these areas no short run alphabets are 631cb0ef41Sopenharmony_ci * found. Therefore add gapOffset to all values from gapThreshold. 641cb0ef41Sopenharmony_ci */ 651cb0ef41Sopenharmony_ci gapThreshold=0x68, 661cb0ef41Sopenharmony_ci gapOffset=0xAC00, 671cb0ef41Sopenharmony_ci 681cb0ef41Sopenharmony_ci /* values between reservedStart and fixedThreshold are reserved */ 691cb0ef41Sopenharmony_ci reservedStart=0xA8, 701cb0ef41Sopenharmony_ci 711cb0ef41Sopenharmony_ci /* use table of predefined fixed offsets for values from fixedThreshold */ 721cb0ef41Sopenharmony_ci fixedThreshold=0xF9 731cb0ef41Sopenharmony_ci}; 741cb0ef41Sopenharmony_ci 751cb0ef41Sopenharmony_ci/* constant offsets for the 8 static windows */ 761cb0ef41Sopenharmony_cistatic const uint32_t staticOffsets[8]={ 771cb0ef41Sopenharmony_ci 0x0000, /* ASCII for quoted tags */ 781cb0ef41Sopenharmony_ci 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ 791cb0ef41Sopenharmony_ci 0x0100, /* Latin Extended-A */ 801cb0ef41Sopenharmony_ci 0x0300, /* Combining Diacritical Marks */ 811cb0ef41Sopenharmony_ci 0x2000, /* General Punctuation */ 821cb0ef41Sopenharmony_ci 0x2080, /* Currency Symbols */ 831cb0ef41Sopenharmony_ci 0x2100, /* Letterlike Symbols and Number Forms */ 841cb0ef41Sopenharmony_ci 0x3000 /* CJK Symbols and punctuation */ 851cb0ef41Sopenharmony_ci}; 861cb0ef41Sopenharmony_ci 871cb0ef41Sopenharmony_ci/* initial offsets for the 8 dynamic (sliding) windows */ 881cb0ef41Sopenharmony_cistatic const uint32_t initialDynamicOffsets[8]={ 891cb0ef41Sopenharmony_ci 0x0080, /* Latin-1 */ 901cb0ef41Sopenharmony_ci 0x00C0, /* Latin Extended A */ 911cb0ef41Sopenharmony_ci 0x0400, /* Cyrillic */ 921cb0ef41Sopenharmony_ci 0x0600, /* Arabic */ 931cb0ef41Sopenharmony_ci 0x0900, /* Devanagari */ 941cb0ef41Sopenharmony_ci 0x3040, /* Hiragana */ 
951cb0ef41Sopenharmony_ci 0x30A0, /* Katakana */ 961cb0ef41Sopenharmony_ci 0xFF00 /* Fullwidth ASCII */ 971cb0ef41Sopenharmony_ci}; 981cb0ef41Sopenharmony_ci 991cb0ef41Sopenharmony_ci/* Table of fixed predefined Offsets */ 1001cb0ef41Sopenharmony_cistatic const uint32_t fixedOffsets[]={ 1011cb0ef41Sopenharmony_ci /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ 1021cb0ef41Sopenharmony_ci /* 0xFA */ 0x0250, /* IPA extensions */ 1031cb0ef41Sopenharmony_ci /* 0xFB */ 0x0370, /* Greek */ 1041cb0ef41Sopenharmony_ci /* 0xFC */ 0x0530, /* Armenian */ 1051cb0ef41Sopenharmony_ci /* 0xFD */ 0x3040, /* Hiragana */ 1061cb0ef41Sopenharmony_ci /* 0xFE */ 0x30A0, /* Katakana */ 1071cb0ef41Sopenharmony_ci /* 0xFF */ 0xFF60 /* Halfwidth Katakana */ 1081cb0ef41Sopenharmony_ci}; 1091cb0ef41Sopenharmony_ci 1101cb0ef41Sopenharmony_ci/* state values */ 1111cb0ef41Sopenharmony_cienum { 1121cb0ef41Sopenharmony_ci readCommand, 1131cb0ef41Sopenharmony_ci quotePairOne, 1141cb0ef41Sopenharmony_ci quotePairTwo, 1151cb0ef41Sopenharmony_ci quoteOne, 1161cb0ef41Sopenharmony_ci definePairOne, 1171cb0ef41Sopenharmony_ci definePairTwo, 1181cb0ef41Sopenharmony_ci defineOne 1191cb0ef41Sopenharmony_ci}; 1201cb0ef41Sopenharmony_ci 1211cb0ef41Sopenharmony_citypedef struct SCSUData { 1221cb0ef41Sopenharmony_ci /* dynamic window offsets, initialize to default values from initialDynamicOffsets */ 1231cb0ef41Sopenharmony_ci uint32_t toUDynamicOffsets[8]; 1241cb0ef41Sopenharmony_ci uint32_t fromUDynamicOffsets[8]; 1251cb0ef41Sopenharmony_ci 1261cb0ef41Sopenharmony_ci /* state machine state - toUnicode */ 1271cb0ef41Sopenharmony_ci UBool toUIsSingleByteMode; 1281cb0ef41Sopenharmony_ci uint8_t toUState; 1291cb0ef41Sopenharmony_ci int8_t toUQuoteWindow, toUDynamicWindow; 1301cb0ef41Sopenharmony_ci uint8_t toUByteOne; 1311cb0ef41Sopenharmony_ci uint8_t toUPadding[3]; 1321cb0ef41Sopenharmony_ci 1331cb0ef41Sopenharmony_ci /* state machine state - fromUnicode */ 1341cb0ef41Sopenharmony_ci UBool 
fromUIsSingleByteMode; 1351cb0ef41Sopenharmony_ci int8_t fromUDynamicWindow; 1361cb0ef41Sopenharmony_ci 1371cb0ef41Sopenharmony_ci /* 1381cb0ef41Sopenharmony_ci * windowUse[] keeps track of the use of the dynamic windows: 1391cb0ef41Sopenharmony_ci * At nextWindowUseIndex there is the least recently used window, 1401cb0ef41Sopenharmony_ci * and the following windows (in a wrapping manner) are more and more 1411cb0ef41Sopenharmony_ci * recently used. 1421cb0ef41Sopenharmony_ci * At nextWindowUseIndex-1 there is the most recently used window. 1431cb0ef41Sopenharmony_ci */ 1441cb0ef41Sopenharmony_ci uint8_t locale; 1451cb0ef41Sopenharmony_ci int8_t nextWindowUseIndex; 1461cb0ef41Sopenharmony_ci int8_t windowUse[8]; 1471cb0ef41Sopenharmony_ci} SCSUData; 1481cb0ef41Sopenharmony_ci 1491cb0ef41Sopenharmony_cistatic const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; 1501cb0ef41Sopenharmony_cistatic const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; 1511cb0ef41Sopenharmony_ci 1521cb0ef41Sopenharmony_cienum { 1531cb0ef41Sopenharmony_ci lGeneric, l_ja 1541cb0ef41Sopenharmony_ci}; 1551cb0ef41Sopenharmony_ci 1561cb0ef41Sopenharmony_ci/* SCSU setup functions ----------------------------------------------------- */ 1571cb0ef41Sopenharmony_ciU_CDECL_BEGIN 1581cb0ef41Sopenharmony_cistatic void U_CALLCONV 1591cb0ef41Sopenharmony_ci_SCSUReset(UConverter *cnv, UConverterResetChoice choice) { 1601cb0ef41Sopenharmony_ci SCSUData *scsu=(SCSUData *)cnv->extraInfo; 1611cb0ef41Sopenharmony_ci 1621cb0ef41Sopenharmony_ci if(choice<=UCNV_RESET_TO_UNICODE) { 1631cb0ef41Sopenharmony_ci /* reset toUnicode */ 1641cb0ef41Sopenharmony_ci uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); 1651cb0ef41Sopenharmony_ci 1661cb0ef41Sopenharmony_ci scsu->toUIsSingleByteMode=true; 1671cb0ef41Sopenharmony_ci scsu->toUState=readCommand; 1681cb0ef41Sopenharmony_ci scsu->toUQuoteWindow=scsu->toUDynamicWindow=0; 1691cb0ef41Sopenharmony_ci scsu->toUByteOne=0; 
1701cb0ef41Sopenharmony_ci 1711cb0ef41Sopenharmony_ci cnv->toULength=0; 1721cb0ef41Sopenharmony_ci } 1731cb0ef41Sopenharmony_ci if(choice!=UCNV_RESET_TO_UNICODE) { 1741cb0ef41Sopenharmony_ci /* reset fromUnicode */ 1751cb0ef41Sopenharmony_ci uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); 1761cb0ef41Sopenharmony_ci 1771cb0ef41Sopenharmony_ci scsu->fromUIsSingleByteMode=true; 1781cb0ef41Sopenharmony_ci scsu->fromUDynamicWindow=0; 1791cb0ef41Sopenharmony_ci 1801cb0ef41Sopenharmony_ci scsu->nextWindowUseIndex=0; 1811cb0ef41Sopenharmony_ci switch(scsu->locale) { 1821cb0ef41Sopenharmony_ci case l_ja: 1831cb0ef41Sopenharmony_ci uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); 1841cb0ef41Sopenharmony_ci break; 1851cb0ef41Sopenharmony_ci default: 1861cb0ef41Sopenharmony_ci uprv_memcpy(scsu->windowUse, initialWindowUse, 8); 1871cb0ef41Sopenharmony_ci break; 1881cb0ef41Sopenharmony_ci } 1891cb0ef41Sopenharmony_ci 1901cb0ef41Sopenharmony_ci cnv->fromUChar32=0; 1911cb0ef41Sopenharmony_ci } 1921cb0ef41Sopenharmony_ci} 1931cb0ef41Sopenharmony_ci 1941cb0ef41Sopenharmony_cistatic void U_CALLCONV 1951cb0ef41Sopenharmony_ci_SCSUOpen(UConverter *cnv, 1961cb0ef41Sopenharmony_ci UConverterLoadArgs *pArgs, 1971cb0ef41Sopenharmony_ci UErrorCode *pErrorCode) { 1981cb0ef41Sopenharmony_ci const char *locale=pArgs->locale; 1991cb0ef41Sopenharmony_ci if(pArgs->onlyTestIsLoadable) { 2001cb0ef41Sopenharmony_ci return; 2011cb0ef41Sopenharmony_ci } 2021cb0ef41Sopenharmony_ci cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); 2031cb0ef41Sopenharmony_ci if(cnv->extraInfo!=nullptr) { 2041cb0ef41Sopenharmony_ci if(locale!=nullptr && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { 2051cb0ef41Sopenharmony_ci ((SCSUData *)cnv->extraInfo)->locale=l_ja; 2061cb0ef41Sopenharmony_ci } else { 2071cb0ef41Sopenharmony_ci ((SCSUData *)cnv->extraInfo)->locale=lGeneric; 2081cb0ef41Sopenharmony_ci } 2091cb0ef41Sopenharmony_ci _SCSUReset(cnv, UCNV_RESET_BOTH); 
2101cb0ef41Sopenharmony_ci } else { 2111cb0ef41Sopenharmony_ci *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 2121cb0ef41Sopenharmony_ci } 2131cb0ef41Sopenharmony_ci 2141cb0ef41Sopenharmony_ci /* Set the substitution character U+fffd as a Unicode string. */ 2151cb0ef41Sopenharmony_ci cnv->subUChars[0]=0xfffd; 2161cb0ef41Sopenharmony_ci cnv->subCharLen=-1; 2171cb0ef41Sopenharmony_ci} 2181cb0ef41Sopenharmony_ci 2191cb0ef41Sopenharmony_cistatic void U_CALLCONV 2201cb0ef41Sopenharmony_ci_SCSUClose(UConverter *cnv) { 2211cb0ef41Sopenharmony_ci if(cnv->extraInfo!=nullptr) { 2221cb0ef41Sopenharmony_ci if(!cnv->isExtraLocal) { 2231cb0ef41Sopenharmony_ci uprv_free(cnv->extraInfo); 2241cb0ef41Sopenharmony_ci } 2251cb0ef41Sopenharmony_ci cnv->extraInfo=nullptr; 2261cb0ef41Sopenharmony_ci } 2271cb0ef41Sopenharmony_ci} 2281cb0ef41Sopenharmony_ci 2291cb0ef41Sopenharmony_ci/* SCSU-to-Unicode conversion functions ------------------------------------- */ 2301cb0ef41Sopenharmony_ci 2311cb0ef41Sopenharmony_cistatic void U_CALLCONV 2321cb0ef41Sopenharmony_ci_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 2331cb0ef41Sopenharmony_ci UErrorCode *pErrorCode) { 2341cb0ef41Sopenharmony_ci UConverter *cnv; 2351cb0ef41Sopenharmony_ci SCSUData *scsu; 2361cb0ef41Sopenharmony_ci const uint8_t *source, *sourceLimit; 2371cb0ef41Sopenharmony_ci char16_t *target; 2381cb0ef41Sopenharmony_ci const char16_t *targetLimit; 2391cb0ef41Sopenharmony_ci int32_t *offsets; 2401cb0ef41Sopenharmony_ci UBool isSingleByteMode; 2411cb0ef41Sopenharmony_ci uint8_t state, byteOne; 2421cb0ef41Sopenharmony_ci int8_t quoteWindow, dynamicWindow; 2431cb0ef41Sopenharmony_ci 2441cb0ef41Sopenharmony_ci int32_t sourceIndex, nextSourceIndex; 2451cb0ef41Sopenharmony_ci 2461cb0ef41Sopenharmony_ci uint8_t b; 2471cb0ef41Sopenharmony_ci 2481cb0ef41Sopenharmony_ci /* set up the local pointers */ 2491cb0ef41Sopenharmony_ci cnv=pArgs->converter; 2501cb0ef41Sopenharmony_ci scsu=(SCSUData *)cnv->extraInfo; 2511cb0ef41Sopenharmony_ci 
2521cb0ef41Sopenharmony_ci source=(const uint8_t *)pArgs->source; 2531cb0ef41Sopenharmony_ci sourceLimit=(const uint8_t *)pArgs->sourceLimit; 2541cb0ef41Sopenharmony_ci target=pArgs->target; 2551cb0ef41Sopenharmony_ci targetLimit=pArgs->targetLimit; 2561cb0ef41Sopenharmony_ci offsets=pArgs->offsets; 2571cb0ef41Sopenharmony_ci 2581cb0ef41Sopenharmony_ci /* get the state machine state */ 2591cb0ef41Sopenharmony_ci isSingleByteMode=scsu->toUIsSingleByteMode; 2601cb0ef41Sopenharmony_ci state=scsu->toUState; 2611cb0ef41Sopenharmony_ci quoteWindow=scsu->toUQuoteWindow; 2621cb0ef41Sopenharmony_ci dynamicWindow=scsu->toUDynamicWindow; 2631cb0ef41Sopenharmony_ci byteOne=scsu->toUByteOne; 2641cb0ef41Sopenharmony_ci 2651cb0ef41Sopenharmony_ci /* sourceIndex=-1 if the current character began in the previous buffer */ 2661cb0ef41Sopenharmony_ci sourceIndex=state==readCommand ? 0 : -1; 2671cb0ef41Sopenharmony_ci nextSourceIndex=0; 2681cb0ef41Sopenharmony_ci 2691cb0ef41Sopenharmony_ci /* 2701cb0ef41Sopenharmony_ci * conversion "loop" 2711cb0ef41Sopenharmony_ci * 2721cb0ef41Sopenharmony_ci * For performance, this is not a normal C loop. 2731cb0ef41Sopenharmony_ci * Instead, there are two code blocks for the two SCSU modes. 2741cb0ef41Sopenharmony_ci * The function branches to either one, and a change of the mode is done with a goto to 2751cb0ef41Sopenharmony_ci * the other branch. 2761cb0ef41Sopenharmony_ci * 2771cb0ef41Sopenharmony_ci * Each branch has two conventional loops: 2781cb0ef41Sopenharmony_ci * - a fast-path loop for the most common codes in the mode 2791cb0ef41Sopenharmony_ci * - a loop for all other codes in the mode 2801cb0ef41Sopenharmony_ci * When the fast-path runs into a code that it cannot handle, its loop ends and it 2811cb0ef41Sopenharmony_ci * runs into the following loop to handle the other codes. 2821cb0ef41Sopenharmony_ci * The end of the input or output buffer is also handled by the slower loop. 
2831cb0ef41Sopenharmony_ci * The slow loop jumps (goto) to the fast-path loop again as soon as possible. 2841cb0ef41Sopenharmony_ci * 2851cb0ef41Sopenharmony_ci * The callback handling is done by returning with an error code. 2861cb0ef41Sopenharmony_ci * The conversion framework actually calls the callback function. 2871cb0ef41Sopenharmony_ci */ 2881cb0ef41Sopenharmony_ci if(isSingleByteMode) { 2891cb0ef41Sopenharmony_ci /* fast path for single-byte mode */ 2901cb0ef41Sopenharmony_ci if(state==readCommand) { 2911cb0ef41Sopenharmony_cifastSingle: 2921cb0ef41Sopenharmony_ci while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { 2931cb0ef41Sopenharmony_ci ++source; 2941cb0ef41Sopenharmony_ci ++nextSourceIndex; 2951cb0ef41Sopenharmony_ci if(b<=0x7f) { 2961cb0ef41Sopenharmony_ci /* write US-ASCII graphic character or DEL */ 2971cb0ef41Sopenharmony_ci *target++=(char16_t)b; 2981cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 2991cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 3001cb0ef41Sopenharmony_ci } 3011cb0ef41Sopenharmony_ci } else { 3021cb0ef41Sopenharmony_ci /* write from dynamic window */ 3031cb0ef41Sopenharmony_ci uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); 3041cb0ef41Sopenharmony_ci if(c<=0xffff) { 3051cb0ef41Sopenharmony_ci *target++=(char16_t)c; 3061cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 3071cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 3081cb0ef41Sopenharmony_ci } 3091cb0ef41Sopenharmony_ci } else { 3101cb0ef41Sopenharmony_ci /* output surrogate pair */ 3111cb0ef41Sopenharmony_ci *target++=(char16_t)(0xd7c0+(c>>10)); 3121cb0ef41Sopenharmony_ci if(target<targetLimit) { 3131cb0ef41Sopenharmony_ci *target++=(char16_t)(0xdc00|(c&0x3ff)); 3141cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 3151cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 3161cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 3171cb0ef41Sopenharmony_ci } 3181cb0ef41Sopenharmony_ci } else { 3191cb0ef41Sopenharmony_ci /* target overflow */ 3201cb0ef41Sopenharmony_ci 
if(offsets!=nullptr) { 3211cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 3221cb0ef41Sopenharmony_ci } 3231cb0ef41Sopenharmony_ci cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff)); 3241cb0ef41Sopenharmony_ci cnv->UCharErrorBufferLength=1; 3251cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 3261cb0ef41Sopenharmony_ci goto endloop; 3271cb0ef41Sopenharmony_ci } 3281cb0ef41Sopenharmony_ci } 3291cb0ef41Sopenharmony_ci } 3301cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 3311cb0ef41Sopenharmony_ci } 3321cb0ef41Sopenharmony_ci } 3331cb0ef41Sopenharmony_ci 3341cb0ef41Sopenharmony_ci /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ 3351cb0ef41Sopenharmony_cisingleByteMode: 3361cb0ef41Sopenharmony_ci while(source<sourceLimit) { 3371cb0ef41Sopenharmony_ci if(target>=targetLimit) { 3381cb0ef41Sopenharmony_ci /* target is full */ 3391cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 3401cb0ef41Sopenharmony_ci break; 3411cb0ef41Sopenharmony_ci } 3421cb0ef41Sopenharmony_ci b=*source++; 3431cb0ef41Sopenharmony_ci ++nextSourceIndex; 3441cb0ef41Sopenharmony_ci switch(state) { 3451cb0ef41Sopenharmony_ci case readCommand: 3461cb0ef41Sopenharmony_ci /* redundant conditions are commented out */ 3471cb0ef41Sopenharmony_ci /* here: b<0x20 because otherwise we would be in fastSingle */ 3481cb0ef41Sopenharmony_ci if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 3491cb0ef41Sopenharmony_ci /* CR/LF/TAB/NUL */ 3501cb0ef41Sopenharmony_ci *target++=(char16_t)b; 3511cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 3521cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 3531cb0ef41Sopenharmony_ci } 3541cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 3551cb0ef41Sopenharmony_ci goto fastSingle; 3561cb0ef41Sopenharmony_ci } else if(SC0<=b) { 3571cb0ef41Sopenharmony_ci if(b<=SC7) { 3581cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)(b-SC0); 3591cb0ef41Sopenharmony_ci 
sourceIndex=nextSourceIndex; 3601cb0ef41Sopenharmony_ci goto fastSingle; 3611cb0ef41Sopenharmony_ci } else /* if(SD0<=b && b<=SD7) */ { 3621cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)(b-SD0); 3631cb0ef41Sopenharmony_ci state=defineOne; 3641cb0ef41Sopenharmony_ci } 3651cb0ef41Sopenharmony_ci } else if(/* SQ0<=b && */ b<=SQ7) { 3661cb0ef41Sopenharmony_ci quoteWindow=(int8_t)(b-SQ0); 3671cb0ef41Sopenharmony_ci state=quoteOne; 3681cb0ef41Sopenharmony_ci } else if(b==SDX) { 3691cb0ef41Sopenharmony_ci state=definePairOne; 3701cb0ef41Sopenharmony_ci } else if(b==SQU) { 3711cb0ef41Sopenharmony_ci state=quotePairOne; 3721cb0ef41Sopenharmony_ci } else if(b==SCU) { 3731cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 3741cb0ef41Sopenharmony_ci isSingleByteMode=false; 3751cb0ef41Sopenharmony_ci goto fastUnicode; 3761cb0ef41Sopenharmony_ci } else /* Srs */ { 3771cb0ef41Sopenharmony_ci /* callback(illegal) */ 3781cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 3791cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 3801cb0ef41Sopenharmony_ci cnv->toULength=1; 3811cb0ef41Sopenharmony_ci goto endloop; 3821cb0ef41Sopenharmony_ci } 3831cb0ef41Sopenharmony_ci 3841cb0ef41Sopenharmony_ci /* store the first byte of a multibyte sequence in toUBytes[] */ 3851cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 3861cb0ef41Sopenharmony_ci cnv->toULength=1; 3871cb0ef41Sopenharmony_ci break; 3881cb0ef41Sopenharmony_ci case quotePairOne: 3891cb0ef41Sopenharmony_ci byteOne=b; 3901cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 3911cb0ef41Sopenharmony_ci cnv->toULength=2; 3921cb0ef41Sopenharmony_ci state=quotePairTwo; 3931cb0ef41Sopenharmony_ci break; 3941cb0ef41Sopenharmony_ci case quotePairTwo: 3951cb0ef41Sopenharmony_ci *target++=(char16_t)((byteOne<<8)|b); 3961cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 3971cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 3981cb0ef41Sopenharmony_ci } 3991cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 4001cb0ef41Sopenharmony_ci state=readCommand; 
4011cb0ef41Sopenharmony_ci goto fastSingle; 4021cb0ef41Sopenharmony_ci case quoteOne: 4031cb0ef41Sopenharmony_ci if(b<0x80) { 4041cb0ef41Sopenharmony_ci /* all static offsets are in the BMP */ 4051cb0ef41Sopenharmony_ci *target++=(char16_t)(staticOffsets[quoteWindow]+b); 4061cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 4071cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 4081cb0ef41Sopenharmony_ci } 4091cb0ef41Sopenharmony_ci } else { 4101cb0ef41Sopenharmony_ci /* write from dynamic window */ 4111cb0ef41Sopenharmony_ci uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); 4121cb0ef41Sopenharmony_ci if(c<=0xffff) { 4131cb0ef41Sopenharmony_ci *target++=(char16_t)c; 4141cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 4151cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 4161cb0ef41Sopenharmony_ci } 4171cb0ef41Sopenharmony_ci } else { 4181cb0ef41Sopenharmony_ci /* output surrogate pair */ 4191cb0ef41Sopenharmony_ci *target++=(char16_t)(0xd7c0+(c>>10)); 4201cb0ef41Sopenharmony_ci if(target<targetLimit) { 4211cb0ef41Sopenharmony_ci *target++=(char16_t)(0xdc00|(c&0x3ff)); 4221cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 4231cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 4241cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 4251cb0ef41Sopenharmony_ci } 4261cb0ef41Sopenharmony_ci } else { 4271cb0ef41Sopenharmony_ci /* target overflow */ 4281cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 4291cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 4301cb0ef41Sopenharmony_ci } 4311cb0ef41Sopenharmony_ci cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff)); 4321cb0ef41Sopenharmony_ci cnv->UCharErrorBufferLength=1; 4331cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 4341cb0ef41Sopenharmony_ci goto endloop; 4351cb0ef41Sopenharmony_ci } 4361cb0ef41Sopenharmony_ci } 4371cb0ef41Sopenharmony_ci } 4381cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 4391cb0ef41Sopenharmony_ci state=readCommand; 4401cb0ef41Sopenharmony_ci goto fastSingle; 4411cb0ef41Sopenharmony_ci case 
definePairOne: 4421cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)((b>>5)&7); 4431cb0ef41Sopenharmony_ci byteOne=(uint8_t)(b&0x1f); 4441cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 4451cb0ef41Sopenharmony_ci cnv->toULength=2; 4461cb0ef41Sopenharmony_ci state=definePairTwo; 4471cb0ef41Sopenharmony_ci break; 4481cb0ef41Sopenharmony_ci case definePairTwo: 4491cb0ef41Sopenharmony_ci scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); 4501cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 4511cb0ef41Sopenharmony_ci state=readCommand; 4521cb0ef41Sopenharmony_ci goto fastSingle; 4531cb0ef41Sopenharmony_ci case defineOne: 4541cb0ef41Sopenharmony_ci if(b==0) { 4551cb0ef41Sopenharmony_ci /* callback(illegal): Reserved window offset value 0 */ 4561cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 4571cb0ef41Sopenharmony_ci cnv->toULength=2; 4581cb0ef41Sopenharmony_ci goto endloop; 4591cb0ef41Sopenharmony_ci } else if(b<gapThreshold) { 4601cb0ef41Sopenharmony_ci scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; 4611cb0ef41Sopenharmony_ci } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { 4621cb0ef41Sopenharmony_ci scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; 4631cb0ef41Sopenharmony_ci } else if(b>=fixedThreshold) { 4641cb0ef41Sopenharmony_ci scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; 4651cb0ef41Sopenharmony_ci } else { 4661cb0ef41Sopenharmony_ci /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ 4671cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 4681cb0ef41Sopenharmony_ci cnv->toULength=2; 4691cb0ef41Sopenharmony_ci goto endloop; 4701cb0ef41Sopenharmony_ci } 4711cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 4721cb0ef41Sopenharmony_ci state=readCommand; 4731cb0ef41Sopenharmony_ci goto fastSingle; 4741cb0ef41Sopenharmony_ci } 4751cb0ef41Sopenharmony_ci } 4761cb0ef41Sopenharmony_ci } else { 4771cb0ef41Sopenharmony_ci /* fast path for Unicode mode */ 4781cb0ef41Sopenharmony_ci if(state==readCommand) 
{ 4791cb0ef41Sopenharmony_cifastUnicode: 4801cb0ef41Sopenharmony_ci while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { 4811cb0ef41Sopenharmony_ci *target++=(char16_t)((b<<8)|source[1]); 4821cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 4831cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 4841cb0ef41Sopenharmony_ci } 4851cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 4861cb0ef41Sopenharmony_ci nextSourceIndex+=2; 4871cb0ef41Sopenharmony_ci source+=2; 4881cb0ef41Sopenharmony_ci } 4891cb0ef41Sopenharmony_ci } 4901cb0ef41Sopenharmony_ci 4911cb0ef41Sopenharmony_ci /* normal state machine for Unicode mode */ 4921cb0ef41Sopenharmony_ci/* unicodeByteMode: */ 4931cb0ef41Sopenharmony_ci while(source<sourceLimit) { 4941cb0ef41Sopenharmony_ci if(target>=targetLimit) { 4951cb0ef41Sopenharmony_ci /* target is full */ 4961cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 4971cb0ef41Sopenharmony_ci break; 4981cb0ef41Sopenharmony_ci } 4991cb0ef41Sopenharmony_ci b=*source++; 5001cb0ef41Sopenharmony_ci ++nextSourceIndex; 5011cb0ef41Sopenharmony_ci switch(state) { 5021cb0ef41Sopenharmony_ci case readCommand: 5031cb0ef41Sopenharmony_ci if((uint8_t)(b-UC0)>(Urs-UC0)) { 5041cb0ef41Sopenharmony_ci byteOne=b; 5051cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 5061cb0ef41Sopenharmony_ci cnv->toULength=1; 5071cb0ef41Sopenharmony_ci state=quotePairTwo; 5081cb0ef41Sopenharmony_ci } else if(/* UC0<=b && */ b<=UC7) { 5091cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)(b-UC0); 5101cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 5111cb0ef41Sopenharmony_ci isSingleByteMode=true; 5121cb0ef41Sopenharmony_ci goto fastSingle; 5131cb0ef41Sopenharmony_ci } else if(/* UD0<=b && */ b<=UD7) { 5141cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)(b-UD0); 5151cb0ef41Sopenharmony_ci isSingleByteMode=true; 5161cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 5171cb0ef41Sopenharmony_ci cnv->toULength=1; 5181cb0ef41Sopenharmony_ci state=defineOne; 
5191cb0ef41Sopenharmony_ci goto singleByteMode; 5201cb0ef41Sopenharmony_ci } else if(b==UDX) { 5211cb0ef41Sopenharmony_ci isSingleByteMode=true; 5221cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 5231cb0ef41Sopenharmony_ci cnv->toULength=1; 5241cb0ef41Sopenharmony_ci state=definePairOne; 5251cb0ef41Sopenharmony_ci goto singleByteMode; 5261cb0ef41Sopenharmony_ci } else if(b==UQU) { 5271cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 5281cb0ef41Sopenharmony_ci cnv->toULength=1; 5291cb0ef41Sopenharmony_ci state=quotePairOne; 5301cb0ef41Sopenharmony_ci } else /* Urs */ { 5311cb0ef41Sopenharmony_ci /* callback(illegal) */ 5321cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 5331cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 5341cb0ef41Sopenharmony_ci cnv->toULength=1; 5351cb0ef41Sopenharmony_ci goto endloop; 5361cb0ef41Sopenharmony_ci } 5371cb0ef41Sopenharmony_ci break; 5381cb0ef41Sopenharmony_ci case quotePairOne: 5391cb0ef41Sopenharmony_ci byteOne=b; 5401cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 5411cb0ef41Sopenharmony_ci cnv->toULength=2; 5421cb0ef41Sopenharmony_ci state=quotePairTwo; 5431cb0ef41Sopenharmony_ci break; 5441cb0ef41Sopenharmony_ci case quotePairTwo: 5451cb0ef41Sopenharmony_ci *target++=(char16_t)((byteOne<<8)|b); 5461cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 5471cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 5481cb0ef41Sopenharmony_ci } 5491cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 5501cb0ef41Sopenharmony_ci state=readCommand; 5511cb0ef41Sopenharmony_ci goto fastUnicode; 5521cb0ef41Sopenharmony_ci } 5531cb0ef41Sopenharmony_ci } 5541cb0ef41Sopenharmony_ci } 5551cb0ef41Sopenharmony_ciendloop: 5561cb0ef41Sopenharmony_ci 5571cb0ef41Sopenharmony_ci /* set the converter state back into UConverter */ 5581cb0ef41Sopenharmony_ci if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { 5591cb0ef41Sopenharmony_ci /* reset to deal with the next character */ 5601cb0ef41Sopenharmony_ci state=readCommand; 5611cb0ef41Sopenharmony_ci } else 
if(state==readCommand) { 5621cb0ef41Sopenharmony_ci /* not in a multi-byte sequence, reset toULength */ 5631cb0ef41Sopenharmony_ci cnv->toULength=0; 5641cb0ef41Sopenharmony_ci } 5651cb0ef41Sopenharmony_ci scsu->toUIsSingleByteMode=isSingleByteMode; 5661cb0ef41Sopenharmony_ci scsu->toUState=state; 5671cb0ef41Sopenharmony_ci scsu->toUQuoteWindow=quoteWindow; 5681cb0ef41Sopenharmony_ci scsu->toUDynamicWindow=dynamicWindow; 5691cb0ef41Sopenharmony_ci scsu->toUByteOne=byteOne; 5701cb0ef41Sopenharmony_ci 5711cb0ef41Sopenharmony_ci /* write back the updated pointers */ 5721cb0ef41Sopenharmony_ci pArgs->source=(const char *)source; 5731cb0ef41Sopenharmony_ci pArgs->target=target; 5741cb0ef41Sopenharmony_ci pArgs->offsets=offsets; 5751cb0ef41Sopenharmony_ci return; 5761cb0ef41Sopenharmony_ci} 5771cb0ef41Sopenharmony_ci 5781cb0ef41Sopenharmony_ci/* 5791cb0ef41Sopenharmony_ci * Identical to _SCSUToUnicodeWithOffsets but without offset handling. 5801cb0ef41Sopenharmony_ci * If a change is made in the original function, then either 5811cb0ef41Sopenharmony_ci * change this function the same way or 5821cb0ef41Sopenharmony_ci * re-copy the original function and remove the variables 5831cb0ef41Sopenharmony_ci * offsets, sourceIndex, and nextSourceIndex. 
5841cb0ef41Sopenharmony_ci */ 5851cb0ef41Sopenharmony_cistatic void U_CALLCONV 5861cb0ef41Sopenharmony_ci_SCSUToUnicode(UConverterToUnicodeArgs *pArgs, 5871cb0ef41Sopenharmony_ci UErrorCode *pErrorCode) { 5881cb0ef41Sopenharmony_ci UConverter *cnv; 5891cb0ef41Sopenharmony_ci SCSUData *scsu; 5901cb0ef41Sopenharmony_ci const uint8_t *source, *sourceLimit; 5911cb0ef41Sopenharmony_ci char16_t *target; 5921cb0ef41Sopenharmony_ci const char16_t *targetLimit; 5931cb0ef41Sopenharmony_ci UBool isSingleByteMode; 5941cb0ef41Sopenharmony_ci uint8_t state, byteOne; 5951cb0ef41Sopenharmony_ci int8_t quoteWindow, dynamicWindow; 5961cb0ef41Sopenharmony_ci 5971cb0ef41Sopenharmony_ci uint8_t b; 5981cb0ef41Sopenharmony_ci 5991cb0ef41Sopenharmony_ci /* set up the local pointers */ 6001cb0ef41Sopenharmony_ci cnv=pArgs->converter; 6011cb0ef41Sopenharmony_ci scsu=(SCSUData *)cnv->extraInfo; 6021cb0ef41Sopenharmony_ci 6031cb0ef41Sopenharmony_ci source=(const uint8_t *)pArgs->source; 6041cb0ef41Sopenharmony_ci sourceLimit=(const uint8_t *)pArgs->sourceLimit; 6051cb0ef41Sopenharmony_ci target=pArgs->target; 6061cb0ef41Sopenharmony_ci targetLimit=pArgs->targetLimit; 6071cb0ef41Sopenharmony_ci 6081cb0ef41Sopenharmony_ci /* get the state machine state */ 6091cb0ef41Sopenharmony_ci isSingleByteMode=scsu->toUIsSingleByteMode; 6101cb0ef41Sopenharmony_ci state=scsu->toUState; 6111cb0ef41Sopenharmony_ci quoteWindow=scsu->toUQuoteWindow; 6121cb0ef41Sopenharmony_ci dynamicWindow=scsu->toUDynamicWindow; 6131cb0ef41Sopenharmony_ci byteOne=scsu->toUByteOne; 6141cb0ef41Sopenharmony_ci 6151cb0ef41Sopenharmony_ci /* 6161cb0ef41Sopenharmony_ci * conversion "loop" 6171cb0ef41Sopenharmony_ci * 6181cb0ef41Sopenharmony_ci * For performance, this is not a normal C loop. 6191cb0ef41Sopenharmony_ci * Instead, there are two code blocks for the two SCSU modes. 
6201cb0ef41Sopenharmony_ci * The function branches to either one, and a change of the mode is done with a goto to 6211cb0ef41Sopenharmony_ci * the other branch. 6221cb0ef41Sopenharmony_ci * 6231cb0ef41Sopenharmony_ci * Each branch has two conventional loops: 6241cb0ef41Sopenharmony_ci * - a fast-path loop for the most common codes in the mode 6251cb0ef41Sopenharmony_ci * - a loop for all other codes in the mode 6261cb0ef41Sopenharmony_ci * When the fast-path runs into a code that it cannot handle, its loop ends and it 6271cb0ef41Sopenharmony_ci * runs into the following loop to handle the other codes. 6281cb0ef41Sopenharmony_ci * The end of the input or output buffer is also handled by the slower loop. 6291cb0ef41Sopenharmony_ci * The slow loop jumps (goto) to the fast-path loop again as soon as possible. 6301cb0ef41Sopenharmony_ci * 6311cb0ef41Sopenharmony_ci * The callback handling is done by returning with an error code. 6321cb0ef41Sopenharmony_ci * The conversion framework actually calls the callback function. 
6331cb0ef41Sopenharmony_ci */ 6341cb0ef41Sopenharmony_ci if(isSingleByteMode) { 6351cb0ef41Sopenharmony_ci /* fast path for single-byte mode */ 6361cb0ef41Sopenharmony_ci if(state==readCommand) { 6371cb0ef41Sopenharmony_cifastSingle: 6381cb0ef41Sopenharmony_ci while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { 6391cb0ef41Sopenharmony_ci ++source; 6401cb0ef41Sopenharmony_ci if(b<=0x7f) { 6411cb0ef41Sopenharmony_ci /* write US-ASCII graphic character or DEL */ 6421cb0ef41Sopenharmony_ci *target++=(char16_t)b; 6431cb0ef41Sopenharmony_ci } else { 6441cb0ef41Sopenharmony_ci /* write from dynamic window */ 6451cb0ef41Sopenharmony_ci uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); 6461cb0ef41Sopenharmony_ci if(c<=0xffff) { 6471cb0ef41Sopenharmony_ci *target++=(char16_t)c; 6481cb0ef41Sopenharmony_ci } else { 6491cb0ef41Sopenharmony_ci /* output surrogate pair */ 6501cb0ef41Sopenharmony_ci *target++=(char16_t)(0xd7c0+(c>>10)); 6511cb0ef41Sopenharmony_ci if(target<targetLimit) { 6521cb0ef41Sopenharmony_ci *target++=(char16_t)(0xdc00|(c&0x3ff)); 6531cb0ef41Sopenharmony_ci } else { 6541cb0ef41Sopenharmony_ci /* target overflow */ 6551cb0ef41Sopenharmony_ci cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff)); 6561cb0ef41Sopenharmony_ci cnv->UCharErrorBufferLength=1; 6571cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 6581cb0ef41Sopenharmony_ci goto endloop; 6591cb0ef41Sopenharmony_ci } 6601cb0ef41Sopenharmony_ci } 6611cb0ef41Sopenharmony_ci } 6621cb0ef41Sopenharmony_ci } 6631cb0ef41Sopenharmony_ci } 6641cb0ef41Sopenharmony_ci 6651cb0ef41Sopenharmony_ci /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ 6661cb0ef41Sopenharmony_cisingleByteMode: 6671cb0ef41Sopenharmony_ci while(source<sourceLimit) { 6681cb0ef41Sopenharmony_ci if(target>=targetLimit) { 6691cb0ef41Sopenharmony_ci /* target is full */ 6701cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 6711cb0ef41Sopenharmony_ci break; 
6721cb0ef41Sopenharmony_ci } 6731cb0ef41Sopenharmony_ci b=*source++; 6741cb0ef41Sopenharmony_ci switch(state) { 6751cb0ef41Sopenharmony_ci case readCommand: 6761cb0ef41Sopenharmony_ci /* redundant conditions are commented out */ 6771cb0ef41Sopenharmony_ci /* here: b<0x20 because otherwise we would be in fastSingle */ 6781cb0ef41Sopenharmony_ci if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 6791cb0ef41Sopenharmony_ci /* CR/LF/TAB/NUL */ 6801cb0ef41Sopenharmony_ci *target++=(char16_t)b; 6811cb0ef41Sopenharmony_ci goto fastSingle; 6821cb0ef41Sopenharmony_ci } else if(SC0<=b) { 6831cb0ef41Sopenharmony_ci if(b<=SC7) { 6841cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)(b-SC0); 6851cb0ef41Sopenharmony_ci goto fastSingle; 6861cb0ef41Sopenharmony_ci } else /* if(SD0<=b && b<=SD7) */ { 6871cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)(b-SD0); 6881cb0ef41Sopenharmony_ci state=defineOne; 6891cb0ef41Sopenharmony_ci } 6901cb0ef41Sopenharmony_ci } else if(/* SQ0<=b && */ b<=SQ7) { 6911cb0ef41Sopenharmony_ci quoteWindow=(int8_t)(b-SQ0); 6921cb0ef41Sopenharmony_ci state=quoteOne; 6931cb0ef41Sopenharmony_ci } else if(b==SDX) { 6941cb0ef41Sopenharmony_ci state=definePairOne; 6951cb0ef41Sopenharmony_ci } else if(b==SQU) { 6961cb0ef41Sopenharmony_ci state=quotePairOne; 6971cb0ef41Sopenharmony_ci } else if(b==SCU) { 6981cb0ef41Sopenharmony_ci isSingleByteMode=false; 6991cb0ef41Sopenharmony_ci goto fastUnicode; 7001cb0ef41Sopenharmony_ci } else /* Srs */ { 7011cb0ef41Sopenharmony_ci /* callback(illegal) */ 7021cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 7031cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 7041cb0ef41Sopenharmony_ci cnv->toULength=1; 7051cb0ef41Sopenharmony_ci goto endloop; 7061cb0ef41Sopenharmony_ci } 7071cb0ef41Sopenharmony_ci 7081cb0ef41Sopenharmony_ci /* store the first byte of a multibyte sequence in toUBytes[] */ 7091cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 7101cb0ef41Sopenharmony_ci cnv->toULength=1; 
7111cb0ef41Sopenharmony_ci break; 7121cb0ef41Sopenharmony_ci case quotePairOne: 7131cb0ef41Sopenharmony_ci byteOne=b; 7141cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 7151cb0ef41Sopenharmony_ci cnv->toULength=2; 7161cb0ef41Sopenharmony_ci state=quotePairTwo; 7171cb0ef41Sopenharmony_ci break; 7181cb0ef41Sopenharmony_ci case quotePairTwo: 7191cb0ef41Sopenharmony_ci *target++=(char16_t)((byteOne<<8)|b); 7201cb0ef41Sopenharmony_ci state=readCommand; 7211cb0ef41Sopenharmony_ci goto fastSingle; 7221cb0ef41Sopenharmony_ci case quoteOne: 7231cb0ef41Sopenharmony_ci if(b<0x80) { 7241cb0ef41Sopenharmony_ci /* all static offsets are in the BMP */ 7251cb0ef41Sopenharmony_ci *target++=(char16_t)(staticOffsets[quoteWindow]+b); 7261cb0ef41Sopenharmony_ci } else { 7271cb0ef41Sopenharmony_ci /* write from dynamic window */ 7281cb0ef41Sopenharmony_ci uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); 7291cb0ef41Sopenharmony_ci if(c<=0xffff) { 7301cb0ef41Sopenharmony_ci *target++=(char16_t)c; 7311cb0ef41Sopenharmony_ci } else { 7321cb0ef41Sopenharmony_ci /* output surrogate pair */ 7331cb0ef41Sopenharmony_ci *target++=(char16_t)(0xd7c0+(c>>10)); 7341cb0ef41Sopenharmony_ci if(target<targetLimit) { 7351cb0ef41Sopenharmony_ci *target++=(char16_t)(0xdc00|(c&0x3ff)); 7361cb0ef41Sopenharmony_ci } else { 7371cb0ef41Sopenharmony_ci /* target overflow */ 7381cb0ef41Sopenharmony_ci cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff)); 7391cb0ef41Sopenharmony_ci cnv->UCharErrorBufferLength=1; 7401cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 7411cb0ef41Sopenharmony_ci goto endloop; 7421cb0ef41Sopenharmony_ci } 7431cb0ef41Sopenharmony_ci } 7441cb0ef41Sopenharmony_ci } 7451cb0ef41Sopenharmony_ci state=readCommand; 7461cb0ef41Sopenharmony_ci goto fastSingle; 7471cb0ef41Sopenharmony_ci case definePairOne: 7481cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)((b>>5)&7); 7491cb0ef41Sopenharmony_ci byteOne=(uint8_t)(b&0x1f); 7501cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 
7511cb0ef41Sopenharmony_ci cnv->toULength=2; 7521cb0ef41Sopenharmony_ci state=definePairTwo; 7531cb0ef41Sopenharmony_ci break; 7541cb0ef41Sopenharmony_ci case definePairTwo: 7551cb0ef41Sopenharmony_ci scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); 7561cb0ef41Sopenharmony_ci state=readCommand; 7571cb0ef41Sopenharmony_ci goto fastSingle; 7581cb0ef41Sopenharmony_ci case defineOne: 7591cb0ef41Sopenharmony_ci if(b==0) { 7601cb0ef41Sopenharmony_ci /* callback(illegal): Reserved window offset value 0 */ 7611cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 7621cb0ef41Sopenharmony_ci cnv->toULength=2; 7631cb0ef41Sopenharmony_ci goto endloop; 7641cb0ef41Sopenharmony_ci } else if(b<gapThreshold) { 7651cb0ef41Sopenharmony_ci scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; 7661cb0ef41Sopenharmony_ci } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { 7671cb0ef41Sopenharmony_ci scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; 7681cb0ef41Sopenharmony_ci } else if(b>=fixedThreshold) { 7691cb0ef41Sopenharmony_ci scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; 7701cb0ef41Sopenharmony_ci } else { 7711cb0ef41Sopenharmony_ci /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ 7721cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 7731cb0ef41Sopenharmony_ci cnv->toULength=2; 7741cb0ef41Sopenharmony_ci goto endloop; 7751cb0ef41Sopenharmony_ci } 7761cb0ef41Sopenharmony_ci state=readCommand; 7771cb0ef41Sopenharmony_ci goto fastSingle; 7781cb0ef41Sopenharmony_ci } 7791cb0ef41Sopenharmony_ci } 7801cb0ef41Sopenharmony_ci } else { 7811cb0ef41Sopenharmony_ci /* fast path for Unicode mode */ 7821cb0ef41Sopenharmony_ci if(state==readCommand) { 7831cb0ef41Sopenharmony_cifastUnicode: 7841cb0ef41Sopenharmony_ci while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { 7851cb0ef41Sopenharmony_ci *target++=(char16_t)((b<<8)|source[1]); 7861cb0ef41Sopenharmony_ci source+=2; 
7871cb0ef41Sopenharmony_ci } 7881cb0ef41Sopenharmony_ci } 7891cb0ef41Sopenharmony_ci 7901cb0ef41Sopenharmony_ci /* normal state machine for Unicode mode */ 7911cb0ef41Sopenharmony_ci/* unicodeByteMode: */ 7921cb0ef41Sopenharmony_ci while(source<sourceLimit) { 7931cb0ef41Sopenharmony_ci if(target>=targetLimit) { 7941cb0ef41Sopenharmony_ci /* target is full */ 7951cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 7961cb0ef41Sopenharmony_ci break; 7971cb0ef41Sopenharmony_ci } 7981cb0ef41Sopenharmony_ci b=*source++; 7991cb0ef41Sopenharmony_ci switch(state) { 8001cb0ef41Sopenharmony_ci case readCommand: 8011cb0ef41Sopenharmony_ci if((uint8_t)(b-UC0)>(Urs-UC0)) { 8021cb0ef41Sopenharmony_ci byteOne=b; 8031cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 8041cb0ef41Sopenharmony_ci cnv->toULength=1; 8051cb0ef41Sopenharmony_ci state=quotePairTwo; 8061cb0ef41Sopenharmony_ci } else if(/* UC0<=b && */ b<=UC7) { 8071cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)(b-UC0); 8081cb0ef41Sopenharmony_ci isSingleByteMode=true; 8091cb0ef41Sopenharmony_ci goto fastSingle; 8101cb0ef41Sopenharmony_ci } else if(/* UD0<=b && */ b<=UD7) { 8111cb0ef41Sopenharmony_ci dynamicWindow=(int8_t)(b-UD0); 8121cb0ef41Sopenharmony_ci isSingleByteMode=true; 8131cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 8141cb0ef41Sopenharmony_ci cnv->toULength=1; 8151cb0ef41Sopenharmony_ci state=defineOne; 8161cb0ef41Sopenharmony_ci goto singleByteMode; 8171cb0ef41Sopenharmony_ci } else if(b==UDX) { 8181cb0ef41Sopenharmony_ci isSingleByteMode=true; 8191cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 8201cb0ef41Sopenharmony_ci cnv->toULength=1; 8211cb0ef41Sopenharmony_ci state=definePairOne; 8221cb0ef41Sopenharmony_ci goto singleByteMode; 8231cb0ef41Sopenharmony_ci } else if(b==UQU) { 8241cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 8251cb0ef41Sopenharmony_ci cnv->toULength=1; 8261cb0ef41Sopenharmony_ci state=quotePairOne; 8271cb0ef41Sopenharmony_ci } else /* Urs */ { 8281cb0ef41Sopenharmony_ci /* callback(illegal) */ 
8291cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 8301cb0ef41Sopenharmony_ci cnv->toUBytes[0]=b; 8311cb0ef41Sopenharmony_ci cnv->toULength=1; 8321cb0ef41Sopenharmony_ci goto endloop; 8331cb0ef41Sopenharmony_ci } 8341cb0ef41Sopenharmony_ci break; 8351cb0ef41Sopenharmony_ci case quotePairOne: 8361cb0ef41Sopenharmony_ci byteOne=b; 8371cb0ef41Sopenharmony_ci cnv->toUBytes[1]=b; 8381cb0ef41Sopenharmony_ci cnv->toULength=2; 8391cb0ef41Sopenharmony_ci state=quotePairTwo; 8401cb0ef41Sopenharmony_ci break; 8411cb0ef41Sopenharmony_ci case quotePairTwo: 8421cb0ef41Sopenharmony_ci *target++=(char16_t)((byteOne<<8)|b); 8431cb0ef41Sopenharmony_ci state=readCommand; 8441cb0ef41Sopenharmony_ci goto fastUnicode; 8451cb0ef41Sopenharmony_ci } 8461cb0ef41Sopenharmony_ci } 8471cb0ef41Sopenharmony_ci } 8481cb0ef41Sopenharmony_ciendloop: 8491cb0ef41Sopenharmony_ci 8501cb0ef41Sopenharmony_ci /* set the converter state back into UConverter */ 8511cb0ef41Sopenharmony_ci if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { 8521cb0ef41Sopenharmony_ci /* reset to deal with the next character */ 8531cb0ef41Sopenharmony_ci state=readCommand; 8541cb0ef41Sopenharmony_ci } else if(state==readCommand) { 8551cb0ef41Sopenharmony_ci /* not in a multi-byte sequence, reset toULength */ 8561cb0ef41Sopenharmony_ci cnv->toULength=0; 8571cb0ef41Sopenharmony_ci } 8581cb0ef41Sopenharmony_ci scsu->toUIsSingleByteMode=isSingleByteMode; 8591cb0ef41Sopenharmony_ci scsu->toUState=state; 8601cb0ef41Sopenharmony_ci scsu->toUQuoteWindow=quoteWindow; 8611cb0ef41Sopenharmony_ci scsu->toUDynamicWindow=dynamicWindow; 8621cb0ef41Sopenharmony_ci scsu->toUByteOne=byteOne; 8631cb0ef41Sopenharmony_ci 8641cb0ef41Sopenharmony_ci /* write back the updated pointers */ 8651cb0ef41Sopenharmony_ci pArgs->source=(const char *)source; 8661cb0ef41Sopenharmony_ci pArgs->target=target; 8671cb0ef41Sopenharmony_ci return; 8681cb0ef41Sopenharmony_ci} 8691cb0ef41Sopenharmony_ciU_CDECL_END 
/* SCSU-from-Unicode conversion functions ----------------------------------- */

/*
 * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
 * reasonable results. The lookahead is minimal.
 * Many cases are simple:
 * A character fits directly into the current mode, a dynamic or static window,
 * or is not compressible. These cases are tested first.
 * Real compression heuristics are applied to the rest, in code branches for
 * single/Unicode mode and BMP/supplementary code points.
 * The heuristics used here are extremely simple.
 */

/*
 * Find the first of the eight windows whose 128-code-point range
 * [offsets[n], offsets[n]+0x7f] contains c.
 * Returns the window number 0..7, or -1 if no window contains c.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t index=0;
    while(index<8) {
        /* unsigned subtraction: wraps to a huge value when c lies below the window start */
        const uint32_t distance=c-offsets[index];
        if(distance<0x80) {
            return index;
        }
        ++index;
    }
    return -1;
}

/* is the character in the dynamic window starting at the offset, or in the direct-encoded range?
*/ 8961cb0ef41Sopenharmony_cistatic UBool 8971cb0ef41Sopenharmony_ciisInOffsetWindowOrDirect(uint32_t offset, uint32_t c) { 8981cb0ef41Sopenharmony_ci return (UBool)(c<=offset+0x7f && 8991cb0ef41Sopenharmony_ci (c>=offset || (c<=0x7f && 9001cb0ef41Sopenharmony_ci (c>=0x20 || (1UL<<c)&0x2601)))); 9011cb0ef41Sopenharmony_ci /* binary 0010 0110 0000 0001, 9021cb0ef41Sopenharmony_ci check for b==0xd || b==0xa || b==9 || b==0 */ 9031cb0ef41Sopenharmony_ci} 9041cb0ef41Sopenharmony_ci 9051cb0ef41Sopenharmony_ci/* 9061cb0ef41Sopenharmony_ci * getNextDynamicWindow returns the next dynamic window to be redefined 9071cb0ef41Sopenharmony_ci */ 9081cb0ef41Sopenharmony_cistatic int8_t 9091cb0ef41Sopenharmony_cigetNextDynamicWindow(SCSUData *scsu) { 9101cb0ef41Sopenharmony_ci int8_t window=scsu->windowUse[scsu->nextWindowUseIndex]; 9111cb0ef41Sopenharmony_ci if(++scsu->nextWindowUseIndex==8) { 9121cb0ef41Sopenharmony_ci scsu->nextWindowUseIndex=0; 9131cb0ef41Sopenharmony_ci } 9141cb0ef41Sopenharmony_ci return window; 9151cb0ef41Sopenharmony_ci} 9161cb0ef41Sopenharmony_ci 9171cb0ef41Sopenharmony_ci/* 9181cb0ef41Sopenharmony_ci * useDynamicWindow() adjusts 9191cb0ef41Sopenharmony_ci * windowUse[] and nextWindowUseIndex for the algorithm to choose 9201cb0ef41Sopenharmony_ci * the next dynamic window to be defined; 9211cb0ef41Sopenharmony_ci * a subclass may override it and provide its own algorithm. 
9221cb0ef41Sopenharmony_ci */ 9231cb0ef41Sopenharmony_cistatic void 9241cb0ef41Sopenharmony_ciuseDynamicWindow(SCSUData *scsu, int8_t window) { 9251cb0ef41Sopenharmony_ci /* 9261cb0ef41Sopenharmony_ci * move the existing window, which just became the most recently used one, 9271cb0ef41Sopenharmony_ci * up in windowUse[] to nextWindowUseIndex-1 9281cb0ef41Sopenharmony_ci */ 9291cb0ef41Sopenharmony_ci 9301cb0ef41Sopenharmony_ci /* first, find the index of the window - backwards to favor the more recently used windows */ 9311cb0ef41Sopenharmony_ci int i, j; 9321cb0ef41Sopenharmony_ci 9331cb0ef41Sopenharmony_ci i=scsu->nextWindowUseIndex; 9341cb0ef41Sopenharmony_ci do { 9351cb0ef41Sopenharmony_ci if(--i<0) { 9361cb0ef41Sopenharmony_ci i=7; 9371cb0ef41Sopenharmony_ci } 9381cb0ef41Sopenharmony_ci } while(scsu->windowUse[i]!=window); 9391cb0ef41Sopenharmony_ci 9401cb0ef41Sopenharmony_ci /* now copy each windowUse[i+1] to [i] */ 9411cb0ef41Sopenharmony_ci j=i+1; 9421cb0ef41Sopenharmony_ci if(j==8) { 9431cb0ef41Sopenharmony_ci j=0; 9441cb0ef41Sopenharmony_ci } 9451cb0ef41Sopenharmony_ci while(j!=scsu->nextWindowUseIndex) { 9461cb0ef41Sopenharmony_ci scsu->windowUse[i]=scsu->windowUse[j]; 9471cb0ef41Sopenharmony_ci i=j; 9481cb0ef41Sopenharmony_ci if(++j==8) { j=0; } 9491cb0ef41Sopenharmony_ci } 9501cb0ef41Sopenharmony_ci 9511cb0ef41Sopenharmony_ci /* finally, set the window into the most recently used index */ 9521cb0ef41Sopenharmony_ci scsu->windowUse[i]=window; 9531cb0ef41Sopenharmony_ci} 9541cb0ef41Sopenharmony_ci 9551cb0ef41Sopenharmony_ci/* 9561cb0ef41Sopenharmony_ci * calculate the offset and the code for a dynamic window that contains the character 9571cb0ef41Sopenharmony_ci * takes fixed offsets into account 9581cb0ef41Sopenharmony_ci * the offset of the window is stored in the offset variable, 9591cb0ef41Sopenharmony_ci * the code is returned 9601cb0ef41Sopenharmony_ci * 9611cb0ef41Sopenharmony_ci * return offset code: -1 none <=0xff code for SDn/UDn else code for 
SDX/UDX, subtract 0x200 to get the true code 9621cb0ef41Sopenharmony_ci */ 9631cb0ef41Sopenharmony_cistatic int 9641cb0ef41Sopenharmony_cigetDynamicOffset(uint32_t c, uint32_t *pOffset) { 9651cb0ef41Sopenharmony_ci int i; 9661cb0ef41Sopenharmony_ci 9671cb0ef41Sopenharmony_ci for(i=0; i<7; ++i) { 9681cb0ef41Sopenharmony_ci if((uint32_t)(c-fixedOffsets[i])<=0x7f) { 9691cb0ef41Sopenharmony_ci *pOffset=fixedOffsets[i]; 9701cb0ef41Sopenharmony_ci return 0xf9+i; 9711cb0ef41Sopenharmony_ci } 9721cb0ef41Sopenharmony_ci } 9731cb0ef41Sopenharmony_ci 9741cb0ef41Sopenharmony_ci if(c<0x80) { 9751cb0ef41Sopenharmony_ci /* No dynamic window for US-ASCII. */ 9761cb0ef41Sopenharmony_ci return -1; 9771cb0ef41Sopenharmony_ci } else if(c<0x3400 || 9781cb0ef41Sopenharmony_ci (uint32_t)(c-0x10000)<(0x14000-0x10000) || 9791cb0ef41Sopenharmony_ci (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000) 9801cb0ef41Sopenharmony_ci ) { 9811cb0ef41Sopenharmony_ci /* This character is in a code range for a "small", i.e., reasonably windowable, script. */ 9821cb0ef41Sopenharmony_ci *pOffset=c&0x7fffff80; 9831cb0ef41Sopenharmony_ci return (int)(c>>7); 9841cb0ef41Sopenharmony_ci } else if(0xe000<=c && c!=0xfeff && c<0xfff0) { 9851cb0ef41Sopenharmony_ci /* For these characters we need to take the gapOffset into account. 
*/ 9861cb0ef41Sopenharmony_ci *pOffset=c&0x7fffff80; 9871cb0ef41Sopenharmony_ci return (int)((c-gapOffset)>>7); 9881cb0ef41Sopenharmony_ci } else { 9891cb0ef41Sopenharmony_ci return -1; 9901cb0ef41Sopenharmony_ci } 9911cb0ef41Sopenharmony_ci} 9921cb0ef41Sopenharmony_ciU_CDECL_BEGIN 9931cb0ef41Sopenharmony_ci/* 9941cb0ef41Sopenharmony_ci * Idea for compression: 9951cb0ef41Sopenharmony_ci * - save SCSUData and other state before really starting work 9961cb0ef41Sopenharmony_ci * - at endloop, see if compression could be better with just unicode mode 9971cb0ef41Sopenharmony_ci * - don't do this if a callback has been called 9981cb0ef41Sopenharmony_ci * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning 9991cb0ef41Sopenharmony_ci * - different buffer handling! 10001cb0ef41Sopenharmony_ci * 10011cb0ef41Sopenharmony_ci * Drawback or need for corrective handling: 10021cb0ef41Sopenharmony_ci * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and 10031cb0ef41Sopenharmony_ci * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible 10041cb0ef41Sopenharmony_ci * not only for compression but also for HTML/XML documents with following charset/encoding announcers. 10051cb0ef41Sopenharmony_ci * 10061cb0ef41Sopenharmony_ci * How to achieve both? 10071cb0ef41Sopenharmony_ci * - Only replace the result after an SDX or SCU? 
10081cb0ef41Sopenharmony_ci */ 10091cb0ef41Sopenharmony_ci 10101cb0ef41Sopenharmony_cistatic void U_CALLCONV 10111cb0ef41Sopenharmony_ci_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 10121cb0ef41Sopenharmony_ci UErrorCode *pErrorCode) { 10131cb0ef41Sopenharmony_ci UConverter *cnv; 10141cb0ef41Sopenharmony_ci SCSUData *scsu; 10151cb0ef41Sopenharmony_ci const char16_t *source, *sourceLimit; 10161cb0ef41Sopenharmony_ci uint8_t *target; 10171cb0ef41Sopenharmony_ci int32_t targetCapacity; 10181cb0ef41Sopenharmony_ci int32_t *offsets; 10191cb0ef41Sopenharmony_ci 10201cb0ef41Sopenharmony_ci UBool isSingleByteMode; 10211cb0ef41Sopenharmony_ci uint8_t dynamicWindow; 10221cb0ef41Sopenharmony_ci uint32_t currentOffset; 10231cb0ef41Sopenharmony_ci 10241cb0ef41Sopenharmony_ci uint32_t c, delta; 10251cb0ef41Sopenharmony_ci 10261cb0ef41Sopenharmony_ci int32_t sourceIndex, nextSourceIndex; 10271cb0ef41Sopenharmony_ci 10281cb0ef41Sopenharmony_ci int32_t length; 10291cb0ef41Sopenharmony_ci 10301cb0ef41Sopenharmony_ci /* variables for compression heuristics */ 10311cb0ef41Sopenharmony_ci uint32_t offset; 10321cb0ef41Sopenharmony_ci char16_t lead, trail; 10331cb0ef41Sopenharmony_ci int code; 10341cb0ef41Sopenharmony_ci int8_t window; 10351cb0ef41Sopenharmony_ci 10361cb0ef41Sopenharmony_ci /* set up the local pointers */ 10371cb0ef41Sopenharmony_ci cnv=pArgs->converter; 10381cb0ef41Sopenharmony_ci scsu=(SCSUData *)cnv->extraInfo; 10391cb0ef41Sopenharmony_ci 10401cb0ef41Sopenharmony_ci /* set up the local pointers */ 10411cb0ef41Sopenharmony_ci source=pArgs->source; 10421cb0ef41Sopenharmony_ci sourceLimit=pArgs->sourceLimit; 10431cb0ef41Sopenharmony_ci target=(uint8_t *)pArgs->target; 10441cb0ef41Sopenharmony_ci targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 10451cb0ef41Sopenharmony_ci offsets=pArgs->offsets; 10461cb0ef41Sopenharmony_ci 10471cb0ef41Sopenharmony_ci /* get the state machine state */ 10481cb0ef41Sopenharmony_ci 
isSingleByteMode=scsu->fromUIsSingleByteMode; 10491cb0ef41Sopenharmony_ci dynamicWindow=scsu->fromUDynamicWindow; 10501cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 10511cb0ef41Sopenharmony_ci 10521cb0ef41Sopenharmony_ci c=cnv->fromUChar32; 10531cb0ef41Sopenharmony_ci 10541cb0ef41Sopenharmony_ci /* sourceIndex=-1 if the current character began in the previous buffer */ 10551cb0ef41Sopenharmony_ci sourceIndex= c==0 ? 0 : -1; 10561cb0ef41Sopenharmony_ci nextSourceIndex=0; 10571cb0ef41Sopenharmony_ci 10581cb0ef41Sopenharmony_ci /* similar conversion "loop" as in toUnicode */ 10591cb0ef41Sopenharmony_ciloop: 10601cb0ef41Sopenharmony_ci if(isSingleByteMode) { 10611cb0ef41Sopenharmony_ci if(c!=0 && targetCapacity>0) { 10621cb0ef41Sopenharmony_ci goto getTrailSingle; 10631cb0ef41Sopenharmony_ci } 10641cb0ef41Sopenharmony_ci 10651cb0ef41Sopenharmony_ci /* state machine for single-byte mode */ 10661cb0ef41Sopenharmony_ci/* singleByteMode: */ 10671cb0ef41Sopenharmony_ci while(source<sourceLimit) { 10681cb0ef41Sopenharmony_ci if(targetCapacity<=0) { 10691cb0ef41Sopenharmony_ci /* target is full */ 10701cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 10711cb0ef41Sopenharmony_ci break; 10721cb0ef41Sopenharmony_ci } 10731cb0ef41Sopenharmony_ci c=*source++; 10741cb0ef41Sopenharmony_ci ++nextSourceIndex; 10751cb0ef41Sopenharmony_ci 10761cb0ef41Sopenharmony_ci if((c-0x20)<=0x5f) { 10771cb0ef41Sopenharmony_ci /* pass US-ASCII graphic character through */ 10781cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 10791cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 10801cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 10811cb0ef41Sopenharmony_ci } 10821cb0ef41Sopenharmony_ci --targetCapacity; 10831cb0ef41Sopenharmony_ci } else if(c<0x20) { 10841cb0ef41Sopenharmony_ci if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 10851cb0ef41Sopenharmony_ci /* CR/LF/TAB/NUL */ 10861cb0ef41Sopenharmony_ci 
*target++=(uint8_t)c; 10871cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 10881cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 10891cb0ef41Sopenharmony_ci } 10901cb0ef41Sopenharmony_ci --targetCapacity; 10911cb0ef41Sopenharmony_ci } else { 10921cb0ef41Sopenharmony_ci /* quote C0 control character */ 10931cb0ef41Sopenharmony_ci c|=SQ0<<8; 10941cb0ef41Sopenharmony_ci length=2; 10951cb0ef41Sopenharmony_ci goto outputBytes; 10961cb0ef41Sopenharmony_ci } 10971cb0ef41Sopenharmony_ci } else if((delta=c-currentOffset)<=0x7f) { 10981cb0ef41Sopenharmony_ci /* use the current dynamic window */ 10991cb0ef41Sopenharmony_ci *target++=(uint8_t)(delta|0x80); 11001cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 11011cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 11021cb0ef41Sopenharmony_ci } 11031cb0ef41Sopenharmony_ci --targetCapacity; 11041cb0ef41Sopenharmony_ci } else if(U16_IS_SURROGATE(c)) { 11051cb0ef41Sopenharmony_ci if(U16_IS_SURROGATE_LEAD(c)) { 11061cb0ef41Sopenharmony_cigetTrailSingle: 11071cb0ef41Sopenharmony_ci lead=(char16_t)c; 11081cb0ef41Sopenharmony_ci if(source<sourceLimit) { 11091cb0ef41Sopenharmony_ci /* test the following code unit */ 11101cb0ef41Sopenharmony_ci trail=*source; 11111cb0ef41Sopenharmony_ci if(U16_IS_TRAIL(trail)) { 11121cb0ef41Sopenharmony_ci ++source; 11131cb0ef41Sopenharmony_ci ++nextSourceIndex; 11141cb0ef41Sopenharmony_ci c=U16_GET_SUPPLEMENTARY(c, trail); 11151cb0ef41Sopenharmony_ci /* convert this surrogate code point */ 11161cb0ef41Sopenharmony_ci /* exit this condition tree */ 11171cb0ef41Sopenharmony_ci } else { 11181cb0ef41Sopenharmony_ci /* this is an unmatched lead code unit (1st surrogate) */ 11191cb0ef41Sopenharmony_ci /* callback(illegal) */ 11201cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 11211cb0ef41Sopenharmony_ci goto endloop; 11221cb0ef41Sopenharmony_ci } 11231cb0ef41Sopenharmony_ci } else { 11241cb0ef41Sopenharmony_ci /* no more input */ 11251cb0ef41Sopenharmony_ci break; 11261cb0ef41Sopenharmony_ci } 
11271cb0ef41Sopenharmony_ci } else { 11281cb0ef41Sopenharmony_ci /* this is an unmatched trail code unit (2nd surrogate) */ 11291cb0ef41Sopenharmony_ci /* callback(illegal) */ 11301cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 11311cb0ef41Sopenharmony_ci goto endloop; 11321cb0ef41Sopenharmony_ci } 11331cb0ef41Sopenharmony_ci 11341cb0ef41Sopenharmony_ci /* compress supplementary character U+10000..U+10ffff */ 11351cb0ef41Sopenharmony_ci if((delta=c-currentOffset)<=0x7f) { 11361cb0ef41Sopenharmony_ci /* use the current dynamic window */ 11371cb0ef41Sopenharmony_ci *target++=(uint8_t)(delta|0x80); 11381cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 11391cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 11401cb0ef41Sopenharmony_ci } 11411cb0ef41Sopenharmony_ci --targetCapacity; 11421cb0ef41Sopenharmony_ci } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 11431cb0ef41Sopenharmony_ci /* there is a dynamic window that contains this character, change to it */ 11441cb0ef41Sopenharmony_ci dynamicWindow=window; 11451cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 11461cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 11471cb0ef41Sopenharmony_ci c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 11481cb0ef41Sopenharmony_ci length=2; 11491cb0ef41Sopenharmony_ci goto outputBytes; 11501cb0ef41Sopenharmony_ci } else if((code=getDynamicOffset(c, &offset))>=0) { 11511cb0ef41Sopenharmony_ci /* might check if there are more characters in this window to come */ 11521cb0ef41Sopenharmony_ci /* define an extended window with this character */ 11531cb0ef41Sopenharmony_ci code-=0x200; 11541cb0ef41Sopenharmony_ci dynamicWindow=getNextDynamicWindow(scsu); 11551cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 11561cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 11571cb0ef41Sopenharmony_ci 
c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 11581cb0ef41Sopenharmony_ci length=4; 11591cb0ef41Sopenharmony_ci goto outputBytes; 11601cb0ef41Sopenharmony_ci } else { 11611cb0ef41Sopenharmony_ci /* change to Unicode mode and output this (lead, trail) pair */ 11621cb0ef41Sopenharmony_ci isSingleByteMode=false; 11631cb0ef41Sopenharmony_ci *target++=(uint8_t)SCU; 11641cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 11651cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 11661cb0ef41Sopenharmony_ci } 11671cb0ef41Sopenharmony_ci --targetCapacity; 11681cb0ef41Sopenharmony_ci c=((uint32_t)lead<<16)|trail; 11691cb0ef41Sopenharmony_ci length=4; 11701cb0ef41Sopenharmony_ci goto outputBytes; 11711cb0ef41Sopenharmony_ci } 11721cb0ef41Sopenharmony_ci } else if(c<0xa0) { 11731cb0ef41Sopenharmony_ci /* quote C1 control character */ 11741cb0ef41Sopenharmony_ci c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ 11751cb0ef41Sopenharmony_ci length=2; 11761cb0ef41Sopenharmony_ci goto outputBytes; 11771cb0ef41Sopenharmony_ci } else if(c==0xfeff || c>=0xfff0) { 11781cb0ef41Sopenharmony_ci /* quote signature character=byte order mark and specials */ 11791cb0ef41Sopenharmony_ci c|=SQU<<16; 11801cb0ef41Sopenharmony_ci length=3; 11811cb0ef41Sopenharmony_ci goto outputBytes; 11821cb0ef41Sopenharmony_ci } else { 11831cb0ef41Sopenharmony_ci /* compress all other BMP characters */ 11841cb0ef41Sopenharmony_ci if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 11851cb0ef41Sopenharmony_ci /* there is a window defined that contains this character - switch to it or quote from it? 
*/ 11861cb0ef41Sopenharmony_ci if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { 11871cb0ef41Sopenharmony_ci /* change to dynamic window */ 11881cb0ef41Sopenharmony_ci dynamicWindow=window; 11891cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 11901cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 11911cb0ef41Sopenharmony_ci c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 11921cb0ef41Sopenharmony_ci length=2; 11931cb0ef41Sopenharmony_ci goto outputBytes; 11941cb0ef41Sopenharmony_ci } else { 11951cb0ef41Sopenharmony_ci /* quote from dynamic window */ 11961cb0ef41Sopenharmony_ci c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; 11971cb0ef41Sopenharmony_ci length=2; 11981cb0ef41Sopenharmony_ci goto outputBytes; 11991cb0ef41Sopenharmony_ci } 12001cb0ef41Sopenharmony_ci } else if((window=getWindow(staticOffsets, c))>=0) { 12011cb0ef41Sopenharmony_ci /* quote from static window */ 12021cb0ef41Sopenharmony_ci c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); 12031cb0ef41Sopenharmony_ci length=2; 12041cb0ef41Sopenharmony_ci goto outputBytes; 12051cb0ef41Sopenharmony_ci } else if((code=getDynamicOffset(c, &offset))>=0) { 12061cb0ef41Sopenharmony_ci /* define a dynamic window with this character */ 12071cb0ef41Sopenharmony_ci dynamicWindow=getNextDynamicWindow(scsu); 12081cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 12091cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 12101cb0ef41Sopenharmony_ci c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 12111cb0ef41Sopenharmony_ci length=3; 12121cb0ef41Sopenharmony_ci goto outputBytes; 12131cb0ef41Sopenharmony_ci } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && 12141cb0ef41Sopenharmony_ci (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 12151cb0ef41Sopenharmony_ci ) { 12161cb0ef41Sopenharmony_ci 
/* 12171cb0ef41Sopenharmony_ci * this character is not compressible (a BMP ideograph or similar); 12181cb0ef41Sopenharmony_ci * switch to Unicode mode if this is the last character in the block 12191cb0ef41Sopenharmony_ci * or there is at least one more ideograph following immediately 12201cb0ef41Sopenharmony_ci */ 12211cb0ef41Sopenharmony_ci isSingleByteMode=false; 12221cb0ef41Sopenharmony_ci c|=SCU<<16; 12231cb0ef41Sopenharmony_ci length=3; 12241cb0ef41Sopenharmony_ci goto outputBytes; 12251cb0ef41Sopenharmony_ci } else { 12261cb0ef41Sopenharmony_ci /* quote Unicode */ 12271cb0ef41Sopenharmony_ci c|=SQU<<16; 12281cb0ef41Sopenharmony_ci length=3; 12291cb0ef41Sopenharmony_ci goto outputBytes; 12301cb0ef41Sopenharmony_ci } 12311cb0ef41Sopenharmony_ci } 12321cb0ef41Sopenharmony_ci 12331cb0ef41Sopenharmony_ci /* normal end of conversion: prepare for a new character */ 12341cb0ef41Sopenharmony_ci c=0; 12351cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 12361cb0ef41Sopenharmony_ci } 12371cb0ef41Sopenharmony_ci } else { 12381cb0ef41Sopenharmony_ci if(c!=0 && targetCapacity>0) { 12391cb0ef41Sopenharmony_ci goto getTrailUnicode; 12401cb0ef41Sopenharmony_ci } 12411cb0ef41Sopenharmony_ci 12421cb0ef41Sopenharmony_ci /* state machine for Unicode mode */ 12431cb0ef41Sopenharmony_ci/* unicodeByteMode: */ 12441cb0ef41Sopenharmony_ci while(source<sourceLimit) { 12451cb0ef41Sopenharmony_ci if(targetCapacity<=0) { 12461cb0ef41Sopenharmony_ci /* target is full */ 12471cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 12481cb0ef41Sopenharmony_ci break; 12491cb0ef41Sopenharmony_ci } 12501cb0ef41Sopenharmony_ci c=*source++; 12511cb0ef41Sopenharmony_ci ++nextSourceIndex; 12521cb0ef41Sopenharmony_ci 12531cb0ef41Sopenharmony_ci if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { 12541cb0ef41Sopenharmony_ci /* not compressible, write character directly */ 12551cb0ef41Sopenharmony_ci if(targetCapacity>=2) { 12561cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>8); 
12571cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 12581cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 12591cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 12601cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 12611cb0ef41Sopenharmony_ci } 12621cb0ef41Sopenharmony_ci targetCapacity-=2; 12631cb0ef41Sopenharmony_ci } else { 12641cb0ef41Sopenharmony_ci length=2; 12651cb0ef41Sopenharmony_ci goto outputBytes; 12661cb0ef41Sopenharmony_ci } 12671cb0ef41Sopenharmony_ci } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { 12681cb0ef41Sopenharmony_ci /* compress BMP character if the following one is not an uncompressible ideograph */ 12691cb0ef41Sopenharmony_ci if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { 12701cb0ef41Sopenharmony_ci if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { 12711cb0ef41Sopenharmony_ci /* ASCII digit or letter */ 12721cb0ef41Sopenharmony_ci isSingleByteMode=true; 12731cb0ef41Sopenharmony_ci c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; 12741cb0ef41Sopenharmony_ci length=2; 12751cb0ef41Sopenharmony_ci goto outputBytes; 12761cb0ef41Sopenharmony_ci } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 12771cb0ef41Sopenharmony_ci /* there is a dynamic window that contains this character, change to it */ 12781cb0ef41Sopenharmony_ci isSingleByteMode=true; 12791cb0ef41Sopenharmony_ci dynamicWindow=window; 12801cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 12811cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 12821cb0ef41Sopenharmony_ci c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 12831cb0ef41Sopenharmony_ci length=2; 12841cb0ef41Sopenharmony_ci goto outputBytes; 12851cb0ef41Sopenharmony_ci } else if((code=getDynamicOffset(c, &offset))>=0) { 12861cb0ef41Sopenharmony_ci /* define a dynamic window with this character */ 12871cb0ef41Sopenharmony_ci isSingleByteMode=true; 12881cb0ef41Sopenharmony_ci 
dynamicWindow=getNextDynamicWindow(scsu); 12891cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 12901cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 12911cb0ef41Sopenharmony_ci c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 12921cb0ef41Sopenharmony_ci length=3; 12931cb0ef41Sopenharmony_ci goto outputBytes; 12941cb0ef41Sopenharmony_ci } 12951cb0ef41Sopenharmony_ci } 12961cb0ef41Sopenharmony_ci 12971cb0ef41Sopenharmony_ci /* don't know how to compress this character, just write it directly */ 12981cb0ef41Sopenharmony_ci length=2; 12991cb0ef41Sopenharmony_ci goto outputBytes; 13001cb0ef41Sopenharmony_ci } else if(c<0xe000) { 13011cb0ef41Sopenharmony_ci /* c is a surrogate */ 13021cb0ef41Sopenharmony_ci if(U16_IS_SURROGATE_LEAD(c)) { 13031cb0ef41Sopenharmony_cigetTrailUnicode: 13041cb0ef41Sopenharmony_ci lead=(char16_t)c; 13051cb0ef41Sopenharmony_ci if(source<sourceLimit) { 13061cb0ef41Sopenharmony_ci /* test the following code unit */ 13071cb0ef41Sopenharmony_ci trail=*source; 13081cb0ef41Sopenharmony_ci if(U16_IS_TRAIL(trail)) { 13091cb0ef41Sopenharmony_ci ++source; 13101cb0ef41Sopenharmony_ci ++nextSourceIndex; 13111cb0ef41Sopenharmony_ci c=U16_GET_SUPPLEMENTARY(c, trail); 13121cb0ef41Sopenharmony_ci /* convert this surrogate code point */ 13131cb0ef41Sopenharmony_ci /* exit this condition tree */ 13141cb0ef41Sopenharmony_ci } else { 13151cb0ef41Sopenharmony_ci /* this is an unmatched lead code unit (1st surrogate) */ 13161cb0ef41Sopenharmony_ci /* callback(illegal) */ 13171cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 13181cb0ef41Sopenharmony_ci goto endloop; 13191cb0ef41Sopenharmony_ci } 13201cb0ef41Sopenharmony_ci } else { 13211cb0ef41Sopenharmony_ci /* no more input */ 13221cb0ef41Sopenharmony_ci break; 13231cb0ef41Sopenharmony_ci } 13241cb0ef41Sopenharmony_ci } else { 13251cb0ef41Sopenharmony_ci /* this is an unmatched trail code unit (2nd surrogate) */ 
13261cb0ef41Sopenharmony_ci /* callback(illegal) */ 13271cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 13281cb0ef41Sopenharmony_ci goto endloop; 13291cb0ef41Sopenharmony_ci } 13301cb0ef41Sopenharmony_ci 13311cb0ef41Sopenharmony_ci /* compress supplementary character */ 13321cb0ef41Sopenharmony_ci if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && 13331cb0ef41Sopenharmony_ci !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 13341cb0ef41Sopenharmony_ci ) { 13351cb0ef41Sopenharmony_ci /* 13361cb0ef41Sopenharmony_ci * there is a dynamic window that contains this character and 13371cb0ef41Sopenharmony_ci * the following character is not uncompressible, 13381cb0ef41Sopenharmony_ci * change to the window 13391cb0ef41Sopenharmony_ci */ 13401cb0ef41Sopenharmony_ci isSingleByteMode=true; 13411cb0ef41Sopenharmony_ci dynamicWindow=window; 13421cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 13431cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 13441cb0ef41Sopenharmony_ci c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 13451cb0ef41Sopenharmony_ci length=2; 13461cb0ef41Sopenharmony_ci goto outputBytes; 13471cb0ef41Sopenharmony_ci } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ 13481cb0ef41Sopenharmony_ci (code=getDynamicOffset(c, &offset))>=0 13491cb0ef41Sopenharmony_ci ) { 13501cb0ef41Sopenharmony_ci /* two supplementary characters in (probably) the same window - define an extended one */ 13511cb0ef41Sopenharmony_ci isSingleByteMode=true; 13521cb0ef41Sopenharmony_ci code-=0x200; 13531cb0ef41Sopenharmony_ci dynamicWindow=getNextDynamicWindow(scsu); 13541cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 13551cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 13561cb0ef41Sopenharmony_ci c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 
13571cb0ef41Sopenharmony_ci length=4; 13581cb0ef41Sopenharmony_ci goto outputBytes; 13591cb0ef41Sopenharmony_ci } else { 13601cb0ef41Sopenharmony_ci /* don't know how to compress this character, just write it directly */ 13611cb0ef41Sopenharmony_ci c=((uint32_t)lead<<16)|trail; 13621cb0ef41Sopenharmony_ci length=4; 13631cb0ef41Sopenharmony_ci goto outputBytes; 13641cb0ef41Sopenharmony_ci } 13651cb0ef41Sopenharmony_ci } else /* 0xe000<=c<0xf300 */ { 13661cb0ef41Sopenharmony_ci /* quote to avoid SCSU tags */ 13671cb0ef41Sopenharmony_ci c|=UQU<<16; 13681cb0ef41Sopenharmony_ci length=3; 13691cb0ef41Sopenharmony_ci goto outputBytes; 13701cb0ef41Sopenharmony_ci } 13711cb0ef41Sopenharmony_ci 13721cb0ef41Sopenharmony_ci /* normal end of conversion: prepare for a new character */ 13731cb0ef41Sopenharmony_ci c=0; 13741cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 13751cb0ef41Sopenharmony_ci } 13761cb0ef41Sopenharmony_ci } 13771cb0ef41Sopenharmony_ciendloop: 13781cb0ef41Sopenharmony_ci 13791cb0ef41Sopenharmony_ci /* set the converter state back into UConverter */ 13801cb0ef41Sopenharmony_ci scsu->fromUIsSingleByteMode=isSingleByteMode; 13811cb0ef41Sopenharmony_ci scsu->fromUDynamicWindow=dynamicWindow; 13821cb0ef41Sopenharmony_ci 13831cb0ef41Sopenharmony_ci cnv->fromUChar32=c; 13841cb0ef41Sopenharmony_ci 13851cb0ef41Sopenharmony_ci /* write back the updated pointers */ 13861cb0ef41Sopenharmony_ci pArgs->source=source; 13871cb0ef41Sopenharmony_ci pArgs->target=(char *)target; 13881cb0ef41Sopenharmony_ci pArgs->offsets=offsets; 13891cb0ef41Sopenharmony_ci return; 13901cb0ef41Sopenharmony_ci 13911cb0ef41Sopenharmony_cioutputBytes: 13921cb0ef41Sopenharmony_ci /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ 13931cb0ef41Sopenharmony_ci /* from the first if in the loop we know that targetCapacity>0 */ 13941cb0ef41Sopenharmony_ci if(length<=targetCapacity) { 13951cb0ef41Sopenharmony_ci if(offsets==nullptr) { 13961cb0ef41Sopenharmony_ci 
switch(length) { 13971cb0ef41Sopenharmony_ci /* each branch falls through to the next one */ 13981cb0ef41Sopenharmony_ci case 4: 13991cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>24); 14001cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14011cb0ef41Sopenharmony_ci case 3: 14021cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>16); 14031cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14041cb0ef41Sopenharmony_ci case 2: 14051cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>8); 14061cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14071cb0ef41Sopenharmony_ci case 1: 14081cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 14091cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14101cb0ef41Sopenharmony_ci default: 14111cb0ef41Sopenharmony_ci /* will never occur */ 14121cb0ef41Sopenharmony_ci break; 14131cb0ef41Sopenharmony_ci } 14141cb0ef41Sopenharmony_ci } else { 14151cb0ef41Sopenharmony_ci switch(length) { 14161cb0ef41Sopenharmony_ci /* each branch falls through to the next one */ 14171cb0ef41Sopenharmony_ci case 4: 14181cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>24); 14191cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 14201cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14211cb0ef41Sopenharmony_ci case 3: 14221cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>16); 14231cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 14241cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14251cb0ef41Sopenharmony_ci case 2: 14261cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>8); 14271cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 14281cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14291cb0ef41Sopenharmony_ci case 1: 14301cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 14311cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 14321cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14331cb0ef41Sopenharmony_ci default: 14341cb0ef41Sopenharmony_ci /* will never occur */ 14351cb0ef41Sopenharmony_ci break; 14361cb0ef41Sopenharmony_ci } 14371cb0ef41Sopenharmony_ci } 14381cb0ef41Sopenharmony_ci targetCapacity-=length; 14391cb0ef41Sopenharmony_ci 14401cb0ef41Sopenharmony_ci /* normal end of conversion: 
prepare for a new character */ 14411cb0ef41Sopenharmony_ci c=0; 14421cb0ef41Sopenharmony_ci sourceIndex=nextSourceIndex; 14431cb0ef41Sopenharmony_ci goto loop; 14441cb0ef41Sopenharmony_ci } else { 14451cb0ef41Sopenharmony_ci uint8_t *p; 14461cb0ef41Sopenharmony_ci 14471cb0ef41Sopenharmony_ci /* 14481cb0ef41Sopenharmony_ci * We actually do this backwards here: 14491cb0ef41Sopenharmony_ci * In order to save an intermediate variable, we output 14501cb0ef41Sopenharmony_ci * first to the overflow buffer what does not fit into the 14511cb0ef41Sopenharmony_ci * regular target. 14521cb0ef41Sopenharmony_ci */ 14531cb0ef41Sopenharmony_ci /* we know that 0<=targetCapacity<length<=4 */ 14541cb0ef41Sopenharmony_ci /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ 14551cb0ef41Sopenharmony_ci length-=targetCapacity; 14561cb0ef41Sopenharmony_ci p=(uint8_t *)cnv->charErrorBuffer; 14571cb0ef41Sopenharmony_ci switch(length) { 14581cb0ef41Sopenharmony_ci /* each branch falls through to the next one */ 14591cb0ef41Sopenharmony_ci case 4: 14601cb0ef41Sopenharmony_ci *p++=(uint8_t)(c>>24); 14611cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14621cb0ef41Sopenharmony_ci case 3: 14631cb0ef41Sopenharmony_ci *p++=(uint8_t)(c>>16); 14641cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14651cb0ef41Sopenharmony_ci case 2: 14661cb0ef41Sopenharmony_ci *p++=(uint8_t)(c>>8); 14671cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14681cb0ef41Sopenharmony_ci case 1: 14691cb0ef41Sopenharmony_ci *p=(uint8_t)c; 14701cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14711cb0ef41Sopenharmony_ci default: 14721cb0ef41Sopenharmony_ci /* will never occur */ 14731cb0ef41Sopenharmony_ci break; 14741cb0ef41Sopenharmony_ci } 14751cb0ef41Sopenharmony_ci cnv->charErrorBufferLength=(int8_t)length; 14761cb0ef41Sopenharmony_ci 14771cb0ef41Sopenharmony_ci /* now output what fits into the regular target */ 14781cb0ef41Sopenharmony_ci c>>=8*length; /* length was reduced by targetCapacity */ 14791cb0ef41Sopenharmony_ci 
switch(targetCapacity) { 14801cb0ef41Sopenharmony_ci /* each branch falls through to the next one */ 14811cb0ef41Sopenharmony_ci case 3: 14821cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>16); 14831cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 14841cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 14851cb0ef41Sopenharmony_ci } 14861cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14871cb0ef41Sopenharmony_ci case 2: 14881cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>8); 14891cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 14901cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 14911cb0ef41Sopenharmony_ci } 14921cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14931cb0ef41Sopenharmony_ci case 1: 14941cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 14951cb0ef41Sopenharmony_ci if(offsets!=nullptr) { 14961cb0ef41Sopenharmony_ci *offsets++=sourceIndex; 14971cb0ef41Sopenharmony_ci } 14981cb0ef41Sopenharmony_ci U_FALLTHROUGH; 14991cb0ef41Sopenharmony_ci default: 15001cb0ef41Sopenharmony_ci break; 15011cb0ef41Sopenharmony_ci } 15021cb0ef41Sopenharmony_ci 15031cb0ef41Sopenharmony_ci /* target overflow */ 15041cb0ef41Sopenharmony_ci targetCapacity=0; 15051cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 15061cb0ef41Sopenharmony_ci c=0; 15071cb0ef41Sopenharmony_ci goto endloop; 15081cb0ef41Sopenharmony_ci } 15091cb0ef41Sopenharmony_ci} 15101cb0ef41Sopenharmony_ci 15111cb0ef41Sopenharmony_ci/* 15121cb0ef41Sopenharmony_ci * Identical to _SCSUFromUnicodeWithOffsets but without offset handling. 15131cb0ef41Sopenharmony_ci * If a change is made in the original function, then either 15141cb0ef41Sopenharmony_ci * change this function the same way or 15151cb0ef41Sopenharmony_ci * re-copy the original function and remove the variables 15161cb0ef41Sopenharmony_ci * offsets, sourceIndex, and nextSourceIndex. 
15171cb0ef41Sopenharmony_ci */ 15181cb0ef41Sopenharmony_cistatic void U_CALLCONV 15191cb0ef41Sopenharmony_ci_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, 15201cb0ef41Sopenharmony_ci UErrorCode *pErrorCode) { 15211cb0ef41Sopenharmony_ci UConverter *cnv; 15221cb0ef41Sopenharmony_ci SCSUData *scsu; 15231cb0ef41Sopenharmony_ci const char16_t *source, *sourceLimit; 15241cb0ef41Sopenharmony_ci uint8_t *target; 15251cb0ef41Sopenharmony_ci int32_t targetCapacity; 15261cb0ef41Sopenharmony_ci 15271cb0ef41Sopenharmony_ci UBool isSingleByteMode; 15281cb0ef41Sopenharmony_ci uint8_t dynamicWindow; 15291cb0ef41Sopenharmony_ci uint32_t currentOffset; 15301cb0ef41Sopenharmony_ci 15311cb0ef41Sopenharmony_ci uint32_t c, delta; 15321cb0ef41Sopenharmony_ci 15331cb0ef41Sopenharmony_ci int32_t length; 15341cb0ef41Sopenharmony_ci 15351cb0ef41Sopenharmony_ci /* variables for compression heuristics */ 15361cb0ef41Sopenharmony_ci uint32_t offset; 15371cb0ef41Sopenharmony_ci char16_t lead, trail; 15381cb0ef41Sopenharmony_ci int code; 15391cb0ef41Sopenharmony_ci int8_t window; 15401cb0ef41Sopenharmony_ci 15411cb0ef41Sopenharmony_ci /* set up the local pointers */ 15421cb0ef41Sopenharmony_ci cnv=pArgs->converter; 15431cb0ef41Sopenharmony_ci scsu=(SCSUData *)cnv->extraInfo; 15441cb0ef41Sopenharmony_ci 15451cb0ef41Sopenharmony_ci /* set up the local pointers */ 15461cb0ef41Sopenharmony_ci source=pArgs->source; 15471cb0ef41Sopenharmony_ci sourceLimit=pArgs->sourceLimit; 15481cb0ef41Sopenharmony_ci target=(uint8_t *)pArgs->target; 15491cb0ef41Sopenharmony_ci targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 15501cb0ef41Sopenharmony_ci 15511cb0ef41Sopenharmony_ci /* get the state machine state */ 15521cb0ef41Sopenharmony_ci isSingleByteMode=scsu->fromUIsSingleByteMode; 15531cb0ef41Sopenharmony_ci dynamicWindow=scsu->fromUDynamicWindow; 15541cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 15551cb0ef41Sopenharmony_ci 15561cb0ef41Sopenharmony_ci 
c=cnv->fromUChar32; 15571cb0ef41Sopenharmony_ci 15581cb0ef41Sopenharmony_ci /* similar conversion "loop" as in toUnicode */ 15591cb0ef41Sopenharmony_ciloop: 15601cb0ef41Sopenharmony_ci if(isSingleByteMode) { 15611cb0ef41Sopenharmony_ci if(c!=0 && targetCapacity>0) { 15621cb0ef41Sopenharmony_ci goto getTrailSingle; 15631cb0ef41Sopenharmony_ci } 15641cb0ef41Sopenharmony_ci 15651cb0ef41Sopenharmony_ci /* state machine for single-byte mode */ 15661cb0ef41Sopenharmony_ci/* singleByteMode: */ 15671cb0ef41Sopenharmony_ci while(source<sourceLimit) { 15681cb0ef41Sopenharmony_ci if(targetCapacity<=0) { 15691cb0ef41Sopenharmony_ci /* target is full */ 15701cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 15711cb0ef41Sopenharmony_ci break; 15721cb0ef41Sopenharmony_ci } 15731cb0ef41Sopenharmony_ci c=*source++; 15741cb0ef41Sopenharmony_ci 15751cb0ef41Sopenharmony_ci if((c-0x20)<=0x5f) { 15761cb0ef41Sopenharmony_ci /* pass US-ASCII graphic character through */ 15771cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 15781cb0ef41Sopenharmony_ci --targetCapacity; 15791cb0ef41Sopenharmony_ci } else if(c<0x20) { 15801cb0ef41Sopenharmony_ci if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { 15811cb0ef41Sopenharmony_ci /* CR/LF/TAB/NUL */ 15821cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 15831cb0ef41Sopenharmony_ci --targetCapacity; 15841cb0ef41Sopenharmony_ci } else { 15851cb0ef41Sopenharmony_ci /* quote C0 control character */ 15861cb0ef41Sopenharmony_ci c|=SQ0<<8; 15871cb0ef41Sopenharmony_ci length=2; 15881cb0ef41Sopenharmony_ci goto outputBytes; 15891cb0ef41Sopenharmony_ci } 15901cb0ef41Sopenharmony_ci } else if((delta=c-currentOffset)<=0x7f) { 15911cb0ef41Sopenharmony_ci /* use the current dynamic window */ 15921cb0ef41Sopenharmony_ci *target++=(uint8_t)(delta|0x80); 15931cb0ef41Sopenharmony_ci --targetCapacity; 15941cb0ef41Sopenharmony_ci } else if(U16_IS_SURROGATE(c)) { 15951cb0ef41Sopenharmony_ci if(U16_IS_SURROGATE_LEAD(c)) { 
15961cb0ef41Sopenharmony_cigetTrailSingle: 15971cb0ef41Sopenharmony_ci lead=(char16_t)c; 15981cb0ef41Sopenharmony_ci if(source<sourceLimit) { 15991cb0ef41Sopenharmony_ci /* test the following code unit */ 16001cb0ef41Sopenharmony_ci trail=*source; 16011cb0ef41Sopenharmony_ci if(U16_IS_TRAIL(trail)) { 16021cb0ef41Sopenharmony_ci ++source; 16031cb0ef41Sopenharmony_ci c=U16_GET_SUPPLEMENTARY(c, trail); 16041cb0ef41Sopenharmony_ci /* convert this surrogate code point */ 16051cb0ef41Sopenharmony_ci /* exit this condition tree */ 16061cb0ef41Sopenharmony_ci } else { 16071cb0ef41Sopenharmony_ci /* this is an unmatched lead code unit (1st surrogate) */ 16081cb0ef41Sopenharmony_ci /* callback(illegal) */ 16091cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 16101cb0ef41Sopenharmony_ci goto endloop; 16111cb0ef41Sopenharmony_ci } 16121cb0ef41Sopenharmony_ci } else { 16131cb0ef41Sopenharmony_ci /* no more input */ 16141cb0ef41Sopenharmony_ci break; 16151cb0ef41Sopenharmony_ci } 16161cb0ef41Sopenharmony_ci } else { 16171cb0ef41Sopenharmony_ci /* this is an unmatched trail code unit (2nd surrogate) */ 16181cb0ef41Sopenharmony_ci /* callback(illegal) */ 16191cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 16201cb0ef41Sopenharmony_ci goto endloop; 16211cb0ef41Sopenharmony_ci } 16221cb0ef41Sopenharmony_ci 16231cb0ef41Sopenharmony_ci /* compress supplementary character U+10000..U+10ffff */ 16241cb0ef41Sopenharmony_ci if((delta=c-currentOffset)<=0x7f) { 16251cb0ef41Sopenharmony_ci /* use the current dynamic window */ 16261cb0ef41Sopenharmony_ci *target++=(uint8_t)(delta|0x80); 16271cb0ef41Sopenharmony_ci --targetCapacity; 16281cb0ef41Sopenharmony_ci } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 16291cb0ef41Sopenharmony_ci /* there is a dynamic window that contains this character, change to it */ 16301cb0ef41Sopenharmony_ci dynamicWindow=window; 16311cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 
16321cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 16331cb0ef41Sopenharmony_ci c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 16341cb0ef41Sopenharmony_ci length=2; 16351cb0ef41Sopenharmony_ci goto outputBytes; 16361cb0ef41Sopenharmony_ci } else if((code=getDynamicOffset(c, &offset))>=0) { 16371cb0ef41Sopenharmony_ci /* might check if there are more characters in this window to come */ 16381cb0ef41Sopenharmony_ci /* define an extended window with this character */ 16391cb0ef41Sopenharmony_ci code-=0x200; 16401cb0ef41Sopenharmony_ci dynamicWindow=getNextDynamicWindow(scsu); 16411cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 16421cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 16431cb0ef41Sopenharmony_ci c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 16441cb0ef41Sopenharmony_ci length=4; 16451cb0ef41Sopenharmony_ci goto outputBytes; 16461cb0ef41Sopenharmony_ci } else { 16471cb0ef41Sopenharmony_ci /* change to Unicode mode and output this (lead, trail) pair */ 16481cb0ef41Sopenharmony_ci isSingleByteMode=false; 16491cb0ef41Sopenharmony_ci *target++=(uint8_t)SCU; 16501cb0ef41Sopenharmony_ci --targetCapacity; 16511cb0ef41Sopenharmony_ci c=((uint32_t)lead<<16)|trail; 16521cb0ef41Sopenharmony_ci length=4; 16531cb0ef41Sopenharmony_ci goto outputBytes; 16541cb0ef41Sopenharmony_ci } 16551cb0ef41Sopenharmony_ci } else if(c<0xa0) { 16561cb0ef41Sopenharmony_ci /* quote C1 control character */ 16571cb0ef41Sopenharmony_ci c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ 16581cb0ef41Sopenharmony_ci length=2; 16591cb0ef41Sopenharmony_ci goto outputBytes; 16601cb0ef41Sopenharmony_ci } else if(c==0xfeff || c>=0xfff0) { 16611cb0ef41Sopenharmony_ci /* quote signature character=byte order mark and specials */ 16621cb0ef41Sopenharmony_ci c|=SQU<<16; 16631cb0ef41Sopenharmony_ci length=3; 16641cb0ef41Sopenharmony_ci goto outputBytes; 16651cb0ef41Sopenharmony_ci } else 
{ 16661cb0ef41Sopenharmony_ci /* compress all other BMP characters */ 16671cb0ef41Sopenharmony_ci if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 16681cb0ef41Sopenharmony_ci /* there is a window defined that contains this character - switch to it or quote from it? */ 16691cb0ef41Sopenharmony_ci if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { 16701cb0ef41Sopenharmony_ci /* change to dynamic window */ 16711cb0ef41Sopenharmony_ci dynamicWindow=window; 16721cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 16731cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 16741cb0ef41Sopenharmony_ci c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 16751cb0ef41Sopenharmony_ci length=2; 16761cb0ef41Sopenharmony_ci goto outputBytes; 16771cb0ef41Sopenharmony_ci } else { 16781cb0ef41Sopenharmony_ci /* quote from dynamic window */ 16791cb0ef41Sopenharmony_ci c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; 16801cb0ef41Sopenharmony_ci length=2; 16811cb0ef41Sopenharmony_ci goto outputBytes; 16821cb0ef41Sopenharmony_ci } 16831cb0ef41Sopenharmony_ci } else if((window=getWindow(staticOffsets, c))>=0) { 16841cb0ef41Sopenharmony_ci /* quote from static window */ 16851cb0ef41Sopenharmony_ci c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); 16861cb0ef41Sopenharmony_ci length=2; 16871cb0ef41Sopenharmony_ci goto outputBytes; 16881cb0ef41Sopenharmony_ci } else if((code=getDynamicOffset(c, &offset))>=0) { 16891cb0ef41Sopenharmony_ci /* define a dynamic window with this character */ 16901cb0ef41Sopenharmony_ci dynamicWindow=getNextDynamicWindow(scsu); 16911cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 16921cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 16931cb0ef41Sopenharmony_ci c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 16941cb0ef41Sopenharmony_ci length=3; 
16951cb0ef41Sopenharmony_ci goto outputBytes; 16961cb0ef41Sopenharmony_ci } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && 16971cb0ef41Sopenharmony_ci (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 16981cb0ef41Sopenharmony_ci ) { 16991cb0ef41Sopenharmony_ci /* 17001cb0ef41Sopenharmony_ci * this character is not compressible (a BMP ideograph or similar); 17011cb0ef41Sopenharmony_ci * switch to Unicode mode if this is the last character in the block 17021cb0ef41Sopenharmony_ci * or there is at least one more ideograph following immediately 17031cb0ef41Sopenharmony_ci */ 17041cb0ef41Sopenharmony_ci isSingleByteMode=false; 17051cb0ef41Sopenharmony_ci c|=SCU<<16; 17061cb0ef41Sopenharmony_ci length=3; 17071cb0ef41Sopenharmony_ci goto outputBytes; 17081cb0ef41Sopenharmony_ci } else { 17091cb0ef41Sopenharmony_ci /* quote Unicode */ 17101cb0ef41Sopenharmony_ci c|=SQU<<16; 17111cb0ef41Sopenharmony_ci length=3; 17121cb0ef41Sopenharmony_ci goto outputBytes; 17131cb0ef41Sopenharmony_ci } 17141cb0ef41Sopenharmony_ci } 17151cb0ef41Sopenharmony_ci 17161cb0ef41Sopenharmony_ci /* normal end of conversion: prepare for a new character */ 17171cb0ef41Sopenharmony_ci c=0; 17181cb0ef41Sopenharmony_ci } 17191cb0ef41Sopenharmony_ci } else { 17201cb0ef41Sopenharmony_ci if(c!=0 && targetCapacity>0) { 17211cb0ef41Sopenharmony_ci goto getTrailUnicode; 17221cb0ef41Sopenharmony_ci } 17231cb0ef41Sopenharmony_ci 17241cb0ef41Sopenharmony_ci /* state machine for Unicode mode */ 17251cb0ef41Sopenharmony_ci/* unicodeByteMode: */ 17261cb0ef41Sopenharmony_ci while(source<sourceLimit) { 17271cb0ef41Sopenharmony_ci if(targetCapacity<=0) { 17281cb0ef41Sopenharmony_ci /* target is full */ 17291cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 17301cb0ef41Sopenharmony_ci break; 17311cb0ef41Sopenharmony_ci } 17321cb0ef41Sopenharmony_ci c=*source++; 17331cb0ef41Sopenharmony_ci 17341cb0ef41Sopenharmony_ci if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { 17351cb0ef41Sopenharmony_ci /* 
not compressible, write character directly */ 17361cb0ef41Sopenharmony_ci if(targetCapacity>=2) { 17371cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>8); 17381cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 17391cb0ef41Sopenharmony_ci targetCapacity-=2; 17401cb0ef41Sopenharmony_ci } else { 17411cb0ef41Sopenharmony_ci length=2; 17421cb0ef41Sopenharmony_ci goto outputBytes; 17431cb0ef41Sopenharmony_ci } 17441cb0ef41Sopenharmony_ci } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { 17451cb0ef41Sopenharmony_ci /* compress BMP character if the following one is not an uncompressible ideograph */ 17461cb0ef41Sopenharmony_ci if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { 17471cb0ef41Sopenharmony_ci if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { 17481cb0ef41Sopenharmony_ci /* ASCII digit or letter */ 17491cb0ef41Sopenharmony_ci isSingleByteMode=true; 17501cb0ef41Sopenharmony_ci c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; 17511cb0ef41Sopenharmony_ci length=2; 17521cb0ef41Sopenharmony_ci goto outputBytes; 17531cb0ef41Sopenharmony_ci } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { 17541cb0ef41Sopenharmony_ci /* there is a dynamic window that contains this character, change to it */ 17551cb0ef41Sopenharmony_ci isSingleByteMode=true; 17561cb0ef41Sopenharmony_ci dynamicWindow=window; 17571cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 17581cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 17591cb0ef41Sopenharmony_ci c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 17601cb0ef41Sopenharmony_ci length=2; 17611cb0ef41Sopenharmony_ci goto outputBytes; 17621cb0ef41Sopenharmony_ci } else if((code=getDynamicOffset(c, &offset))>=0) { 17631cb0ef41Sopenharmony_ci /* define a dynamic window with this character */ 17641cb0ef41Sopenharmony_ci isSingleByteMode=true; 17651cb0ef41Sopenharmony_ci dynamicWindow=getNextDynamicWindow(scsu); 
17661cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 17671cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 17681cb0ef41Sopenharmony_ci c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 17691cb0ef41Sopenharmony_ci length=3; 17701cb0ef41Sopenharmony_ci goto outputBytes; 17711cb0ef41Sopenharmony_ci } 17721cb0ef41Sopenharmony_ci } 17731cb0ef41Sopenharmony_ci 17741cb0ef41Sopenharmony_ci /* don't know how to compress this character, just write it directly */ 17751cb0ef41Sopenharmony_ci length=2; 17761cb0ef41Sopenharmony_ci goto outputBytes; 17771cb0ef41Sopenharmony_ci } else if(c<0xe000) { 17781cb0ef41Sopenharmony_ci /* c is a surrogate */ 17791cb0ef41Sopenharmony_ci if(U16_IS_SURROGATE_LEAD(c)) { 17801cb0ef41Sopenharmony_cigetTrailUnicode: 17811cb0ef41Sopenharmony_ci lead=(char16_t)c; 17821cb0ef41Sopenharmony_ci if(source<sourceLimit) { 17831cb0ef41Sopenharmony_ci /* test the following code unit */ 17841cb0ef41Sopenharmony_ci trail=*source; 17851cb0ef41Sopenharmony_ci if(U16_IS_TRAIL(trail)) { 17861cb0ef41Sopenharmony_ci ++source; 17871cb0ef41Sopenharmony_ci c=U16_GET_SUPPLEMENTARY(c, trail); 17881cb0ef41Sopenharmony_ci /* convert this surrogate code point */ 17891cb0ef41Sopenharmony_ci /* exit this condition tree */ 17901cb0ef41Sopenharmony_ci } else { 17911cb0ef41Sopenharmony_ci /* this is an unmatched lead code unit (1st surrogate) */ 17921cb0ef41Sopenharmony_ci /* callback(illegal) */ 17931cb0ef41Sopenharmony_ci *pErrorCode=U_ILLEGAL_CHAR_FOUND; 17941cb0ef41Sopenharmony_ci goto endloop; 17951cb0ef41Sopenharmony_ci } 17961cb0ef41Sopenharmony_ci } else { 17971cb0ef41Sopenharmony_ci /* no more input */ 17981cb0ef41Sopenharmony_ci break; 17991cb0ef41Sopenharmony_ci } 18001cb0ef41Sopenharmony_ci } else { 18011cb0ef41Sopenharmony_ci /* this is an unmatched trail code unit (2nd surrogate) */ 18021cb0ef41Sopenharmony_ci /* callback(illegal) */ 18031cb0ef41Sopenharmony_ci 
*pErrorCode=U_ILLEGAL_CHAR_FOUND; 18041cb0ef41Sopenharmony_ci goto endloop; 18051cb0ef41Sopenharmony_ci } 18061cb0ef41Sopenharmony_ci 18071cb0ef41Sopenharmony_ci /* compress supplementary character */ 18081cb0ef41Sopenharmony_ci if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && 18091cb0ef41Sopenharmony_ci !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) 18101cb0ef41Sopenharmony_ci ) { 18111cb0ef41Sopenharmony_ci /* 18121cb0ef41Sopenharmony_ci * there is a dynamic window that contains this character and 18131cb0ef41Sopenharmony_ci * the following character is not uncompressible, 18141cb0ef41Sopenharmony_ci * change to the window 18151cb0ef41Sopenharmony_ci */ 18161cb0ef41Sopenharmony_ci isSingleByteMode=true; 18171cb0ef41Sopenharmony_ci dynamicWindow=window; 18181cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; 18191cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 18201cb0ef41Sopenharmony_ci c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; 18211cb0ef41Sopenharmony_ci length=2; 18221cb0ef41Sopenharmony_ci goto outputBytes; 18231cb0ef41Sopenharmony_ci } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ 18241cb0ef41Sopenharmony_ci (code=getDynamicOffset(c, &offset))>=0 18251cb0ef41Sopenharmony_ci ) { 18261cb0ef41Sopenharmony_ci /* two supplementary characters in (probably) the same window - define an extended one */ 18271cb0ef41Sopenharmony_ci isSingleByteMode=true; 18281cb0ef41Sopenharmony_ci code-=0x200; 18291cb0ef41Sopenharmony_ci dynamicWindow=getNextDynamicWindow(scsu); 18301cb0ef41Sopenharmony_ci currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; 18311cb0ef41Sopenharmony_ci useDynamicWindow(scsu, dynamicWindow); 18321cb0ef41Sopenharmony_ci c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; 18331cb0ef41Sopenharmony_ci length=4; 18341cb0ef41Sopenharmony_ci goto outputBytes; 
18351cb0ef41Sopenharmony_ci } else { 18361cb0ef41Sopenharmony_ci /* don't know how to compress this character, just write it directly */ 18371cb0ef41Sopenharmony_ci c=((uint32_t)lead<<16)|trail; 18381cb0ef41Sopenharmony_ci length=4; 18391cb0ef41Sopenharmony_ci goto outputBytes; 18401cb0ef41Sopenharmony_ci } 18411cb0ef41Sopenharmony_ci } else /* 0xe000<=c<0xf300 */ { 18421cb0ef41Sopenharmony_ci /* quote to avoid SCSU tags */ 18431cb0ef41Sopenharmony_ci c|=UQU<<16; 18441cb0ef41Sopenharmony_ci length=3; 18451cb0ef41Sopenharmony_ci goto outputBytes; 18461cb0ef41Sopenharmony_ci } 18471cb0ef41Sopenharmony_ci 18481cb0ef41Sopenharmony_ci /* normal end of conversion: prepare for a new character */ 18491cb0ef41Sopenharmony_ci c=0; 18501cb0ef41Sopenharmony_ci } 18511cb0ef41Sopenharmony_ci } 18521cb0ef41Sopenharmony_ciendloop: 18531cb0ef41Sopenharmony_ci 18541cb0ef41Sopenharmony_ci /* set the converter state back into UConverter */ 18551cb0ef41Sopenharmony_ci scsu->fromUIsSingleByteMode=isSingleByteMode; 18561cb0ef41Sopenharmony_ci scsu->fromUDynamicWindow=dynamicWindow; 18571cb0ef41Sopenharmony_ci 18581cb0ef41Sopenharmony_ci cnv->fromUChar32=c; 18591cb0ef41Sopenharmony_ci 18601cb0ef41Sopenharmony_ci /* write back the updated pointers */ 18611cb0ef41Sopenharmony_ci pArgs->source=source; 18621cb0ef41Sopenharmony_ci pArgs->target=(char *)target; 18631cb0ef41Sopenharmony_ci return; 18641cb0ef41Sopenharmony_ci 18651cb0ef41Sopenharmony_cioutputBytes: 18661cb0ef41Sopenharmony_ci /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ 18671cb0ef41Sopenharmony_ci /* from the first if in the loop we know that targetCapacity>0 */ 18681cb0ef41Sopenharmony_ci if(length<=targetCapacity) { 18691cb0ef41Sopenharmony_ci switch(length) { 18701cb0ef41Sopenharmony_ci /* each branch falls through to the next one */ 18711cb0ef41Sopenharmony_ci case 4: 18721cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>24); 18731cb0ef41Sopenharmony_ci U_FALLTHROUGH; 
18741cb0ef41Sopenharmony_ci case 3: 18751cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>16); 18761cb0ef41Sopenharmony_ci U_FALLTHROUGH; 18771cb0ef41Sopenharmony_ci case 2: 18781cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>8); 18791cb0ef41Sopenharmony_ci U_FALLTHROUGH; 18801cb0ef41Sopenharmony_ci case 1: 18811cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 18821cb0ef41Sopenharmony_ci U_FALLTHROUGH; 18831cb0ef41Sopenharmony_ci default: 18841cb0ef41Sopenharmony_ci /* will never occur */ 18851cb0ef41Sopenharmony_ci break; 18861cb0ef41Sopenharmony_ci } 18871cb0ef41Sopenharmony_ci targetCapacity-=length; 18881cb0ef41Sopenharmony_ci 18891cb0ef41Sopenharmony_ci /* normal end of conversion: prepare for a new character */ 18901cb0ef41Sopenharmony_ci c=0; 18911cb0ef41Sopenharmony_ci goto loop; 18921cb0ef41Sopenharmony_ci } else { 18931cb0ef41Sopenharmony_ci uint8_t *p; 18941cb0ef41Sopenharmony_ci 18951cb0ef41Sopenharmony_ci /* 18961cb0ef41Sopenharmony_ci * We actually do this backwards here: 18971cb0ef41Sopenharmony_ci * In order to save an intermediate variable, we output 18981cb0ef41Sopenharmony_ci * first to the overflow buffer what does not fit into the 18991cb0ef41Sopenharmony_ci * regular target. 
19001cb0ef41Sopenharmony_ci */ 19011cb0ef41Sopenharmony_ci /* we know that 0<=targetCapacity<length<=4 */ 19021cb0ef41Sopenharmony_ci /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ 19031cb0ef41Sopenharmony_ci length-=targetCapacity; 19041cb0ef41Sopenharmony_ci p=(uint8_t *)cnv->charErrorBuffer; 19051cb0ef41Sopenharmony_ci switch(length) { 19061cb0ef41Sopenharmony_ci /* each branch falls through to the next one */ 19071cb0ef41Sopenharmony_ci case 4: 19081cb0ef41Sopenharmony_ci *p++=(uint8_t)(c>>24); 19091cb0ef41Sopenharmony_ci U_FALLTHROUGH; 19101cb0ef41Sopenharmony_ci case 3: 19111cb0ef41Sopenharmony_ci *p++=(uint8_t)(c>>16); 19121cb0ef41Sopenharmony_ci U_FALLTHROUGH; 19131cb0ef41Sopenharmony_ci case 2: 19141cb0ef41Sopenharmony_ci *p++=(uint8_t)(c>>8); 19151cb0ef41Sopenharmony_ci U_FALLTHROUGH; 19161cb0ef41Sopenharmony_ci case 1: 19171cb0ef41Sopenharmony_ci *p=(uint8_t)c; 19181cb0ef41Sopenharmony_ci U_FALLTHROUGH; 19191cb0ef41Sopenharmony_ci default: 19201cb0ef41Sopenharmony_ci /* will never occur */ 19211cb0ef41Sopenharmony_ci break; 19221cb0ef41Sopenharmony_ci } 19231cb0ef41Sopenharmony_ci cnv->charErrorBufferLength=(int8_t)length; 19241cb0ef41Sopenharmony_ci 19251cb0ef41Sopenharmony_ci /* now output what fits into the regular target */ 19261cb0ef41Sopenharmony_ci c = (length == 4) ? 
0 : c >> 8*length; /* length was reduced by targetCapacity */ 19271cb0ef41Sopenharmony_ci switch(targetCapacity) { 19281cb0ef41Sopenharmony_ci /* each branch falls through to the next one */ 19291cb0ef41Sopenharmony_ci case 3: 19301cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>16); 19311cb0ef41Sopenharmony_ci U_FALLTHROUGH; 19321cb0ef41Sopenharmony_ci case 2: 19331cb0ef41Sopenharmony_ci *target++=(uint8_t)(c>>8); 19341cb0ef41Sopenharmony_ci U_FALLTHROUGH; 19351cb0ef41Sopenharmony_ci case 1: 19361cb0ef41Sopenharmony_ci *target++=(uint8_t)c; 19371cb0ef41Sopenharmony_ci U_FALLTHROUGH; 19381cb0ef41Sopenharmony_ci default: 19391cb0ef41Sopenharmony_ci break; 19401cb0ef41Sopenharmony_ci } 19411cb0ef41Sopenharmony_ci 19421cb0ef41Sopenharmony_ci /* target overflow */ 19431cb0ef41Sopenharmony_ci targetCapacity=0; 19441cb0ef41Sopenharmony_ci *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 19451cb0ef41Sopenharmony_ci c=0; 19461cb0ef41Sopenharmony_ci goto endloop; 19471cb0ef41Sopenharmony_ci } 19481cb0ef41Sopenharmony_ci} 19491cb0ef41Sopenharmony_ci 19501cb0ef41Sopenharmony_ci/* miscellaneous ------------------------------------------------------------ */ 19511cb0ef41Sopenharmony_ci 19521cb0ef41Sopenharmony_cistatic const char * U_CALLCONV 19531cb0ef41Sopenharmony_ci_SCSUGetName(const UConverter *cnv) { 19541cb0ef41Sopenharmony_ci SCSUData *scsu=(SCSUData *)cnv->extraInfo; 19551cb0ef41Sopenharmony_ci 19561cb0ef41Sopenharmony_ci switch(scsu->locale) { 19571cb0ef41Sopenharmony_ci case l_ja: 19581cb0ef41Sopenharmony_ci return "SCSU,locale=ja"; 19591cb0ef41Sopenharmony_ci default: 19601cb0ef41Sopenharmony_ci return "SCSU"; 19611cb0ef41Sopenharmony_ci } 19621cb0ef41Sopenharmony_ci} 19631cb0ef41Sopenharmony_ci 19641cb0ef41Sopenharmony_ci/* structure for SafeClone calculations */ 19651cb0ef41Sopenharmony_cistruct cloneSCSUStruct 19661cb0ef41Sopenharmony_ci{ 19671cb0ef41Sopenharmony_ci UConverter cnv; 19681cb0ef41Sopenharmony_ci SCSUData mydata; 19691cb0ef41Sopenharmony_ci}; 
19701cb0ef41Sopenharmony_ci 19711cb0ef41Sopenharmony_cistatic UConverter * U_CALLCONV 19721cb0ef41Sopenharmony_ci_SCSUSafeClone(const UConverter *cnv, 19731cb0ef41Sopenharmony_ci void *stackBuffer, 19741cb0ef41Sopenharmony_ci int32_t *pBufferSize, 19751cb0ef41Sopenharmony_ci UErrorCode *status) 19761cb0ef41Sopenharmony_ci{ 19771cb0ef41Sopenharmony_ci struct cloneSCSUStruct * localClone; 19781cb0ef41Sopenharmony_ci int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct); 19791cb0ef41Sopenharmony_ci 19801cb0ef41Sopenharmony_ci if (U_FAILURE(*status)){ 19811cb0ef41Sopenharmony_ci return 0; 19821cb0ef41Sopenharmony_ci } 19831cb0ef41Sopenharmony_ci 19841cb0ef41Sopenharmony_ci if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ 19851cb0ef41Sopenharmony_ci *pBufferSize = bufferSizeNeeded; 19861cb0ef41Sopenharmony_ci return 0; 19871cb0ef41Sopenharmony_ci } 19881cb0ef41Sopenharmony_ci 19891cb0ef41Sopenharmony_ci localClone = (struct cloneSCSUStruct *)stackBuffer; 19901cb0ef41Sopenharmony_ci /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ 19911cb0ef41Sopenharmony_ci 19921cb0ef41Sopenharmony_ci uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); 19931cb0ef41Sopenharmony_ci localClone->cnv.extraInfo = &localClone->mydata; 19941cb0ef41Sopenharmony_ci localClone->cnv.isExtraLocal = true; 19951cb0ef41Sopenharmony_ci 19961cb0ef41Sopenharmony_ci return &localClone->cnv; 19971cb0ef41Sopenharmony_ci} 19981cb0ef41Sopenharmony_ciU_CDECL_END 19991cb0ef41Sopenharmony_ci 20001cb0ef41Sopenharmony_cistatic const UConverterImpl _SCSUImpl={ 20011cb0ef41Sopenharmony_ci UCNV_SCSU, 20021cb0ef41Sopenharmony_ci 20031cb0ef41Sopenharmony_ci nullptr, 20041cb0ef41Sopenharmony_ci nullptr, 20051cb0ef41Sopenharmony_ci 20061cb0ef41Sopenharmony_ci _SCSUOpen, 20071cb0ef41Sopenharmony_ci _SCSUClose, 20081cb0ef41Sopenharmony_ci _SCSUReset, 20091cb0ef41Sopenharmony_ci 20101cb0ef41Sopenharmony_ci _SCSUToUnicode, 20111cb0ef41Sopenharmony_ci 
_SCSUToUnicodeWithOffsets, 20121cb0ef41Sopenharmony_ci _SCSUFromUnicode, 20131cb0ef41Sopenharmony_ci _SCSUFromUnicodeWithOffsets, 20141cb0ef41Sopenharmony_ci nullptr, 20151cb0ef41Sopenharmony_ci 20161cb0ef41Sopenharmony_ci nullptr, 20171cb0ef41Sopenharmony_ci _SCSUGetName, 20181cb0ef41Sopenharmony_ci nullptr, 20191cb0ef41Sopenharmony_ci _SCSUSafeClone, 20201cb0ef41Sopenharmony_ci ucnv_getCompleteUnicodeSet, 20211cb0ef41Sopenharmony_ci nullptr, 20221cb0ef41Sopenharmony_ci nullptr 20231cb0ef41Sopenharmony_ci}; 20241cb0ef41Sopenharmony_ci 20251cb0ef41Sopenharmony_cistatic const UConverterStaticData _SCSUStaticData={ 20261cb0ef41Sopenharmony_ci sizeof(UConverterStaticData), 20271cb0ef41Sopenharmony_ci "SCSU", 20281cb0ef41Sopenharmony_ci 1212, /* CCSID for SCSU */ 20291cb0ef41Sopenharmony_ci UCNV_IBM, UCNV_SCSU, 20301cb0ef41Sopenharmony_ci 1, 3, /* one char16_t generates at least 1 byte and at most 3 bytes */ 20311cb0ef41Sopenharmony_ci /* 20321cb0ef41Sopenharmony_ci * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode 20331cb0ef41Sopenharmony_ci * substitution string. 20341cb0ef41Sopenharmony_ci */ 20351cb0ef41Sopenharmony_ci { 0x0e, 0xff, 0xfd, 0 }, 3, 20361cb0ef41Sopenharmony_ci false, false, 20371cb0ef41Sopenharmony_ci 0, 20381cb0ef41Sopenharmony_ci 0, 20391cb0ef41Sopenharmony_ci { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 20401cb0ef41Sopenharmony_ci}; 20411cb0ef41Sopenharmony_ci 20421cb0ef41Sopenharmony_ciconst UConverterSharedData _SCSUData= 20431cb0ef41Sopenharmony_ci UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl); 20441cb0ef41Sopenharmony_ci 20451cb0ef41Sopenharmony_ci#endif 2046