1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6*   Copyright (C) 2000-2016, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9******************************************************************************
10*   file name:  ucnvscsu.c
11*   encoding:   UTF-8
12*   tab size:   8 (not used)
13*   indentation:4
14*
15*   created on: 2000nov18
16*   created by: Markus W. Scherer
17*
18*   This is an implementation of the Standard Compression Scheme for Unicode
19*   as defined in https://www.unicode.org/reports/tr6/ .
20*   Reserved commands and window settings are treated as illegal sequences and
21*   will result in callback calls.
22*/
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
27
28#include "unicode/ucnv.h"
29#include "unicode/ucnv_cb.h"
30#include "unicode/utf16.h"
31#include "ucnv_bld.h"
32#include "ucnv_cnv.h"
33#include "cmemory.h"
34
35/* SCSU definitions --------------------------------------------------------- */
36
/*
 * SCSU tag (command) byte values.
 * Tags that come in groups of eight (one per window) are listed by the first
 * and last member of their range only.
 */
enum {
    /* tags recognized in single-byte mode */
    SQ0=0x01,   /* SQ0..SQ7: quote one character from window pair 0..7 */
    SQ7=SQ0+7,
    SDX=0x0B,   /* define a window as extended */
    Srs=0x0C,   /* reserved */
    SQU=0x0E,   /* quote a single Unicode character */
    SCU=0x0F,   /* change to Unicode mode */
    SC0=0x10,   /* SC0..SC7: select window 0..7 */
    SC7=SC0+7,
    SD0=0x18,   /* SD0..SD7: define and select window 0..7 */
    SD7=SD0+7,

    /* tags recognized in Unicode mode */
    UC0=0xE0,   /* UC0..UC7: select window 0..7 */
    UC7=UC0+7,
    UD0=0xE8,   /* UD0..UD7: define and select window 0..7 */
    UD7=UD0+7,
    UQU=0xF0,   /* quote a single Unicode character */
    UDX=0xF1,   /* define a window as extended */
    Urs=0xF2    /* reserved */
};
58
/* boundaries for interpreting the one-byte argument of a define-window tag */
enum {
    /*
     * Unicode code points from 3400 to E000 are not addressable by a
     * dynamic window, since no short-run alphabets are found in these areas.
     * Window offset bytes from gapThreshold (0x68<<7 == 0x3400) upward
     * therefore get gapOffset added to skip over that range.
     */
    gapThreshold = 0x68,
    gapOffset = 0xAC00,

    /* offset bytes from reservedStart up to (excluding) fixedThreshold are reserved */
    reservedStart = 0xA8,

    /* offset bytes from fixedThreshold upward index the table of predefined fixed offsets */
    fixedThreshold = 0xF9
};
74
/* start offsets of the 8 static windows; these never change (all are in the BMP) */
static const uint32_t staticOffsets[8] = {
    /* 0 */ 0x0000, /* ASCII for quoted tags */
    /* 1 */ 0x0080, /* Latin-1 Supplement (for access to punctuation) */
    /* 2 */ 0x0100, /* Latin Extended-A */
    /* 3 */ 0x0300, /* Combining Diacritical Marks */
    /* 4 */ 0x2000, /* General Punctuation */
    /* 5 */ 0x2080, /* Currency Symbols */
    /* 6 */ 0x2100, /* Letterlike Symbols and Number Forms */
    /* 7 */ 0x3000  /* CJK Symbols and Punctuation */
};
86
/* start offsets that the 8 dynamic (sliding) windows have before any window is redefined */
static const uint32_t initialDynamicOffsets[8] = {
    /* 0 */ 0x0080, /* Latin-1 */
    /* 1 */ 0x00C0, /* Latin Extended A */
    /* 2 */ 0x0400, /* Cyrillic */
    /* 3 */ 0x0600, /* Arabic */
    /* 4 */ 0x0900, /* Devanagari */
    /* 5 */ 0x3040, /* Hiragana */
    /* 6 */ 0x30A0, /* Katakana */
    /* 7 */ 0xFF00  /* Fullwidth ASCII */
};
98
/*
 * Predefined window offsets, indexed by (offset byte - 0xF9).
 * The leading comment on each row is the offset byte that selects it.
 */
static const uint32_t fixedOffsets[] = {
    /* 0xF9 */ 0x00C0,  /* Latin-1 Letters + half of Latin Extended A */
    /* 0xFA */ 0x0250,  /* IPA extensions */
    /* 0xFB */ 0x0370,  /* Greek */
    /* 0xFC */ 0x0530,  /* Armenian */
    /* 0xFD */ 0x3040,  /* Hiragana */
    /* 0xFE */ 0x30A0,  /* Katakana */
    /* 0xFF */ 0xFF60   /* Halfwidth Katakana */
};
109
/*
 * States of the toUnicode state machine: which byte of a (possibly
 * multi-byte) sequence is expected next.
 */
enum {
    readCommand = 0,    /* at a sequence boundary: next byte is a character or a tag */
    quotePairOne = 1,   /* expecting the first byte of a quoted two-byte unit */
    quotePairTwo = 2,   /* expecting the second byte of a two-byte unit */
    quoteOne = 3,       /* expecting the single byte that follows an SQn tag */
    definePairOne = 4,  /* expecting the first argument byte of a define-extended tag */
    definePairTwo = 5,  /* expecting the second argument byte of a define-extended tag */
    defineOne = 6       /* expecting the offset byte of a define-window tag */
};
120
121typedef struct SCSUData {
122    /* dynamic window offsets, initialize to default values from initialDynamicOffsets */
123    uint32_t toUDynamicOffsets[8];
124    uint32_t fromUDynamicOffsets[8];
125
126    /* state machine state - toUnicode */
127    UBool toUIsSingleByteMode;
128    uint8_t toUState;
129    int8_t toUQuoteWindow, toUDynamicWindow;
130    uint8_t toUByteOne;
131    uint8_t toUPadding[3];
132
133    /* state machine state - fromUnicode */
134    UBool fromUIsSingleByteMode;
135    int8_t fromUDynamicWindow;
136
137    /*
138     * windowUse[] keeps track of the use of the dynamic windows:
139     * At nextWindowUseIndex there is the least recently used window,
140     * and the following windows (in a wrapping manner) are more and more
141     * recently used.
142     * At nextWindowUseIndex-1 there is the most recently used window.
143     */
144    uint8_t locale;
145    int8_t nextWindowUseIndex;
146    int8_t windowUse[8];
147} SCSUData;
148
/* initial contents of SCSUData.windowUse[]: the generic ordering and the one used for locale "ja" */
static const int8_t initialWindowUse[8] = {
    7, 0, 3, 2, 4, 5, 6, 1
};
static const int8_t initialWindowUse_ja[8] = {
    3, 2, 4, 1, 0, 7, 5, 6
};
151
/* values for SCSUData.locale */
enum {
    lGeneric = 0,   /* any locale other than Japanese */
    l_ja = 1        /* locale "ja" or "ja_..." */
};
155
156/* SCSU setup functions ----------------------------------------------------- */
157U_CDECL_BEGIN
158static void U_CALLCONV
159_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
160    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
161
162    if(choice<=UCNV_RESET_TO_UNICODE) {
163        /* reset toUnicode */
164        uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
165
166        scsu->toUIsSingleByteMode=true;
167        scsu->toUState=readCommand;
168        scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
169        scsu->toUByteOne=0;
170
171        cnv->toULength=0;
172    }
173    if(choice!=UCNV_RESET_TO_UNICODE) {
174        /* reset fromUnicode */
175        uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
176
177        scsu->fromUIsSingleByteMode=true;
178        scsu->fromUDynamicWindow=0;
179
180        scsu->nextWindowUseIndex=0;
181        switch(scsu->locale) {
182        case l_ja:
183            uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
184            break;
185        default:
186            uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
187            break;
188        }
189
190        cnv->fromUChar32=0;
191    }
192}
193
194static void U_CALLCONV
195_SCSUOpen(UConverter *cnv,
196          UConverterLoadArgs *pArgs,
197          UErrorCode *pErrorCode) {
198    const char *locale=pArgs->locale;
199    if(pArgs->onlyTestIsLoadable) {
200        return;
201    }
202    cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
203    if(cnv->extraInfo!=nullptr) {
204        if(locale!=nullptr && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
205            ((SCSUData *)cnv->extraInfo)->locale=l_ja;
206        } else {
207            ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
208        }
209        _SCSUReset(cnv, UCNV_RESET_BOTH);
210    } else {
211        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
212    }
213
214    /* Set the substitution character U+fffd as a Unicode string. */
215    cnv->subUChars[0]=0xfffd;
216    cnv->subCharLen=-1;
217}
218
219static void U_CALLCONV
220_SCSUClose(UConverter *cnv) {
221    if(cnv->extraInfo!=nullptr) {
222        if(!cnv->isExtraLocal) {
223            uprv_free(cnv->extraInfo);
224        }
225        cnv->extraInfo=nullptr;
226    }
227}
228
229/* SCSU-to-Unicode conversion functions ------------------------------------- */
230
231static void U_CALLCONV
232_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
233                          UErrorCode *pErrorCode) {
234    UConverter *cnv;
235    SCSUData *scsu;
236    const uint8_t *source, *sourceLimit;
237    char16_t *target;
238    const char16_t *targetLimit;
239    int32_t *offsets;
240    UBool isSingleByteMode;
241    uint8_t state, byteOne;
242    int8_t quoteWindow, dynamicWindow;
243
244    int32_t sourceIndex, nextSourceIndex;
245
246    uint8_t b;
247
248    /* set up the local pointers */
249    cnv=pArgs->converter;
250    scsu=(SCSUData *)cnv->extraInfo;
251
252    source=(const uint8_t *)pArgs->source;
253    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
254    target=pArgs->target;
255    targetLimit=pArgs->targetLimit;
256    offsets=pArgs->offsets;
257
258    /* get the state machine state */
259    isSingleByteMode=scsu->toUIsSingleByteMode;
260    state=scsu->toUState;
261    quoteWindow=scsu->toUQuoteWindow;
262    dynamicWindow=scsu->toUDynamicWindow;
263    byteOne=scsu->toUByteOne;
264
265    /* sourceIndex=-1 if the current character began in the previous buffer */
266    sourceIndex=state==readCommand ? 0 : -1;
267    nextSourceIndex=0;
268
269    /*
270     * conversion "loop"
271     *
272     * For performance, this is not a normal C loop.
273     * Instead, there are two code blocks for the two SCSU modes.
274     * The function branches to either one, and a change of the mode is done with a goto to
275     * the other branch.
276     *
277     * Each branch has two conventional loops:
278     * - a fast-path loop for the most common codes in the mode
279     * - a loop for all other codes in the mode
280     * When the fast-path runs into a code that it cannot handle, its loop ends and it
281     * runs into the following loop to handle the other codes.
282     * The end of the input or output buffer is also handled by the slower loop.
283     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
284     *
285     * The callback handling is done by returning with an error code.
286     * The conversion framework actually calls the callback function.
287     */
288    if(isSingleByteMode) {
289        /* fast path for single-byte mode */
290        if(state==readCommand) {
291fastSingle:
292            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
293                ++source;
294                ++nextSourceIndex;
295                if(b<=0x7f) {
296                    /* write US-ASCII graphic character or DEL */
297                    *target++=(char16_t)b;
298                    if(offsets!=nullptr) {
299                        *offsets++=sourceIndex;
300                    }
301                } else {
302                    /* write from dynamic window */
303                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
304                    if(c<=0xffff) {
305                        *target++=(char16_t)c;
306                        if(offsets!=nullptr) {
307                            *offsets++=sourceIndex;
308                        }
309                    } else {
310                        /* output surrogate pair */
311                        *target++=(char16_t)(0xd7c0+(c>>10));
312                        if(target<targetLimit) {
313                            *target++=(char16_t)(0xdc00|(c&0x3ff));
314                            if(offsets!=nullptr) {
315                                *offsets++=sourceIndex;
316                                *offsets++=sourceIndex;
317                            }
318                        } else {
319                            /* target overflow */
320                            if(offsets!=nullptr) {
321                                *offsets++=sourceIndex;
322                            }
323                            cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
324                            cnv->UCharErrorBufferLength=1;
325                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
326                            goto endloop;
327                        }
328                    }
329                }
330                sourceIndex=nextSourceIndex;
331            }
332        }
333
334        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
335singleByteMode:
336        while(source<sourceLimit) {
337            if(target>=targetLimit) {
338                /* target is full */
339                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
340                break;
341            }
342            b=*source++;
343            ++nextSourceIndex;
344            switch(state) {
345            case readCommand:
346                /* redundant conditions are commented out */
347                /* here: b<0x20 because otherwise we would be in fastSingle */
348                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
349                    /* CR/LF/TAB/NUL */
350                    *target++=(char16_t)b;
351                    if(offsets!=nullptr) {
352                        *offsets++=sourceIndex;
353                    }
354                    sourceIndex=nextSourceIndex;
355                    goto fastSingle;
356                } else if(SC0<=b) {
357                    if(b<=SC7) {
358                        dynamicWindow=(int8_t)(b-SC0);
359                        sourceIndex=nextSourceIndex;
360                        goto fastSingle;
361                    } else /* if(SD0<=b && b<=SD7) */ {
362                        dynamicWindow=(int8_t)(b-SD0);
363                        state=defineOne;
364                    }
365                } else if(/* SQ0<=b && */ b<=SQ7) {
366                    quoteWindow=(int8_t)(b-SQ0);
367                    state=quoteOne;
368                } else if(b==SDX) {
369                    state=definePairOne;
370                } else if(b==SQU) {
371                    state=quotePairOne;
372                } else if(b==SCU) {
373                    sourceIndex=nextSourceIndex;
374                    isSingleByteMode=false;
375                    goto fastUnicode;
376                } else /* Srs */ {
377                    /* callback(illegal) */
378                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
379                    cnv->toUBytes[0]=b;
380                    cnv->toULength=1;
381                    goto endloop;
382                }
383
384                /* store the first byte of a multibyte sequence in toUBytes[] */
385                cnv->toUBytes[0]=b;
386                cnv->toULength=1;
387                break;
388            case quotePairOne:
389                byteOne=b;
390                cnv->toUBytes[1]=b;
391                cnv->toULength=2;
392                state=quotePairTwo;
393                break;
394            case quotePairTwo:
395                *target++=(char16_t)((byteOne<<8)|b);
396                if(offsets!=nullptr) {
397                    *offsets++=sourceIndex;
398                }
399                sourceIndex=nextSourceIndex;
400                state=readCommand;
401                goto fastSingle;
402            case quoteOne:
403                if(b<0x80) {
404                    /* all static offsets are in the BMP */
405                    *target++=(char16_t)(staticOffsets[quoteWindow]+b);
406                    if(offsets!=nullptr) {
407                        *offsets++=sourceIndex;
408                    }
409                } else {
410                    /* write from dynamic window */
411                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
412                    if(c<=0xffff) {
413                        *target++=(char16_t)c;
414                        if(offsets!=nullptr) {
415                            *offsets++=sourceIndex;
416                        }
417                    } else {
418                        /* output surrogate pair */
419                        *target++=(char16_t)(0xd7c0+(c>>10));
420                        if(target<targetLimit) {
421                            *target++=(char16_t)(0xdc00|(c&0x3ff));
422                            if(offsets!=nullptr) {
423                                *offsets++=sourceIndex;
424                                *offsets++=sourceIndex;
425                            }
426                        } else {
427                            /* target overflow */
428                            if(offsets!=nullptr) {
429                                *offsets++=sourceIndex;
430                            }
431                            cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
432                            cnv->UCharErrorBufferLength=1;
433                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
434                            goto endloop;
435                        }
436                    }
437                }
438                sourceIndex=nextSourceIndex;
439                state=readCommand;
440                goto fastSingle;
441            case definePairOne:
442                dynamicWindow=(int8_t)((b>>5)&7);
443                byteOne=(uint8_t)(b&0x1f);
444                cnv->toUBytes[1]=b;
445                cnv->toULength=2;
446                state=definePairTwo;
447                break;
448            case definePairTwo:
449                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
450                sourceIndex=nextSourceIndex;
451                state=readCommand;
452                goto fastSingle;
453            case defineOne:
454                if(b==0) {
455                    /* callback(illegal): Reserved window offset value 0 */
456                    cnv->toUBytes[1]=b;
457                    cnv->toULength=2;
458                    goto endloop;
459                } else if(b<gapThreshold) {
460                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
461                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
462                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
463                } else if(b>=fixedThreshold) {
464                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
465                } else {
466                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
467                    cnv->toUBytes[1]=b;
468                    cnv->toULength=2;
469                    goto endloop;
470                }
471                sourceIndex=nextSourceIndex;
472                state=readCommand;
473                goto fastSingle;
474            }
475        }
476    } else {
477        /* fast path for Unicode mode */
478        if(state==readCommand) {
479fastUnicode:
480            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
481                *target++=(char16_t)((b<<8)|source[1]);
482                if(offsets!=nullptr) {
483                    *offsets++=sourceIndex;
484                }
485                sourceIndex=nextSourceIndex;
486                nextSourceIndex+=2;
487                source+=2;
488            }
489        }
490
491        /* normal state machine for Unicode mode */
492/* unicodeByteMode: */
493        while(source<sourceLimit) {
494            if(target>=targetLimit) {
495                /* target is full */
496                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
497                break;
498            }
499            b=*source++;
500            ++nextSourceIndex;
501            switch(state) {
502            case readCommand:
503                if((uint8_t)(b-UC0)>(Urs-UC0)) {
504                    byteOne=b;
505                    cnv->toUBytes[0]=b;
506                    cnv->toULength=1;
507                    state=quotePairTwo;
508                } else if(/* UC0<=b && */ b<=UC7) {
509                    dynamicWindow=(int8_t)(b-UC0);
510                    sourceIndex=nextSourceIndex;
511                    isSingleByteMode=true;
512                    goto fastSingle;
513                } else if(/* UD0<=b && */ b<=UD7) {
514                    dynamicWindow=(int8_t)(b-UD0);
515                    isSingleByteMode=true;
516                    cnv->toUBytes[0]=b;
517                    cnv->toULength=1;
518                    state=defineOne;
519                    goto singleByteMode;
520                } else if(b==UDX) {
521                    isSingleByteMode=true;
522                    cnv->toUBytes[0]=b;
523                    cnv->toULength=1;
524                    state=definePairOne;
525                    goto singleByteMode;
526                } else if(b==UQU) {
527                    cnv->toUBytes[0]=b;
528                    cnv->toULength=1;
529                    state=quotePairOne;
530                } else /* Urs */ {
531                    /* callback(illegal) */
532                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
533                    cnv->toUBytes[0]=b;
534                    cnv->toULength=1;
535                    goto endloop;
536                }
537                break;
538            case quotePairOne:
539                byteOne=b;
540                cnv->toUBytes[1]=b;
541                cnv->toULength=2;
542                state=quotePairTwo;
543                break;
544            case quotePairTwo:
545                *target++=(char16_t)((byteOne<<8)|b);
546                if(offsets!=nullptr) {
547                    *offsets++=sourceIndex;
548                }
549                sourceIndex=nextSourceIndex;
550                state=readCommand;
551                goto fastUnicode;
552            }
553        }
554    }
555endloop:
556
557    /* set the converter state back into UConverter */
558    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
559        /* reset to deal with the next character */
560        state=readCommand;
561    } else if(state==readCommand) {
562        /* not in a multi-byte sequence, reset toULength */
563        cnv->toULength=0;
564    }
565    scsu->toUIsSingleByteMode=isSingleByteMode;
566    scsu->toUState=state;
567    scsu->toUQuoteWindow=quoteWindow;
568    scsu->toUDynamicWindow=dynamicWindow;
569    scsu->toUByteOne=byteOne;
570
571    /* write back the updated pointers */
572    pArgs->source=(const char *)source;
573    pArgs->target=target;
574    pArgs->offsets=offsets;
575    return;
576}
577
578/*
579 * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
580 * If a change is made in the original function, then either
581 * change this function the same way or
582 * re-copy the original function and remove the variables
583 * offsets, sourceIndex, and nextSourceIndex.
584 */
585static void U_CALLCONV
586_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
587               UErrorCode *pErrorCode) {
588    UConverter *cnv;
589    SCSUData *scsu;
590    const uint8_t *source, *sourceLimit;
591    char16_t *target;
592    const char16_t *targetLimit;
593    UBool isSingleByteMode;
594    uint8_t state, byteOne;
595    int8_t quoteWindow, dynamicWindow;
596
597    uint8_t b;
598
599    /* set up the local pointers */
600    cnv=pArgs->converter;
601    scsu=(SCSUData *)cnv->extraInfo;
602
603    source=(const uint8_t *)pArgs->source;
604    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
605    target=pArgs->target;
606    targetLimit=pArgs->targetLimit;
607
608    /* get the state machine state */
609    isSingleByteMode=scsu->toUIsSingleByteMode;
610    state=scsu->toUState;
611    quoteWindow=scsu->toUQuoteWindow;
612    dynamicWindow=scsu->toUDynamicWindow;
613    byteOne=scsu->toUByteOne;
614
615    /*
616     * conversion "loop"
617     *
618     * For performance, this is not a normal C loop.
619     * Instead, there are two code blocks for the two SCSU modes.
620     * The function branches to either one, and a change of the mode is done with a goto to
621     * the other branch.
622     *
623     * Each branch has two conventional loops:
624     * - a fast-path loop for the most common codes in the mode
625     * - a loop for all other codes in the mode
626     * When the fast-path runs into a code that it cannot handle, its loop ends and it
627     * runs into the following loop to handle the other codes.
628     * The end of the input or output buffer is also handled by the slower loop.
629     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
630     *
631     * The callback handling is done by returning with an error code.
632     * The conversion framework actually calls the callback function.
633     */
634    if(isSingleByteMode) {
635        /* fast path for single-byte mode */
636        if(state==readCommand) {
637fastSingle:
638            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
639                ++source;
640                if(b<=0x7f) {
641                    /* write US-ASCII graphic character or DEL */
642                    *target++=(char16_t)b;
643                } else {
644                    /* write from dynamic window */
645                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
646                    if(c<=0xffff) {
647                        *target++=(char16_t)c;
648                    } else {
649                        /* output surrogate pair */
650                        *target++=(char16_t)(0xd7c0+(c>>10));
651                        if(target<targetLimit) {
652                            *target++=(char16_t)(0xdc00|(c&0x3ff));
653                        } else {
654                            /* target overflow */
655                            cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
656                            cnv->UCharErrorBufferLength=1;
657                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
658                            goto endloop;
659                        }
660                    }
661                }
662            }
663        }
664
665        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
666singleByteMode:
667        while(source<sourceLimit) {
668            if(target>=targetLimit) {
669                /* target is full */
670                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
671                break;
672            }
673            b=*source++;
674            switch(state) {
675            case readCommand:
676                /* redundant conditions are commented out */
677                /* here: b<0x20 because otherwise we would be in fastSingle */
678                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
679                    /* CR/LF/TAB/NUL */
680                    *target++=(char16_t)b;
681                    goto fastSingle;
682                } else if(SC0<=b) {
683                    if(b<=SC7) {
684                        dynamicWindow=(int8_t)(b-SC0);
685                        goto fastSingle;
686                    } else /* if(SD0<=b && b<=SD7) */ {
687                        dynamicWindow=(int8_t)(b-SD0);
688                        state=defineOne;
689                    }
690                } else if(/* SQ0<=b && */ b<=SQ7) {
691                    quoteWindow=(int8_t)(b-SQ0);
692                    state=quoteOne;
693                } else if(b==SDX) {
694                    state=definePairOne;
695                } else if(b==SQU) {
696                    state=quotePairOne;
697                } else if(b==SCU) {
698                    isSingleByteMode=false;
699                    goto fastUnicode;
700                } else /* Srs */ {
701                    /* callback(illegal) */
702                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
703                    cnv->toUBytes[0]=b;
704                    cnv->toULength=1;
705                    goto endloop;
706                }
707
708                /* store the first byte of a multibyte sequence in toUBytes[] */
709                cnv->toUBytes[0]=b;
710                cnv->toULength=1;
711                break;
712            case quotePairOne:
713                byteOne=b;
714                cnv->toUBytes[1]=b;
715                cnv->toULength=2;
716                state=quotePairTwo;
717                break;
718            case quotePairTwo:
719                *target++=(char16_t)((byteOne<<8)|b);
720                state=readCommand;
721                goto fastSingle;
722            case quoteOne:
723                if(b<0x80) {
724                    /* all static offsets are in the BMP */
725                    *target++=(char16_t)(staticOffsets[quoteWindow]+b);
726                } else {
727                    /* write from dynamic window */
728                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
729                    if(c<=0xffff) {
730                        *target++=(char16_t)c;
731                    } else {
732                        /* output surrogate pair */
733                        *target++=(char16_t)(0xd7c0+(c>>10));
734                        if(target<targetLimit) {
735                            *target++=(char16_t)(0xdc00|(c&0x3ff));
736                        } else {
737                            /* target overflow */
738                            cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
739                            cnv->UCharErrorBufferLength=1;
740                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
741                            goto endloop;
742                        }
743                    }
744                }
745                state=readCommand;
746                goto fastSingle;
747            case definePairOne:
748                dynamicWindow=(int8_t)((b>>5)&7);
749                byteOne=(uint8_t)(b&0x1f);
750                cnv->toUBytes[1]=b;
751                cnv->toULength=2;
752                state=definePairTwo;
753                break;
754            case definePairTwo:
755                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
756                state=readCommand;
757                goto fastSingle;
758            case defineOne:
759                if(b==0) {
760                    /* callback(illegal): Reserved window offset value 0 */
761                    cnv->toUBytes[1]=b;
762                    cnv->toULength=2;
763                    goto endloop;
764                } else if(b<gapThreshold) {
765                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
766                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
767                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
768                } else if(b>=fixedThreshold) {
769                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
770                } else {
771                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
772                    cnv->toUBytes[1]=b;
773                    cnv->toULength=2;
774                    goto endloop;
775                }
776                state=readCommand;
777                goto fastSingle;
778            }
779        }
780    } else {
781        /* fast path for Unicode mode */
782        if(state==readCommand) {
783fastUnicode:
784            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
785                *target++=(char16_t)((b<<8)|source[1]);
786                source+=2;
787            }
788        }
789
790        /* normal state machine for Unicode mode */
791/* unicodeByteMode: */
792        while(source<sourceLimit) {
793            if(target>=targetLimit) {
794                /* target is full */
795                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
796                break;
797            }
798            b=*source++;
799            switch(state) {
800            case readCommand:
801                if((uint8_t)(b-UC0)>(Urs-UC0)) {
802                    byteOne=b;
803                    cnv->toUBytes[0]=b;
804                    cnv->toULength=1;
805                    state=quotePairTwo;
806                } else if(/* UC0<=b && */ b<=UC7) {
807                    dynamicWindow=(int8_t)(b-UC0);
808                    isSingleByteMode=true;
809                    goto fastSingle;
810                } else if(/* UD0<=b && */ b<=UD7) {
811                    dynamicWindow=(int8_t)(b-UD0);
812                    isSingleByteMode=true;
813                    cnv->toUBytes[0]=b;
814                    cnv->toULength=1;
815                    state=defineOne;
816                    goto singleByteMode;
817                } else if(b==UDX) {
818                    isSingleByteMode=true;
819                    cnv->toUBytes[0]=b;
820                    cnv->toULength=1;
821                    state=definePairOne;
822                    goto singleByteMode;
823                } else if(b==UQU) {
824                    cnv->toUBytes[0]=b;
825                    cnv->toULength=1;
826                    state=quotePairOne;
827                } else /* Urs */ {
828                    /* callback(illegal) */
829                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
830                    cnv->toUBytes[0]=b;
831                    cnv->toULength=1;
832                    goto endloop;
833                }
834                break;
835            case quotePairOne:
836                byteOne=b;
837                cnv->toUBytes[1]=b;
838                cnv->toULength=2;
839                state=quotePairTwo;
840                break;
841            case quotePairTwo:
842                *target++=(char16_t)((byteOne<<8)|b);
843                state=readCommand;
844                goto fastUnicode;
845            }
846        }
847    }
848endloop:
849
850    /* set the converter state back into UConverter */
851    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
852        /* reset to deal with the next character */
853        state=readCommand;
854    } else if(state==readCommand) {
855        /* not in a multi-byte sequence, reset toULength */
856        cnv->toULength=0;
857    }
858    scsu->toUIsSingleByteMode=isSingleByteMode;
859    scsu->toUState=state;
860    scsu->toUQuoteWindow=quoteWindow;
861    scsu->toUDynamicWindow=dynamicWindow;
862    scsu->toUByteOne=byteOne;
863
864    /* write back the updated pointers */
865    pArgs->source=(const char *)source;
866    pArgs->target=target;
867    return;
868}
869U_CDECL_END
870/* SCSU-from-Unicode conversion functions ----------------------------------- */
871
872/*
873 * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
874 * reasonable results. The lookahead is minimal.
875 * Many cases are simple:
876 * A character fits directly into the current mode, a dynamic or static window,
877 * or is not compressible. These cases are tested first.
878 * Real compression heuristics are applied to the rest, in code branches for
879 * single/Unicode mode and BMP/supplementary code points.
880 * The heuristics used here are extremely simple.
881 */
882
/*
 * Look up which of the eight windows described by offsets[] covers c.
 * A window covers the 128 code points offsets[i]..offsets[i]+0x7f.
 * Returns the index (0..7) of the first covering window, or -1 if none does.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t found=-1;
    int slot=0;

    while(slot<8 && found<0) {
        /*
         * Unsigned subtraction wraps around when c lies below the window base,
         * so a single comparison tests both window boundaries.
         */
        uint32_t distance=(uint32_t)(c-offsets[slot]);
        if(distance<0x80) {
            found=(int8_t)slot;
        }
        ++slot;
    }
    return found;
}
894
895/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
896static UBool
897isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
898    return (UBool)(c<=offset+0x7f &&
899          (c>=offset || (c<=0x7f &&
900                        (c>=0x20 || (1UL<<c)&0x2601))));
901                                /* binary 0010 0110 0000 0001,
902                                   check for b==0xd || b==0xa || b==9 || b==0 */
903}
904
905/*
906 * getNextDynamicWindow returns the next dynamic window to be redefined
907 */
908static int8_t
909getNextDynamicWindow(SCSUData *scsu) {
910    int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
911    if(++scsu->nextWindowUseIndex==8) {
912        scsu->nextWindowUseIndex=0;
913    }
914    return window;
915}
916
917/*
918 * useDynamicWindow() adjusts
919 * windowUse[] and nextWindowUseIndex for the algorithm to choose
920 * the next dynamic window to be defined;
921 * a subclass may override it and provide its own algorithm.
922 */
923static void
924useDynamicWindow(SCSUData *scsu, int8_t window) {
925    /*
926     * move the existing window, which just became the most recently used one,
927     * up in windowUse[] to nextWindowUseIndex-1
928     */
929
930    /* first, find the index of the window - backwards to favor the more recently used windows */
931    int i, j;
932
933    i=scsu->nextWindowUseIndex;
934    do {
935        if(--i<0) {
936            i=7;
937        }
938    } while(scsu->windowUse[i]!=window);
939
940    /* now copy each windowUse[i+1] to [i] */
941    j=i+1;
942    if(j==8) {
943        j=0;
944    }
945    while(j!=scsu->nextWindowUseIndex) {
946        scsu->windowUse[i]=scsu->windowUse[j];
947        i=j;
948        if(++j==8) { j=0; }
949    }
950
951    /* finally, set the window into the most recently used index */
952    scsu->windowUse[i]=window;
953}
954
955/*
956 * calculate the offset and the code for a dynamic window that contains the character
957 * takes fixed offsets into account
958 * the offset of the window is stored in the offset variable,
959 * the code is returned
960 *
961 * return offset code: -1 none  <=0xff code for SDn/UDn  else code for SDX/UDX, subtract 0x200 to get the true code
962 */
963static int
964getDynamicOffset(uint32_t c, uint32_t *pOffset) {
965    int i;
966
967    for(i=0; i<7; ++i) {
968        if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
969            *pOffset=fixedOffsets[i];
970            return 0xf9+i;
971        }
972    }
973
974    if(c<0x80) {
975        /* No dynamic window for US-ASCII. */
976        return -1;
977    } else if(c<0x3400 ||
978              (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
979              (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
980    ) {
981        /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
982        *pOffset=c&0x7fffff80;
983        return (int)(c>>7);
984    } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
985        /* For these characters we need to take the gapOffset into account. */
986        *pOffset=c&0x7fffff80;
987        return (int)((c-gapOffset)>>7);
988    } else {
989        return -1;
990    }
991}
992U_CDECL_BEGIN
993/*
994 * Idea for compression:
995 *  - save SCSUData and other state before really starting work
996 *  - at endloop, see if compression could be better with just unicode mode
997 *  - don't do this if a callback has been called
998 *  - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
999 *  - different buffer handling!
1000 *
1001 * Drawback or need for corrective handling:
1002 * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
1003 * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
1004 * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
1005 *
1006 * How to achieve both?
1007 *  - Only replace the result after an SDX or SCU?
1008 */
1009
1010static void U_CALLCONV
1011_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
1012                            UErrorCode *pErrorCode) {
1013    UConverter *cnv;
1014    SCSUData *scsu;
1015    const char16_t *source, *sourceLimit;
1016    uint8_t *target;
1017    int32_t targetCapacity;
1018    int32_t *offsets;
1019
1020    UBool isSingleByteMode;
1021    uint8_t dynamicWindow;
1022    uint32_t currentOffset;
1023
1024    uint32_t c, delta;
1025
1026    int32_t sourceIndex, nextSourceIndex;
1027
1028    int32_t length;
1029
1030    /* variables for compression heuristics */
1031    uint32_t offset;
1032    char16_t lead, trail;
1033    int code;
1034    int8_t window;
1035
1036    /* set up the local pointers */
1037    cnv=pArgs->converter;
1038    scsu=(SCSUData *)cnv->extraInfo;
1039
1040    /* set up the local pointers */
1041    source=pArgs->source;
1042    sourceLimit=pArgs->sourceLimit;
1043    target=(uint8_t *)pArgs->target;
1044    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1045    offsets=pArgs->offsets;
1046
1047    /* get the state machine state */
1048    isSingleByteMode=scsu->fromUIsSingleByteMode;
1049    dynamicWindow=scsu->fromUDynamicWindow;
1050    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1051
1052    c=cnv->fromUChar32;
1053
1054    /* sourceIndex=-1 if the current character began in the previous buffer */
1055    sourceIndex= c==0 ? 0 : -1;
1056    nextSourceIndex=0;
1057
1058    /* similar conversion "loop" as in toUnicode */
1059loop:
1060    if(isSingleByteMode) {
1061        if(c!=0 && targetCapacity>0) {
1062            goto getTrailSingle;
1063        }
1064
1065        /* state machine for single-byte mode */
1066/* singleByteMode: */
1067        while(source<sourceLimit) {
1068            if(targetCapacity<=0) {
1069                /* target is full */
1070                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1071                break;
1072            }
1073            c=*source++;
1074            ++nextSourceIndex;
1075
1076            if((c-0x20)<=0x5f) {
1077                /* pass US-ASCII graphic character through */
1078                *target++=(uint8_t)c;
1079                if(offsets!=nullptr) {
1080                    *offsets++=sourceIndex;
1081                }
1082                --targetCapacity;
1083            } else if(c<0x20) {
1084                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1085                    /* CR/LF/TAB/NUL */
1086                    *target++=(uint8_t)c;
1087                    if(offsets!=nullptr) {
1088                        *offsets++=sourceIndex;
1089                    }
1090                    --targetCapacity;
1091                } else {
1092                    /* quote C0 control character */
1093                    c|=SQ0<<8;
1094                    length=2;
1095                    goto outputBytes;
1096                }
1097            } else if((delta=c-currentOffset)<=0x7f) {
1098                /* use the current dynamic window */
1099                *target++=(uint8_t)(delta|0x80);
1100                if(offsets!=nullptr) {
1101                    *offsets++=sourceIndex;
1102                }
1103                --targetCapacity;
1104            } else if(U16_IS_SURROGATE(c)) {
1105                if(U16_IS_SURROGATE_LEAD(c)) {
1106getTrailSingle:
1107                    lead=(char16_t)c;
1108                    if(source<sourceLimit) {
1109                        /* test the following code unit */
1110                        trail=*source;
1111                        if(U16_IS_TRAIL(trail)) {
1112                            ++source;
1113                            ++nextSourceIndex;
1114                            c=U16_GET_SUPPLEMENTARY(c, trail);
1115                            /* convert this surrogate code point */
1116                            /* exit this condition tree */
1117                        } else {
1118                            /* this is an unmatched lead code unit (1st surrogate) */
1119                            /* callback(illegal) */
1120                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1121                            goto endloop;
1122                        }
1123                    } else {
1124                        /* no more input */
1125                        break;
1126                    }
1127                } else {
1128                    /* this is an unmatched trail code unit (2nd surrogate) */
1129                    /* callback(illegal) */
1130                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1131                    goto endloop;
1132                }
1133
1134                /* compress supplementary character U+10000..U+10ffff */
1135                if((delta=c-currentOffset)<=0x7f) {
1136                    /* use the current dynamic window */
1137                    *target++=(uint8_t)(delta|0x80);
1138                    if(offsets!=nullptr) {
1139                        *offsets++=sourceIndex;
1140                    }
1141                    --targetCapacity;
1142                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1143                    /* there is a dynamic window that contains this character, change to it */
1144                    dynamicWindow=window;
1145                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1146                    useDynamicWindow(scsu, dynamicWindow);
1147                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1148                    length=2;
1149                    goto outputBytes;
1150                } else if((code=getDynamicOffset(c, &offset))>=0) {
1151                    /* might check if there are more characters in this window to come */
1152                    /* define an extended window with this character */
1153                    code-=0x200;
1154                    dynamicWindow=getNextDynamicWindow(scsu);
1155                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1156                    useDynamicWindow(scsu, dynamicWindow);
1157                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1158                    length=4;
1159                    goto outputBytes;
1160                } else {
1161                    /* change to Unicode mode and output this (lead, trail) pair */
1162                    isSingleByteMode=false;
1163                    *target++=(uint8_t)SCU;
1164                    if(offsets!=nullptr) {
1165                        *offsets++=sourceIndex;
1166                    }
1167                    --targetCapacity;
1168                    c=((uint32_t)lead<<16)|trail;
1169                    length=4;
1170                    goto outputBytes;
1171                }
1172            } else if(c<0xa0) {
1173                /* quote C1 control character */
1174                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1175                length=2;
1176                goto outputBytes;
1177            } else if(c==0xfeff || c>=0xfff0) {
1178                /* quote signature character=byte order mark and specials */
1179                c|=SQU<<16;
1180                length=3;
1181                goto outputBytes;
1182            } else {
1183                /* compress all other BMP characters */
1184                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1185                    /* there is a window defined that contains this character - switch to it or quote from it? */
1186                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1187                        /* change to dynamic window */
1188                        dynamicWindow=window;
1189                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1190                        useDynamicWindow(scsu, dynamicWindow);
1191                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1192                        length=2;
1193                        goto outputBytes;
1194                    } else {
1195                        /* quote from dynamic window */
1196                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1197                        length=2;
1198                        goto outputBytes;
1199                    }
1200                } else if((window=getWindow(staticOffsets, c))>=0) {
1201                    /* quote from static window */
1202                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1203                    length=2;
1204                    goto outputBytes;
1205                } else if((code=getDynamicOffset(c, &offset))>=0) {
1206                    /* define a dynamic window with this character */
1207                    dynamicWindow=getNextDynamicWindow(scsu);
1208                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1209                    useDynamicWindow(scsu, dynamicWindow);
1210                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1211                    length=3;
1212                    goto outputBytes;
1213                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1214                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1215                ) {
1216                    /*
1217                     * this character is not compressible (a BMP ideograph or similar);
1218                     * switch to Unicode mode if this is the last character in the block
1219                     * or there is at least one more ideograph following immediately
1220                     */
1221                    isSingleByteMode=false;
1222                    c|=SCU<<16;
1223                    length=3;
1224                    goto outputBytes;
1225                } else {
1226                    /* quote Unicode */
1227                    c|=SQU<<16;
1228                    length=3;
1229                    goto outputBytes;
1230                }
1231            }
1232
1233            /* normal end of conversion: prepare for a new character */
1234            c=0;
1235            sourceIndex=nextSourceIndex;
1236        }
1237    } else {
1238        if(c!=0 && targetCapacity>0) {
1239            goto getTrailUnicode;
1240        }
1241
1242        /* state machine for Unicode mode */
1243/* unicodeByteMode: */
1244        while(source<sourceLimit) {
1245            if(targetCapacity<=0) {
1246                /* target is full */
1247                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1248                break;
1249            }
1250            c=*source++;
1251            ++nextSourceIndex;
1252
1253            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1254                /* not compressible, write character directly */
1255                if(targetCapacity>=2) {
1256                    *target++=(uint8_t)(c>>8);
1257                    *target++=(uint8_t)c;
1258                    if(offsets!=nullptr) {
1259                        *offsets++=sourceIndex;
1260                        *offsets++=sourceIndex;
1261                    }
1262                    targetCapacity-=2;
1263                } else {
1264                    length=2;
1265                    goto outputBytes;
1266                }
1267            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1268                /* compress BMP character if the following one is not an uncompressible ideograph */
1269                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1270                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1271                        /* ASCII digit or letter */
1272                        isSingleByteMode=true;
1273                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1274                        length=2;
1275                        goto outputBytes;
1276                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1277                        /* there is a dynamic window that contains this character, change to it */
1278                        isSingleByteMode=true;
1279                        dynamicWindow=window;
1280                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1281                        useDynamicWindow(scsu, dynamicWindow);
1282                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1283                        length=2;
1284                        goto outputBytes;
1285                    } else if((code=getDynamicOffset(c, &offset))>=0) {
1286                        /* define a dynamic window with this character */
1287                        isSingleByteMode=true;
1288                        dynamicWindow=getNextDynamicWindow(scsu);
1289                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1290                        useDynamicWindow(scsu, dynamicWindow);
1291                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1292                        length=3;
1293                        goto outputBytes;
1294                    }
1295                }
1296
1297                /* don't know how to compress this character, just write it directly */
1298                length=2;
1299                goto outputBytes;
1300            } else if(c<0xe000) {
1301                /* c is a surrogate */
1302                if(U16_IS_SURROGATE_LEAD(c)) {
1303getTrailUnicode:
1304                    lead=(char16_t)c;
1305                    if(source<sourceLimit) {
1306                        /* test the following code unit */
1307                        trail=*source;
1308                        if(U16_IS_TRAIL(trail)) {
1309                            ++source;
1310                            ++nextSourceIndex;
1311                            c=U16_GET_SUPPLEMENTARY(c, trail);
1312                            /* convert this surrogate code point */
1313                            /* exit this condition tree */
1314                        } else {
1315                            /* this is an unmatched lead code unit (1st surrogate) */
1316                            /* callback(illegal) */
1317                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1318                            goto endloop;
1319                        }
1320                    } else {
1321                        /* no more input */
1322                        break;
1323                    }
1324                } else {
1325                    /* this is an unmatched trail code unit (2nd surrogate) */
1326                    /* callback(illegal) */
1327                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1328                    goto endloop;
1329                }
1330
1331                /* compress supplementary character */
1332                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1333                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1334                ) {
1335                    /*
1336                     * there is a dynamic window that contains this character and
1337                     * the following character is not uncompressible,
1338                     * change to the window
1339                     */
1340                    isSingleByteMode=true;
1341                    dynamicWindow=window;
1342                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1343                    useDynamicWindow(scsu, dynamicWindow);
1344                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1345                    length=2;
1346                    goto outputBytes;
1347                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1348                          (code=getDynamicOffset(c, &offset))>=0
1349                ) {
1350                    /* two supplementary characters in (probably) the same window - define an extended one */
1351                    isSingleByteMode=true;
1352                    code-=0x200;
1353                    dynamicWindow=getNextDynamicWindow(scsu);
1354                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1355                    useDynamicWindow(scsu, dynamicWindow);
1356                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1357                    length=4;
1358                    goto outputBytes;
1359                } else {
1360                    /* don't know how to compress this character, just write it directly */
1361                    c=((uint32_t)lead<<16)|trail;
1362                    length=4;
1363                    goto outputBytes;
1364                }
1365            } else /* 0xe000<=c<0xf300 */ {
1366                /* quote to avoid SCSU tags */
1367                c|=UQU<<16;
1368                length=3;
1369                goto outputBytes;
1370            }
1371
1372            /* normal end of conversion: prepare for a new character */
1373            c=0;
1374            sourceIndex=nextSourceIndex;
1375        }
1376    }
1377endloop:
1378
1379    /* set the converter state back into UConverter */
1380    scsu->fromUIsSingleByteMode=isSingleByteMode;
1381    scsu->fromUDynamicWindow=dynamicWindow;
1382
1383    cnv->fromUChar32=c;
1384
1385    /* write back the updated pointers */
1386    pArgs->source=source;
1387    pArgs->target=(char *)target;
1388    pArgs->offsets=offsets;
1389    return;
1390
1391outputBytes:
1392    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1393    /* from the first if in the loop we know that targetCapacity>0 */
1394    if(length<=targetCapacity) {
1395        if(offsets==nullptr) {
1396            switch(length) {
1397                /* each branch falls through to the next one */
1398            case 4:
1399                *target++=(uint8_t)(c>>24);
1400                U_FALLTHROUGH;
1401            case 3:
1402                *target++=(uint8_t)(c>>16);
1403                U_FALLTHROUGH;
1404            case 2:
1405                *target++=(uint8_t)(c>>8);
1406                U_FALLTHROUGH;
1407            case 1:
1408                *target++=(uint8_t)c;
1409                U_FALLTHROUGH;
1410            default:
1411                /* will never occur */
1412                break;
1413            }
1414        } else {
1415            switch(length) {
1416                /* each branch falls through to the next one */
1417            case 4:
1418                *target++=(uint8_t)(c>>24);
1419                *offsets++=sourceIndex;
1420                U_FALLTHROUGH;
1421            case 3:
1422                *target++=(uint8_t)(c>>16);
1423                *offsets++=sourceIndex;
1424                U_FALLTHROUGH;
1425            case 2:
1426                *target++=(uint8_t)(c>>8);
1427                *offsets++=sourceIndex;
1428                U_FALLTHROUGH;
1429            case 1:
1430                *target++=(uint8_t)c;
1431                *offsets++=sourceIndex;
1432                U_FALLTHROUGH;
1433            default:
1434                /* will never occur */
1435                break;
1436            }
1437        }
1438        targetCapacity-=length;
1439
1440        /* normal end of conversion: prepare for a new character */
1441        c=0;
1442        sourceIndex=nextSourceIndex;
1443        goto loop;
1444    } else {
1445        uint8_t *p;
1446
1447        /*
1448         * We actually do this backwards here:
1449         * In order to save an intermediate variable, we output
1450         * first to the overflow buffer what does not fit into the
1451         * regular target.
1452         */
1453        /* we know that 0<=targetCapacity<length<=4 */
1454        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1455        length-=targetCapacity;
1456        p=(uint8_t *)cnv->charErrorBuffer;
1457        switch(length) {
1458            /* each branch falls through to the next one */
1459        case 4:
1460            *p++=(uint8_t)(c>>24);
1461            U_FALLTHROUGH;
1462        case 3:
1463            *p++=(uint8_t)(c>>16);
1464            U_FALLTHROUGH;
1465        case 2:
1466            *p++=(uint8_t)(c>>8);
1467            U_FALLTHROUGH;
1468        case 1:
1469            *p=(uint8_t)c;
1470            U_FALLTHROUGH;
1471        default:
1472            /* will never occur */
1473            break;
1474        }
1475        cnv->charErrorBufferLength=(int8_t)length;
1476
1477        /* now output what fits into the regular target */
1478        c>>=8*length; /* length was reduced by targetCapacity */
1479        switch(targetCapacity) {
1480            /* each branch falls through to the next one */
1481        case 3:
1482            *target++=(uint8_t)(c>>16);
1483            if(offsets!=nullptr) {
1484                *offsets++=sourceIndex;
1485            }
1486            U_FALLTHROUGH;
1487        case 2:
1488            *target++=(uint8_t)(c>>8);
1489            if(offsets!=nullptr) {
1490                *offsets++=sourceIndex;
1491            }
1492            U_FALLTHROUGH;
1493        case 1:
1494            *target++=(uint8_t)c;
1495            if(offsets!=nullptr) {
1496                *offsets++=sourceIndex;
1497            }
1498            U_FALLTHROUGH;
1499        default:
1500            break;
1501        }
1502
1503        /* target overflow */
1504        targetCapacity=0;
1505        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1506        c=0;
1507        goto endloop;
1508    }
1509}
1510
1511/*
1512 * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
1513 * If a change is made in the original function, then either
1514 * change this function the same way or
1515 * re-copy the original function and remove the variables
1516 * offsets, sourceIndex, and nextSourceIndex.
1517 */
1518static void U_CALLCONV
1519_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
1520                 UErrorCode *pErrorCode) {
1521    UConverter *cnv;
1522    SCSUData *scsu;
1523    const char16_t *source, *sourceLimit;
1524    uint8_t *target;
1525    int32_t targetCapacity;
1526
1527    UBool isSingleByteMode;
1528    uint8_t dynamicWindow;
1529    uint32_t currentOffset;
1530
1531    uint32_t c, delta;
1532
1533    int32_t length;
1534
1535    /* variables for compression heuristics */
1536    uint32_t offset;
1537    char16_t lead, trail;
1538    int code;
1539    int8_t window;
1540
1541    /* set up the local pointers */
1542    cnv=pArgs->converter;
1543    scsu=(SCSUData *)cnv->extraInfo;
1544
1545    /* set up the local pointers */
1546    source=pArgs->source;
1547    sourceLimit=pArgs->sourceLimit;
1548    target=(uint8_t *)pArgs->target;
1549    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1550
1551    /* get the state machine state */
1552    isSingleByteMode=scsu->fromUIsSingleByteMode;
1553    dynamicWindow=scsu->fromUDynamicWindow;
1554    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1555
1556    c=cnv->fromUChar32;
1557
1558    /* similar conversion "loop" as in toUnicode */
1559loop:
1560    if(isSingleByteMode) {
1561        if(c!=0 && targetCapacity>0) {
1562            goto getTrailSingle;
1563        }
1564
1565        /* state machine for single-byte mode */
1566/* singleByteMode: */
1567        while(source<sourceLimit) {
1568            if(targetCapacity<=0) {
1569                /* target is full */
1570                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1571                break;
1572            }
1573            c=*source++;
1574
1575            if((c-0x20)<=0x5f) {
1576                /* pass US-ASCII graphic character through */
1577                *target++=(uint8_t)c;
1578                --targetCapacity;
1579            } else if(c<0x20) {
1580                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1581                    /* CR/LF/TAB/NUL */
1582                    *target++=(uint8_t)c;
1583                    --targetCapacity;
1584                } else {
1585                    /* quote C0 control character */
1586                    c|=SQ0<<8;
1587                    length=2;
1588                    goto outputBytes;
1589                }
1590            } else if((delta=c-currentOffset)<=0x7f) {
1591                /* use the current dynamic window */
1592                *target++=(uint8_t)(delta|0x80);
1593                --targetCapacity;
1594            } else if(U16_IS_SURROGATE(c)) {
1595                if(U16_IS_SURROGATE_LEAD(c)) {
1596getTrailSingle:
1597                    lead=(char16_t)c;
1598                    if(source<sourceLimit) {
1599                        /* test the following code unit */
1600                        trail=*source;
1601                        if(U16_IS_TRAIL(trail)) {
1602                            ++source;
1603                            c=U16_GET_SUPPLEMENTARY(c, trail);
1604                            /* convert this surrogate code point */
1605                            /* exit this condition tree */
1606                        } else {
1607                            /* this is an unmatched lead code unit (1st surrogate) */
1608                            /* callback(illegal) */
1609                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1610                            goto endloop;
1611                        }
1612                    } else {
1613                        /* no more input */
1614                        break;
1615                    }
1616                } else {
1617                    /* this is an unmatched trail code unit (2nd surrogate) */
1618                    /* callback(illegal) */
1619                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1620                    goto endloop;
1621                }
1622
1623                /* compress supplementary character U+10000..U+10ffff */
1624                if((delta=c-currentOffset)<=0x7f) {
1625                    /* use the current dynamic window */
1626                    *target++=(uint8_t)(delta|0x80);
1627                    --targetCapacity;
1628                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1629                    /* there is a dynamic window that contains this character, change to it */
1630                    dynamicWindow=window;
1631                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1632                    useDynamicWindow(scsu, dynamicWindow);
1633                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1634                    length=2;
1635                    goto outputBytes;
1636                } else if((code=getDynamicOffset(c, &offset))>=0) {
1637                    /* might check if there are more characters in this window to come */
1638                    /* define an extended window with this character */
1639                    code-=0x200;
1640                    dynamicWindow=getNextDynamicWindow(scsu);
1641                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1642                    useDynamicWindow(scsu, dynamicWindow);
1643                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1644                    length=4;
1645                    goto outputBytes;
1646                } else {
1647                    /* change to Unicode mode and output this (lead, trail) pair */
1648                    isSingleByteMode=false;
1649                    *target++=(uint8_t)SCU;
1650                    --targetCapacity;
1651                    c=((uint32_t)lead<<16)|trail;
1652                    length=4;
1653                    goto outputBytes;
1654                }
1655            } else if(c<0xa0) {
1656                /* quote C1 control character */
1657                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1658                length=2;
1659                goto outputBytes;
1660            } else if(c==0xfeff || c>=0xfff0) {
1661                /* quote signature character=byte order mark and specials */
1662                c|=SQU<<16;
1663                length=3;
1664                goto outputBytes;
1665            } else {
1666                /* compress all other BMP characters */
1667                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1668                    /* there is a window defined that contains this character - switch to it or quote from it? */
1669                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1670                        /* change to dynamic window */
1671                        dynamicWindow=window;
1672                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1673                        useDynamicWindow(scsu, dynamicWindow);
1674                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1675                        length=2;
1676                        goto outputBytes;
1677                    } else {
1678                        /* quote from dynamic window */
1679                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1680                        length=2;
1681                        goto outputBytes;
1682                    }
1683                } else if((window=getWindow(staticOffsets, c))>=0) {
1684                    /* quote from static window */
1685                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1686                    length=2;
1687                    goto outputBytes;
1688                } else if((code=getDynamicOffset(c, &offset))>=0) {
1689                    /* define a dynamic window with this character */
1690                    dynamicWindow=getNextDynamicWindow(scsu);
1691                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1692                    useDynamicWindow(scsu, dynamicWindow);
1693                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1694                    length=3;
1695                    goto outputBytes;
1696                } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1697                          (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1698                ) {
1699                    /*
1700                     * this character is not compressible (a BMP ideograph or similar);
1701                     * switch to Unicode mode if this is the last character in the block
1702                     * or there is at least one more ideograph following immediately
1703                     */
1704                    isSingleByteMode=false;
1705                    c|=SCU<<16;
1706                    length=3;
1707                    goto outputBytes;
1708                } else {
1709                    /* quote Unicode */
1710                    c|=SQU<<16;
1711                    length=3;
1712                    goto outputBytes;
1713                }
1714            }
1715
1716            /* normal end of conversion: prepare for a new character */
1717            c=0;
1718        }
1719    } else {
1720        if(c!=0 && targetCapacity>0) {
1721            goto getTrailUnicode;
1722        }
1723
1724        /* state machine for Unicode mode */
1725/* unicodeByteMode: */
1726        while(source<sourceLimit) {
1727            if(targetCapacity<=0) {
1728                /* target is full */
1729                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1730                break;
1731            }
1732            c=*source++;
1733
1734            if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1735                /* not compressible, write character directly */
1736                if(targetCapacity>=2) {
1737                    *target++=(uint8_t)(c>>8);
1738                    *target++=(uint8_t)c;
1739                    targetCapacity-=2;
1740                } else {
1741                    length=2;
1742                    goto outputBytes;
1743                }
1744            } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1745                /* compress BMP character if the following one is not an uncompressible ideograph */
1746                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1747                    if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1748                        /* ASCII digit or letter */
1749                        isSingleByteMode=true;
1750                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1751                        length=2;
1752                        goto outputBytes;
1753                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1754                        /* there is a dynamic window that contains this character, change to it */
1755                        isSingleByteMode=true;
1756                        dynamicWindow=window;
1757                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1758                        useDynamicWindow(scsu, dynamicWindow);
1759                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1760                        length=2;
1761                        goto outputBytes;
1762                    } else if((code=getDynamicOffset(c, &offset))>=0) {
1763                        /* define a dynamic window with this character */
1764                        isSingleByteMode=true;
1765                        dynamicWindow=getNextDynamicWindow(scsu);
1766                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1767                        useDynamicWindow(scsu, dynamicWindow);
1768                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1769                        length=3;
1770                        goto outputBytes;
1771                    }
1772                }
1773
1774                /* don't know how to compress this character, just write it directly */
1775                length=2;
1776                goto outputBytes;
1777            } else if(c<0xe000) {
1778                /* c is a surrogate */
1779                if(U16_IS_SURROGATE_LEAD(c)) {
1780getTrailUnicode:
1781                    lead=(char16_t)c;
1782                    if(source<sourceLimit) {
1783                        /* test the following code unit */
1784                        trail=*source;
1785                        if(U16_IS_TRAIL(trail)) {
1786                            ++source;
1787                            c=U16_GET_SUPPLEMENTARY(c, trail);
1788                            /* convert this surrogate code point */
1789                            /* exit this condition tree */
1790                        } else {
1791                            /* this is an unmatched lead code unit (1st surrogate) */
1792                            /* callback(illegal) */
1793                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1794                            goto endloop;
1795                        }
1796                    } else {
1797                        /* no more input */
1798                        break;
1799                    }
1800                } else {
1801                    /* this is an unmatched trail code unit (2nd surrogate) */
1802                    /* callback(illegal) */
1803                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1804                    goto endloop;
1805                }
1806
1807                /* compress supplementary character */
1808                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1809                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1810                ) {
1811                    /*
1812                     * there is a dynamic window that contains this character and
1813                     * the following character is not uncompressible,
1814                     * change to the window
1815                     */
1816                    isSingleByteMode=true;
1817                    dynamicWindow=window;
1818                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1819                    useDynamicWindow(scsu, dynamicWindow);
1820                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1821                    length=2;
1822                    goto outputBytes;
1823                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1824                          (code=getDynamicOffset(c, &offset))>=0
1825                ) {
1826                    /* two supplementary characters in (probably) the same window - define an extended one */
1827                    isSingleByteMode=true;
1828                    code-=0x200;
1829                    dynamicWindow=getNextDynamicWindow(scsu);
1830                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1831                    useDynamicWindow(scsu, dynamicWindow);
1832                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1833                    length=4;
1834                    goto outputBytes;
1835                } else {
1836                    /* don't know how to compress this character, just write it directly */
1837                    c=((uint32_t)lead<<16)|trail;
1838                    length=4;
1839                    goto outputBytes;
1840                }
1841            } else /* 0xe000<=c<0xf300 */ {
1842                /* quote to avoid SCSU tags */
1843                c|=UQU<<16;
1844                length=3;
1845                goto outputBytes;
1846            }
1847
1848            /* normal end of conversion: prepare for a new character */
1849            c=0;
1850        }
1851    }
1852endloop:
1853
1854    /* set the converter state back into UConverter */
1855    scsu->fromUIsSingleByteMode=isSingleByteMode;
1856    scsu->fromUDynamicWindow=dynamicWindow;
1857
1858    cnv->fromUChar32=c;
1859
1860    /* write back the updated pointers */
1861    pArgs->source=source;
1862    pArgs->target=(char *)target;
1863    return;
1864
1865outputBytes:
1866    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1867    /* from the first if in the loop we know that targetCapacity>0 */
1868    if(length<=targetCapacity) {
1869        switch(length) {
1870            /* each branch falls through to the next one */
1871        case 4:
1872            *target++=(uint8_t)(c>>24);
1873            U_FALLTHROUGH;
1874        case 3:
1875            *target++=(uint8_t)(c>>16);
1876            U_FALLTHROUGH;
1877        case 2:
1878            *target++=(uint8_t)(c>>8);
1879            U_FALLTHROUGH;
1880        case 1:
1881            *target++=(uint8_t)c;
1882            U_FALLTHROUGH;
1883        default:
1884            /* will never occur */
1885            break;
1886        }
1887        targetCapacity-=length;
1888
1889        /* normal end of conversion: prepare for a new character */
1890        c=0;
1891        goto loop;
1892    } else {
1893        uint8_t *p;
1894
1895        /*
1896         * We actually do this backwards here:
1897         * In order to save an intermediate variable, we output
1898         * first to the overflow buffer what does not fit into the
1899         * regular target.
1900         */
1901        /* we know that 0<=targetCapacity<length<=4 */
1902        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
1903        length-=targetCapacity;
1904        p=(uint8_t *)cnv->charErrorBuffer;
1905        switch(length) {
1906            /* each branch falls through to the next one */
1907        case 4:
1908            *p++=(uint8_t)(c>>24);
1909            U_FALLTHROUGH;
1910        case 3:
1911            *p++=(uint8_t)(c>>16);
1912            U_FALLTHROUGH;
1913        case 2:
1914            *p++=(uint8_t)(c>>8);
1915            U_FALLTHROUGH;
1916        case 1:
1917            *p=(uint8_t)c;
1918            U_FALLTHROUGH;
1919        default:
1920            /* will never occur */
1921            break;
1922        }
1923        cnv->charErrorBufferLength=(int8_t)length;
1924
1925        /* now output what fits into the regular target */
1926        c = (length == 4) ? 0 : c >> 8*length; /* length was reduced by targetCapacity */
1927        switch(targetCapacity) {
1928            /* each branch falls through to the next one */
1929        case 3:
1930            *target++=(uint8_t)(c>>16);
1931            U_FALLTHROUGH;
1932        case 2:
1933            *target++=(uint8_t)(c>>8);
1934            U_FALLTHROUGH;
1935        case 1:
1936            *target++=(uint8_t)c;
1937            U_FALLTHROUGH;
1938        default:
1939            break;
1940        }
1941
1942        /* target overflow */
1943        targetCapacity=0;
1944        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1945        c=0;
1946        goto endloop;
1947    }
1948}
1949
1950/* miscellaneous ------------------------------------------------------------ */
1951
1952static const char *  U_CALLCONV
1953_SCSUGetName(const UConverter *cnv) {
1954    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
1955
1956    switch(scsu->locale) {
1957    case l_ja:
1958        return "SCSU,locale=ja";
1959    default:
1960        return "SCSU";
1961    }
1962}
1963
/*
 * structure for SafeClone calculations
 *
 * _SCSUSafeClone() interprets the caller-provided buffer as this layout and
 * reports sizeof(struct cloneSCSUStruct) as the buffer size needed for a clone.
 */
struct cloneSCSUStruct
{
    UConverter cnv;     /* the clone itself; _SCSUSafeClone() returns its address */
    SCSUData mydata;    /* receives a copy of the source converter's extraInfo */
};
1970
1971static UConverter *  U_CALLCONV
1972_SCSUSafeClone(const UConverter *cnv,
1973               void *stackBuffer,
1974               int32_t *pBufferSize,
1975               UErrorCode *status)
1976{
1977    struct cloneSCSUStruct * localClone;
1978    int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
1979
1980    if (U_FAILURE(*status)){
1981        return 0;
1982    }
1983
1984    if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1985        *pBufferSize = bufferSizeNeeded;
1986        return 0;
1987    }
1988
1989    localClone = (struct cloneSCSUStruct *)stackBuffer;
1990    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1991
1992    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
1993    localClone->cnv.extraInfo = &localClone->mydata;
1994    localClone->cnv.isExtraLocal = true;
1995
1996    return &localClone->cnv;
1997}
1998U_CDECL_END
1999
/*
 * Function table for the SCSU converter; its address is passed to
 * UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER() when _SCSUData is defined.
 *
 * NOTE(review): the initializer is positional. The meaning of each slot is
 * fixed by the UConverterImpl declaration, which is not visible in this part
 * of the file - confirm the slot order there before adding or moving entries.
 * Slots holding nullptr supply no SCSU-specific function.
 */
static const UConverterImpl _SCSUImpl={
    UCNV_SCSU, /* presumably the converter type tag - same constant as in _SCSUStaticData */

    nullptr,
    nullptr,

    /* open / close / reset functions of this converter */
    _SCSUOpen,
    _SCSUClose,
    _SCSUReset,

    /* to-Unicode and from-Unicode conversion, each in a plain and a WithOffsets variant */
    _SCSUToUnicode,
    _SCSUToUnicodeWithOffsets,
    _SCSUFromUnicode,
    _SCSUFromUnicodeWithOffsets,
    nullptr,

    nullptr,
    _SCSUGetName, /* returns "SCSU" or "SCSU,locale=ja" */
    nullptr,
    _SCSUSafeClone, /* clones the converter together with its SCSUData */
    ucnv_getCompleteUnicodeSet, /* shared helper defined outside this file section */
    nullptr,
    nullptr
};
2024
/*
 * Constant description of the SCSU converter; its address is passed to
 * UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER() when _SCSUData is defined.
 *
 * NOTE(review): the initializer is positional. Field order and meaning come
 * from the UConverterStaticData declaration, which is not visible in this
 * part of the file - confirm there before editing uncommented values.
 */
static const UConverterStaticData _SCSUStaticData={
    sizeof(UConverterStaticData), /* size of this structure */
    "SCSU", /* same string that _SCSUGetName() returns when no locale option applies */
    1212, /* CCSID for SCSU */
    UCNV_IBM, UCNV_SCSU, /* presumably platform and converter type tags - TODO confirm */
    1, 3, /* one char16_t generates at least 1 byte and at most 3 bytes */
    /*
     * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
     * substitution string.
     * The bytes are SQU (0x0e, quote a single Unicode character) followed by
     * 0xff 0xfd; the 3 after the array is presumably the number of bytes used.
     */
    { 0x0e, 0xff, 0xfd, 0 }, 3,
    false, false,
    0,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
2041
/*
 * Shared-data object for the SCSU converter, combining the static description
 * (_SCSUStaticData) with the function table (_SCSUImpl).
 * NOTE(review): not declared static here, so presumably referenced from other
 * source files through a declaration in a header - confirm before renaming.
 */
const UConverterSharedData _SCSUData=
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl);
2044
2045#endif
2046