1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6*   Copyright (C) 2000-2015, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9******************************************************************************
10*   file name:  ubidiwrt.c
11*   encoding:   UTF-8
12*   tab size:   8 (not used)
13*   indentation:4
14*
15*   created on: 1999aug06
16*   created by: Markus W. Scherer, updated by Matitiahu Allouche
17*
18* This file contains implementations for BiDi functions that use
19* the core algorithm and core API to write reordered text.
20*/
21
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "unicode/uchar.h"
25#include "unicode/ubidi.h"
26#include "unicode/utf16.h"
27#include "cmemory.h"
28#include "ustr_imp.h"
29#include "ubidiimp.h"
30
31/*
32 * The function implementations in this file are designed
33 * for UTF-16 and UTF-32, not for UTF-8.
34 *
35 * Assumptions that are not true for UTF-8:
36 * - Any code point always needs the same number of code units
37 *   ("minimum-length-problem" of UTF-8)
38 * - The BiDi control characters need only one code unit each
39 *
40 * Further assumptions for all UTFs:
41 * - u_charMirror(c) needs the same number of code units as c
42 */
43#if defined(UTF_SIZE) && UTF_SIZE==8
44# error reimplement ubidi_writeReordered() for UTF-8, see comment above
45#endif
46
/*
 * Evaluates to nonzero if the general category value `type` is one of
 * U_NON_SPACING_MARK, U_COMBINING_SPACING_MARK or U_ENCLOSING_MARK,
 * and to 0 otherwise. In this file it is applied to the result of u_charType().
 * `type` is used as a shift count, so it is assumed to be non-negative and
 * smaller than the bit width of unsigned long.
 */
#define IS_COMBINING(type) ((1UL<<(type))&(1UL<<U_NON_SPACING_MARK|1UL<<U_COMBINING_SPACING_MARK|1UL<<U_ENCLOSING_MARK))
48
49/*
50 * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we
51 * semantically write RTL runs in reverse and later reverse them again.
52 * Instead, we actually write them in forward order to begin with.
53 * However, if the RTL run was to be mirrored, we need to mirror here now
54 * since the implicit second reversal must not do it.
55 * It looks strange to do mirroring in LTR output, but it is only because
56 * we are writing RTL output in reverse.
57 */
58static int32_t
59doWriteForward(const char16_t *src, int32_t srcLength,
60               char16_t *dest, int32_t destSize,
61               uint16_t options,
62               UErrorCode *pErrorCode) {
63    /* optimize for several combinations of options */
64    switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) {
65    case 0: {
66        /* simply copy the LTR run to the destination */
67        int32_t length=srcLength;
68        if(destSize<length) {
69            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
70            return srcLength;
71        }
72        do {
73            *dest++=*src++;
74        } while(--length>0);
75        return srcLength;
76    }
77    case UBIDI_DO_MIRRORING: {
78        /* do mirroring */
79        int32_t i=0, j=0;
80        UChar32 c;
81
82        if(destSize<srcLength) {
83            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
84            return srcLength;
85        }
86        do {
87            U16_NEXT(src, i, srcLength, c);
88            c=u_charMirror(c);
89            U16_APPEND_UNSAFE(dest, j, c);
90        } while(i<srcLength);
91        return srcLength;
92    }
93    case UBIDI_REMOVE_BIDI_CONTROLS: {
94        /* copy the LTR run and remove any BiDi control characters */
95        int32_t remaining=destSize;
96        char16_t c;
97        do {
98            c=*src++;
99            if(!IS_BIDI_CONTROL_CHAR(c)) {
100                if(--remaining<0) {
101                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
102
103                    /* preflight the length */
104                    while(--srcLength>0) {
105                        c=*src++;
106                        if(!IS_BIDI_CONTROL_CHAR(c)) {
107                            --remaining;
108                        }
109                    }
110                    return destSize-remaining;
111                }
112                *dest++=c;
113            }
114        } while(--srcLength>0);
115        return destSize-remaining;
116    }
117    default: {
118        /* remove BiDi control characters and do mirroring */
119        int32_t remaining=destSize;
120        int32_t i, j=0;
121        UChar32 c;
122        do {
123            i=0;
124            U16_NEXT(src, i, srcLength, c);
125            src+=i;
126            srcLength-=i;
127            if(!IS_BIDI_CONTROL_CHAR(c)) {
128                remaining-=i;
129                if(remaining<0) {
130                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
131
132                    /* preflight the length */
133                    while(srcLength>0) {
134                        c=*src++;
135                        if(!IS_BIDI_CONTROL_CHAR(c)) {
136                            --remaining;
137                        }
138                        --srcLength;
139                    }
140                    return destSize-remaining;
141                }
142                c=u_charMirror(c);
143                U16_APPEND_UNSAFE(dest, j, c);
144            }
145        } while(srcLength>0);
146        return j;
147    }
148    } /* end of switch */
149}
150
/*
 * Copies one run from src to dest with the order of its code points reversed.
 *
 * Three bits of `options` are looked at: UBIDI_REMOVE_BIDI_CONTROLS,
 * UBIDI_DO_MIRRORING and UBIDI_KEEP_BASE_COMBINING.
 *
 * @param src        first code unit of the run
 * @param srcLength  number of code units in the run; the loops below process
 *                   at least one character before testing the length
 * @param dest       output buffer
 * @param destSize   room left in dest, in code units
 * @param options    option bit set, see above
 * @param pErrorCode set to U_BUFFER_OVERFLOW_ERROR when dest is too small;
 *                   in that case nothing is written
 * @return the computed output length of the run: srcLength, or the number of
 *         non-control code units when UBIDI_REMOVE_BIDI_CONTROLS is set
 */
static int32_t
doWriteReverse(const char16_t *src, int32_t srcLength,
               char16_t *dest, int32_t destSize,
               uint16_t options,
               UErrorCode *pErrorCode) {
    /*
     * RTL run -
     *
     * RTL runs need to be copied to the destination in reverse order
     * of code points, not code units, to keep Unicode characters intact.
     *
     * The general strategy for this is to read the source text
     * in backward order, collect all code units for a code point
     * (and optionally following combining characters, see below),
     * and copy all these code units in ascending order
     * to the destination for this run.
     *
     * Several options request whether combining characters
     * should be kept after their base characters,
     * whether BiDi control characters should be removed, and
     * whether characters should be replaced by their mirror-image
     * equivalent Unicode characters.
     */
    int32_t i, j;
    UChar32 c;

    /* optimize for several combinations of options */
    switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) {
    case 0:
        /*
         * With none of the "complicated" options set, the destination
         * run will have the same length as the source run,
         * and there is no mirroring and no keeping combining characters
         * with their base characters.
         */
        if(destSize<srcLength) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        /* from here on destSize holds the return value, not the capacity */
        destSize=srcLength;

        /* preserve character integrity */
        do {
            /* i is always after the last code unit known to need to be kept in this segment */
            i=srcLength;

            /* step srcLength back over one code point (one or two code units) */
            U16_BACK_1(src, 0, srcLength);

            /* copy the code units of this code point in ascending order */
            j=srcLength;
            do {
                *dest++=src[j++];
            } while(j<i);
        } while(srcLength>0);
        break;
    case UBIDI_KEEP_BASE_COMBINING:
        /*
         * Here, too, the destination
         * run will have the same length as the source run,
         * and there is no mirroring.
         * We do need to keep combining characters with their base characters.
         */
        if(destSize<srcLength) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return srcLength;
        }
        /* from here on destSize holds the return value, not the capacity */
        destSize=srcLength;

        /* preserve character integrity */
        do {
            /* i is always after the last code unit known to need to be kept in this segment */
            i=srcLength;

            /*
             * step back over combining marks until a non-combining character
             * or the start of the run is reached
             */
            do {
                U16_PREV(src, 0, srcLength, c);
            } while(srcLength>0 && IS_COMBINING(u_charType(c)));

            /* copy this "user character" (base plus its combining marks) */
            j=srcLength;
            do {
                *dest++=src[j++];
            } while(j<i);
        } while(srcLength>0);
        break;
    default:
        /*
         * With several "complicated" options set, this is the most
         * general and the slowest copying of an RTL run.
         * We will do mirroring, remove BiDi controls, and
         * keep combining characters with their base characters
         * as requested.
         */
        if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) {
            i=srcLength;
        } else {
            /* we need to find out the destination length of the run,
               which will not include the BiDi control characters */
            int32_t length=srcLength;
            char16_t ch;

            i=0;
            do {
                ch=*src++;
                if(!IS_BIDI_CONTROL_CHAR(ch)) {
                    ++i;
                }
            } while(--length>0);
            /* rewind src to the start of the run after the counting pass */
            src-=srcLength;
        }

        if(destSize<i) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return i;
        }
        /* from here on destSize holds the return value, not the capacity */
        destSize=i;

        /* preserve character integrity */
        do {
            /* i is always after the last code unit known to need to be kept in this segment */
            i=srcLength;

            /* step srcLength back over one code point and fetch it into c */
            U16_PREV(src, 0, srcLength, c);
            if(options&UBIDI_KEEP_BASE_COMBINING) {
                /* keep stepping back while c is a combining mark, to reach its base */
                while(srcLength>0 && IS_COMBINING(u_charType(c))) {
                    U16_PREV(src, 0, srcLength, c);
                }
            }

            if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) {
                /*
                 * do not copy this BiDi control character;
                 * `continue` jumps to the loop condition below
                 */
                /*
                 * NOTE(review): with UBIDI_KEEP_BASE_COMBINING, c is the base of a
                 * cluster here, so any combining marks collected after it are
                 * skipped as well, although the counting pass above included them.
                 * It looks like the returned length can then exceed the number of
                 * units written - confirm.
                 */
                continue;
            }

            /* copy this "user character" */
            j=srcLength;
            if(options&UBIDI_DO_MIRRORING) {
                /* mirror only the base character */
                int32_t k=0;
                c=u_charMirror(c);
                U16_APPEND_UNSAFE(dest, k, c);
                /* skip the k code units just written, in both dest and the source index */
                dest+=k;
                j+=k;
            }
            /* copy the remaining code units of the segment unchanged */
            while(j<i) {
                *dest++=src[j++];
            }
        } while(srcLength>0);
        break;
    } /* end of switch */

    return destSize;
}
307
308U_CAPI int32_t U_EXPORT2
309ubidi_writeReverse(const char16_t *src, int32_t srcLength,
310                   char16_t *dest, int32_t destSize,
311                   uint16_t options,
312                   UErrorCode *pErrorCode) {
313    int32_t destLength;
314
315    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
316        return 0;
317    }
318
319    /* more error checking */
320    if( src==nullptr || srcLength<-1 ||
321        destSize<0 || (destSize>0 && dest==nullptr))
322    {
323        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
324        return 0;
325    }
326
327    /* do input and output overlap? */
328    if( dest!=nullptr &&
329        ((src>=dest && src<dest+destSize) ||
330         (dest>=src && dest<src+srcLength)))
331    {
332        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
333        return 0;
334    }
335
336    if(srcLength==-1) {
337        srcLength=u_strlen(src);
338    }
339    if(srcLength>0) {
340        destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode);
341    } else {
342        /* nothing to do */
343        destLength=0;
344    }
345
346    return u_terminateUChars(dest, destSize, destLength, pErrorCode);
347}
348
349// Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this
350// function on Windows ARM64. As a work-around, we disable optimizations for this function.
351// This work-around could/should be removed once the following versions of Visual Studio are no
352// longer supported: All versions of VS2017, and versions of VS2019 below 16.4.
353#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
354#pragma optimize( "", off )
355#endif
356U_CAPI int32_t U_EXPORT2
357ubidi_writeReordered(UBiDi *pBiDi,
358                     char16_t *dest, int32_t destSize,
359                     uint16_t options,
360                     UErrorCode *pErrorCode) {
361    const char16_t *text;
362    char16_t *saveDest;
363    int32_t length, destCapacity;
364    int32_t run, runCount, logicalStart, runLength;
365
366    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
367        return 0;
368    }
369
370    /* more error checking */
371    if( pBiDi==nullptr ||
372        (text=pBiDi->text)==nullptr || (length=pBiDi->length)<0 ||
373        destSize<0 || (destSize>0 && dest==nullptr))
374    {
375        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
376        return 0;
377    }
378
379    /* do input and output overlap? */
380    if( dest!=nullptr &&
381        ((text>=dest && text<dest+destSize) ||
382         (dest>=text && dest<text+pBiDi->originalLength)))
383    {
384        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
385        return 0;
386    }
387
388    if(length==0) {
389        /* nothing to do */
390        return u_terminateUChars(dest, destSize, 0, pErrorCode);
391    }
392
393    runCount=ubidi_countRuns(pBiDi, pErrorCode);
394    if(U_FAILURE(*pErrorCode)) {
395        return 0;
396    }
397
398    /* destSize shrinks, later destination length=destCapacity-destSize */
399    saveDest=dest;
400    destCapacity=destSize;
401
402    /*
403     * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the
404     * reordering mode (checked below) is appropriate.
405     */
406    if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
407        options|=UBIDI_INSERT_LRM_FOR_NUMERIC;
408        options&=~UBIDI_REMOVE_BIDI_CONTROLS;
409    }
410    /*
411     * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS
412     * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC.
413     */
414    if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
415        options|=UBIDI_REMOVE_BIDI_CONTROLS;
416        options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
417    }
418    /*
419     * If we do not perform the "inverse BiDi" algorithm, then we
420     * don't need to insert any LRMs, and don't need to test for it.
421     */
422    if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) &&
423       (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT)  &&
424       (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) &&
425       (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) {
426        options&=~UBIDI_INSERT_LRM_FOR_NUMERIC;
427    }
428    /*
429     * Iterate through all visual runs and copy the run text segments to
430     * the destination, according to the options.
431     *
432     * The tests for where to insert LRMs ignore the fact that there may be
433     * BN codes or non-BMP code points at the beginning and end of a run;
434     * they may insert LRMs unnecessarily but the tests are faster this way
435     * (this would have to be improved for UTF-8).
436     *
437     * Note that the only errors that are set by doWriteXY() are buffer overflow
438     * errors. Ignore them until the end, and continue for preflighting.
439     */
440    if(!(options&UBIDI_OUTPUT_REVERSE)) {
441        /* forward output */
442        if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
443            /* do not insert BiDi controls */
444            for(run=0; run<runCount; ++run) {
445                if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
446                    runLength=doWriteForward(text+logicalStart, runLength,
447                                             dest, destSize,
448                                             (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
449                } else {
450                    runLength=doWriteReverse(text+logicalStart, runLength,
451                                             dest, destSize,
452                                             options, pErrorCode);
453                }
454                if(dest!=nullptr) {
455                  dest+=runLength;
456                }
457                destSize-=runLength;
458            }
459        } else {
460            /* insert BiDi controls for "inverse BiDi" */
461            const DirProp *dirProps=pBiDi->dirProps;
462            const char16_t *src;
463            char16_t uc;
464            UBiDiDirection dir;
465            int32_t markFlag;
466
467            for(run=0; run<runCount; ++run) {
468                dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
469                src=text+logicalStart;
470                /* check if something relevant in insertPoints */
471                markFlag=pBiDi->runs[run].insertRemove;
472                if(markFlag<0) {        /* BiDi controls count */
473                    markFlag=0;
474                }
475
476                if(UBIDI_LTR==dir) {
477                    if((pBiDi->isInverse) &&
478                       (/*run>0 &&*/ dirProps[logicalStart]!=L)) {
479                        markFlag |= LRM_BEFORE;
480                    }
481                    if (markFlag & LRM_BEFORE) {
482                        uc=LRM_CHAR;
483                    }
484                    else if (markFlag & RLM_BEFORE) {
485                        uc=RLM_CHAR;
486                    }
487                    else  uc=0;
488                    if(uc) {
489                        if(destSize>0) {
490                            *dest++=uc;
491                        }
492                        --destSize;
493                    }
494
495                    runLength=doWriteForward(src, runLength,
496                                             dest, destSize,
497                                             (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
498                    if(dest!=nullptr) {
499                      dest+=runLength;
500                    }
501                    destSize-=runLength;
502
503                    if((pBiDi->isInverse) &&
504                       (/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L)) {
505                        markFlag |= LRM_AFTER;
506                    }
507                    if (markFlag & LRM_AFTER) {
508                        uc=LRM_CHAR;
509                    }
510                    else if (markFlag & RLM_AFTER) {
511                        uc=RLM_CHAR;
512                    }
513                    else  uc=0;
514                    if(uc) {
515                        if(destSize>0) {
516                            *dest++=uc;
517                        }
518                        --destSize;
519                    }
520                } else {                /* RTL run */
521                    if((pBiDi->isInverse) &&
522                       (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) {
523                        markFlag |= RLM_BEFORE;
524                    }
525                    if (markFlag & LRM_BEFORE) {
526                        uc=LRM_CHAR;
527                    }
528                    else if (markFlag & RLM_BEFORE) {
529                        uc=RLM_CHAR;
530                    }
531                    else  uc=0;
532                    if(uc) {
533                        if(destSize>0) {
534                            *dest++=uc;
535                        }
536                        --destSize;
537                    }
538
539                    runLength=doWriteReverse(src, runLength,
540                                             dest, destSize,
541                                             options, pErrorCode);
542                    if(dest!=nullptr) {
543                      dest+=runLength;
544                    }
545                    destSize-=runLength;
546
547                    if((pBiDi->isInverse) &&
548                       (/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart])))) {
549                        markFlag |= RLM_AFTER;
550                    }
551                    if (markFlag & LRM_AFTER) {
552                        uc=LRM_CHAR;
553                    }
554                    else if (markFlag & RLM_AFTER) {
555                        uc=RLM_CHAR;
556                    }
557                    else  uc=0;
558                    if(uc) {
559                        if(destSize>0) {
560                            *dest++=uc;
561                        }
562                        --destSize;
563                    }
564                }
565            }
566        }
567    } else {
568        /* reverse output */
569        if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) {
570            /* do not insert BiDi controls */
571            for(run=runCount; --run>=0;) {
572                if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) {
573                    runLength=doWriteReverse(text+logicalStart, runLength,
574                                             dest, destSize,
575                                             (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
576                } else {
577                    runLength=doWriteForward(text+logicalStart, runLength,
578                                             dest, destSize,
579                                             options, pErrorCode);
580                }
581                if(dest!=nullptr) {
582                  dest+=runLength;
583                }
584                destSize-=runLength;
585            }
586        } else {
587            /* insert BiDi controls for "inverse BiDi" */
588            const DirProp *dirProps=pBiDi->dirProps;
589            const char16_t *src;
590            UBiDiDirection dir;
591
592            for(run=runCount; --run>=0;) {
593                /* reverse output */
594                dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength);
595                src=text+logicalStart;
596
597                if(UBIDI_LTR==dir) {
598                    if(/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L) {
599                        if(destSize>0) {
600                            *dest++=LRM_CHAR;
601                        }
602                        --destSize;
603                    }
604
605                    runLength=doWriteReverse(src, runLength,
606                                             dest, destSize,
607                                             (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode);
608                    if(dest!=nullptr) {
609                      dest+=runLength;
610                    }
611                    destSize-=runLength;
612
613                    if(/*run>0 &&*/ dirProps[logicalStart]!=L) {
614                        if(destSize>0) {
615                            *dest++=LRM_CHAR;
616                        }
617                        --destSize;
618                    }
619                } else {
620                    if(/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart]))) {
621                        if(destSize>0) {
622                            *dest++=RLM_CHAR;
623                        }
624                        --destSize;
625                    }
626
627                    runLength=doWriteForward(src, runLength,
628                                             dest, destSize,
629                                             options, pErrorCode);
630                    if(dest!=nullptr) {
631                      dest+=runLength;
632                    }
633                    destSize-=runLength;
634
635                    if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) {
636                        if(destSize>0) {
637                            *dest++=RLM_CHAR;
638                        }
639                        --destSize;
640                    }
641                }
642            }
643        }
644    }
645
646    return u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode);
647}
648#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924))
649#pragma optimize( "", on )
650#endif
651