1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File nucnvtst.c
11 *
12 * Modification History:
13 *        Name                     Description
14 *    Steven R. Loomis     7/8/1999      Adding input buffer test
15 ********************************************************************************
16 */
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include "cstring.h"
20 #include "unicode/uloc.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
24 #include "cintltst.h"
25 #include "unicode/utypes.h"
26 #include "unicode/ustring.h"
27 #include "unicode/ucol.h"
28 #include "unicode/utf16.h"
29 #include "cmemory.h"
30 #include "nucnvtst.h"
31 
32 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
33 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
34 #if !UCONFIG_NO_COLLATION
35 static void TestJitterbug981(void);
36 #endif
37 #if !UCONFIG_NO_LEGACY_CONVERSION
38 static void TestJitterbug1293(void);
39 #endif
40 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
41 static void TestConverterTypesAndStarters(void);
42 static void TestAmbiguous(void);
43 static void TestSignatureDetection(void);
44 static void TestUTF7(void);
45 static void TestIMAP(void);
46 static void TestUTF8(void);
47 static void TestCESU8(void);
48 static void TestUTF16(void);
49 static void TestUTF16BE(void);
50 static void TestUTF16LE(void);
51 static void TestUTF32(void);
52 static void TestUTF32BE(void);
53 static void TestUTF32LE(void);
54 static void TestLATIN1(void);
55 
56 #if !UCONFIG_NO_LEGACY_CONVERSION
57 static void TestSBCS(void);
58 static void TestDBCS(void);
59 static void TestMBCS(void);
60 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
61 static void TestICCRunout(void);
62 #endif
63 
64 #ifdef U_ENABLE_GENERIC_ISO_2022
65 static void TestISO_2022(void);
66 #endif
67 
68 static void TestISO_2022_JP(void);
69 static void TestISO_2022_JP_1(void);
70 static void TestISO_2022_JP_2(void);
71 static void TestISO_2022_KR(void);
72 static void TestISO_2022_KR_1(void);
73 static void TestISO_2022_CN(void);
74 #if 0
75    /*
76     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
77     */
78 static void TestISO_2022_CN_EXT(void);
79 #endif
80 static void TestJIS(void);
81 static void TestHZ(void);
82 #endif
83 
84 static void TestSCSU(void);
85 
86 #if !UCONFIG_NO_LEGACY_CONVERSION
87 static void TestEBCDIC_STATEFUL(void);
88 static void TestGB18030(void);
89 static void TestLMBCS(void);
90 static void TestJitterbug255(void);
91 static void TestEBCDICUS4XML(void);
92 #if 0
93    /*
94     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
95     */
96 static void TestJitterbug915(void);
97 #endif
98 static void TestISCII(void);
99 
100 static void TestCoverageMBCS(void);
101 static void TestJitterbug2346(void);
102 static void TestJitterbug2411(void);
103 static void TestJB5275(void);
104 static void TestJB5275_1(void);
105 static void TestJitterbug6175(void);
106 
107 static void TestIsFixedWidth(void);
108 #endif
109 
110 static void TestInBufSizes(void);
111 
112 static void TestRoundTrippingAllUTF(void);
113 static void TestConv(const uint16_t in[],
114                      int len,
115                      const char* conv,
116                      const char* lang,
117                      char byteArr[],
118                      int byteArrLen);
119 
120 /* open a converter, using test data if it begins with '@' */
121 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
122 
123 
/* Capacity, in code units, of the scratch output buffers used by the
   conversion helpers in this file. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes used when feeding input to / draining output from a converter. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Description of the sub-test currently running; filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];

/* Smaller of x and y.  Every argument is parenthesized so that operands with
   lower precedence than '<' (e.g. a|b) are compared as a whole.  The selected
   argument is still evaluated twice, so do not pass expressions with side
   effects. */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
131 
my_ucnv_open(const char *cnv, UErrorCode *err)132 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
133 {
134   if(cnv && cnv[0] == '@') {
135     return ucnv_openPackage(loadTestData(err), cnv+1, err);
136   } else {
137     return ucnv_open(cnv, err);
138   }
139 }
140 
/* Writes the first len bytes of a to the verbose log as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
149 
/* Writes the first len UChars of a to the verbose log as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
157 
/* Writes the first len bytes of a to stderr as "{0xNN 0xNN ... }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
166 
/* Writes the first len UChars of a to stderr as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
175 
176 static void
TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)177 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
178 {
179      const char* s0;
180      const char* s=(char*)source;
181      const int32_t *r=results;
182      UErrorCode errorCode=U_ZERO_ERROR;
183      UChar32 c;
184 
185      while(s<limit) {
186         s0=s;
187         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
188         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
189             break; /* no more significant input */
190         } else if(U_FAILURE(errorCode)) {
191             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
192             break;
193         } else if(
194             /* test the expected number of input bytes only if >=0 */
195             (*r>=0 && (int32_t)(s-s0)!=*r) ||
196             c!=*(r+1)
197         ) {
198             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
199                 message, c, (s-s0), *(r+1), *r);
200             break;
201         }
202         r+=2;
203     }
204 }
205 
206 static void
TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)207 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
208 {
209      const char* s=(char*)source;
210      UErrorCode errorCode=U_ZERO_ERROR;
211      uint32_t c;
212      c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
213      if(errorCode != expected){
214         log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
215      }
216      if(c != 0xFFFD && c != 0xffff){
217         log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
218      }
219 
220 }
221 
TestInBufSizes(void)222 static void TestInBufSizes(void)
223 {
224   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
225 #if 1
226   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
227   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
228   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
229   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
230   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
231   TestNewConvertWithBufferSizes(1,1);
232   TestNewConvertWithBufferSizes(2,3);
233   TestNewConvertWithBufferSizes(3,2);
234 #endif
235 }
236 
TestOutBufSizes(void)237 static void TestOutBufSizes(void)
238 {
239 #if 1
240   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
241   TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
242   TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
243   TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
244   TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
245   TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
246 
247 #endif
248 }
249 
250 
/* Registers test function fn under the path "tsconv/nucnvtst/<fn>".
   Relies on a variable named root being in scope. */
#define NUCNV_REGISTER(fn) addTest(root, &fn, "tsconv/nucnvtst/" #fn)

/*
 * Adds the converter tests of this file to the test tree at root.
 * Which tests are registered depends on the UCONFIG_NO_FILE_IO,
 * UCONFIG_NO_LEGACY_CONVERSION, UCONFIG_NO_COLLATION and
 * U_ENABLE_GENERIC_ISO_2022 build settings.
 */
void addTestNewConvert(TestNode** root)
{
#if !UCONFIG_NO_FILE_IO
   NUCNV_REGISTER(TestInBufSizes);
   NUCNV_REGISTER(TestOutBufSizes);
#endif
   NUCNV_REGISTER(TestConverterTypesAndStarters);
   NUCNV_REGISTER(TestAmbiguous);
   NUCNV_REGISTER(TestSignatureDetection);
   NUCNV_REGISTER(TestUTF7);
   NUCNV_REGISTER(TestIMAP);
   NUCNV_REGISTER(TestUTF8);

   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
   NUCNV_REGISTER(TestCESU8);
   NUCNV_REGISTER(TestUTF16);
   NUCNV_REGISTER(TestUTF16BE);
   NUCNV_REGISTER(TestUTF16LE);
   NUCNV_REGISTER(TestUTF32);
   NUCNV_REGISTER(TestUTF32BE);
   NUCNV_REGISTER(TestUTF32LE);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNV_REGISTER(TestLMBCS);
#endif

   NUCNV_REGISTER(TestLATIN1);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNV_REGISTER(TestSBCS);
#if !UCONFIG_NO_FILE_IO
   NUCNV_REGISTER(TestDBCS);
   NUCNV_REGISTER(TestICCRunout);
#endif
   NUCNV_REGISTER(TestMBCS);

#ifdef U_ENABLE_GENERIC_ISO_2022
   NUCNV_REGISTER(TestISO_2022);
#endif

   NUCNV_REGISTER(TestISO_2022_JP);
   NUCNV_REGISTER(TestJIS);
   NUCNV_REGISTER(TestISO_2022_JP_1);
   NUCNV_REGISTER(TestISO_2022_JP_2);
   NUCNV_REGISTER(TestISO_2022_KR);
   NUCNV_REGISTER(TestISO_2022_KR_1);
   NUCNV_REGISTER(TestISO_2022_CN);
   /*
    * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7,
    * so these two tests stay unregistered:
    *   TestISO_2022_CN_EXT  ("tsconv/nucnvtst/TestISO_2022_CN_EXT")
    *   TestJitterbug915     ("tsconv/nucnvtst/TestJitterbug915")
    */
   NUCNV_REGISTER(TestHZ);
#endif

   NUCNV_REGISTER(TestSCSU);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNV_REGISTER(TestEBCDIC_STATEFUL);
   NUCNV_REGISTER(TestGB18030);
   NUCNV_REGISTER(TestJitterbug255);
   NUCNV_REGISTER(TestEBCDICUS4XML);
   NUCNV_REGISTER(TestISCII);
   NUCNV_REGISTER(TestJB5275);
   NUCNV_REGISTER(TestJB5275_1);
#if !UCONFIG_NO_COLLATION
   NUCNV_REGISTER(TestJitterbug981);
#endif

   NUCNV_REGISTER(TestJitterbug1293);
#endif


#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
   NUCNV_REGISTER(TestCoverageMBCS);
#endif

   NUCNV_REGISTER(TestRoundTrippingAllUTF);

#if !UCONFIG_NO_LEGACY_CONVERSION
   NUCNV_REGISTER(TestJitterbug2346);
   NUCNV_REGISTER(TestJitterbug2411);
   NUCNV_REGISTER(TestJitterbug6175);

   NUCNV_REGISTER(TestIsFixedWidth);
#endif
}

#undef NUCNV_REGISTER
338 
339 
340 /* Note that this test already makes use of statics, so it's not really
341    multithread safe.
342    This convenience function lets us make the error messages actually useful.
343 */
344 
setNuConvTestName(const char *codepage, const char *direction)345 static void setNuConvTestName(const char *codepage, const char *direction)
346 {
347     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
348         codepage,
349         direction,
350         (int)gInBufferSize,
351         (int)gOutBufferSize);
352 }
353 
/* Outcome reported by testConvertFromU() and testConvertToU(). */
typedef enum
{
  /* 0: test was OK */
  TC_OK,
  /* 1: match failed - err was printed */
  TC_MISMATCH,
  /* 2: test failed; don't print an err because it was already printed */
  TC_FAIL
} ETestConvertResult;
360 
361 /* Note: This function uses global variables and it will not do offset
362 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, const char *codepage, const int32_t *expectOffsets , UBool useFallback)363 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
364                 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
365 {
366     UErrorCode status = U_ZERO_ERROR;
367     UConverter *conv = 0;
368     char    junkout[NEW_MAX_BUFFER]; /* FIX */
369     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
370     char *p;
371     const UChar *src;
372     char *end;
373     char *targ;
374     int32_t *offs;
375     int i;
376     int32_t   realBufferSize;
377     char *realBufferEnd;
378     const UChar *realSourceEnd;
379     const UChar *sourceLimit;
380     UBool checkOffsets = true;
381     UBool doFlush;
382 
383     for(i=0;i<NEW_MAX_BUFFER;i++)
384         junkout[i] = (char)0xF0;
385     for(i=0;i<NEW_MAX_BUFFER;i++)
386         junokout[i] = 0xFF;
387 
388     setNuConvTestName(codepage, "FROM");
389 
390     log_verbose("\n=========  %s\n", gNuConvTestName);
391 
392     conv = my_ucnv_open(codepage, &status);
393 
394     if(U_FAILURE(status))
395     {
396         log_data_err("Couldn't open converter %s\n",codepage);
397         return TC_FAIL;
398     }
399     if(useFallback){
400         ucnv_setFallback(conv,useFallback);
401     }
402 
403     log_verbose("Converter opened..\n");
404 
405     src = source;
406     targ = junkout;
407     offs = junokout;
408 
409     realBufferSize = UPRV_LENGTHOF(junkout);
410     realBufferEnd = junkout + realBufferSize;
411     realSourceEnd = source + sourceLen;
412 
413     if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
414         checkOffsets = false;
415 
416     do
417     {
418       end = nct_min(targ + gOutBufferSize, realBufferEnd);
419       sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
420 
421       doFlush = (UBool)(sourceLimit == realSourceEnd);
422 
423       if(targ == realBufferEnd) {
424         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
425         return TC_FAIL;
426       }
427       log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
428 
429 
430       status = U_ZERO_ERROR;
431 
432       ucnv_fromUnicode (conv,
433                         &targ,
434                         end,
435                         &src,
436                         sourceLimit,
437                         checkOffsets ? offs : NULL,
438                         doFlush, /* flush if we're at the end of the input data */
439                         &status);
440     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
441 
442     if(U_FAILURE(status)) {
443       log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
444       return TC_FAIL;
445     }
446 
447     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
448                 sourceLen, targ-junkout);
449 
450     if(getTestOption(VERBOSITY_OPTION))
451     {
452       char junk[9999];
453       char offset_str[9999];
454       char *ptr;
455 
456       junk[0] = 0;
457       offset_str[0] = 0;
458       for(ptr = junkout;ptr<targ;ptr++) {
459         sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
460         sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
461       }
462 
463       log_verbose(junk);
464       printSeq((const uint8_t *)expect, expectLen);
465       if ( checkOffsets ) {
466         log_verbose("\nOffsets:");
467         log_verbose(offset_str);
468       }
469       log_verbose("\n");
470     }
471     ucnv_close(conv);
472 
473     if(expectLen != targ-junkout) {
474       log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
475       log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
476       fprintf(stderr, "Got:\n");
477       printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
478       fprintf(stderr, "Expected:\n");
479       printSeqErr((const unsigned char*)expect, expectLen);
480       return TC_MISMATCH;
481     }
482 
483     if (checkOffsets && (expectOffsets != 0) ) {
484       log_verbose("comparing %d offsets..\n", targ-junkout);
485       if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
486         log_err("did not get the expected offsets. %s\n", gNuConvTestName);
487         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
488         log_err("\n");
489         log_err("Got  :     ");
490         for(p=junkout;p<targ;p++) {
491           log_err("%d,", junokout[p-junkout]);
492         }
493         log_err("\n");
494         log_err("Expected:  ");
495         for(i=0; i<(targ-junkout); i++) {
496           log_err("%d,", expectOffsets[i]);
497         }
498         log_err("\n");
499       }
500     }
501 
502     log_verbose("comparing..\n");
503     if(!memcmp(junkout, expect, expectLen)) {
504       log_verbose("Matches!\n");
505       return TC_OK;
506     } else {
507       log_err("String does not match u->%s\n", gNuConvTestName);
508       printUSeqErr(source, sourceLen);
509       fprintf(stderr, "Got:\n");
510       printSeqErr((const unsigned char *)junkout, expectLen);
511       fprintf(stderr, "Expected:\n");
512       printSeqErr((const unsigned char *)expect, expectLen);
513 
514       return TC_MISMATCH;
515     }
516 }
517 
518 /* Note: This function uses global variables and it will not do offset
519 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, const char *codepage, const int32_t *expectOffsets, UBool useFallback)520 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
521                                           const char *codepage, const int32_t *expectOffsets, UBool useFallback)
522 {
523     UErrorCode status = U_ZERO_ERROR;
524     UConverter *conv = 0;
525     UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
526     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
527     const char *src;
528     const char *realSourceEnd;
529     const char *srcLimit;
530     UChar *p;
531     UChar *targ;
532     UChar *end;
533     int32_t *offs;
534     int i;
535     UBool   checkOffsets = true;
536 
537     int32_t   realBufferSize;
538     UChar *realBufferEnd;
539 
540 
541     for(i=0;i<NEW_MAX_BUFFER;i++)
542         junkout[i] = 0xFFFE;
543 
544     for(i=0;i<NEW_MAX_BUFFER;i++)
545         junokout[i] = -1;
546 
547     setNuConvTestName(codepage, "TO");
548 
549     log_verbose("\n=========  %s\n", gNuConvTestName);
550 
551     conv = my_ucnv_open(codepage, &status);
552 
553     if(U_FAILURE(status))
554     {
555         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
556         return TC_FAIL;
557     }
558     if(useFallback){
559         ucnv_setFallback(conv,useFallback);
560     }
561     log_verbose("Converter opened..\n");
562 
563     src = (const char *)source;
564     targ = junkout;
565     offs = junokout;
566 
567     realBufferSize = UPRV_LENGTHOF(junkout);
568     realBufferEnd = junkout + realBufferSize;
569     realSourceEnd = src + sourcelen;
570 
571     if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
572         checkOffsets = false;
573 
574     do
575     {
576         end = nct_min( targ + gOutBufferSize, realBufferEnd);
577         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
578 
579         if(targ == realBufferEnd)
580         {
581             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
582             return TC_FAIL;
583         }
584         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
585 
586         /* oldTarg = targ; */
587 
588         status = U_ZERO_ERROR;
589 
590         ucnv_toUnicode (conv,
591                 &targ,
592                 end,
593                 &src,
594                 srcLimit,
595                 checkOffsets ? offs : NULL,
596                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
597                 &status);
598 
599         /*        offs += (targ-oldTarg); */
600 
601       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
602 
603     if(U_FAILURE(status))
604     {
605         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
606         return TC_FAIL;
607     }
608 
609     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
610         sourcelen, targ-junkout);
611     if(getTestOption(VERBOSITY_OPTION))
612     {
613         char junk[9999];
614         char offset_str[9999];
615         UChar *ptr;
616 
617         junk[0] = 0;
618         offset_str[0] = 0;
619 
620         for(ptr = junkout;ptr<targ;ptr++)
621         {
622             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
623             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
624         }
625 
626         log_verbose(junk);
627         printUSeq(expect, expectlen);
628         if ( checkOffsets )
629           {
630             log_verbose("\nOffsets:");
631             log_verbose(offset_str);
632           }
633         log_verbose("\n");
634     }
635     ucnv_close(conv);
636 
637     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
638 
639     if (checkOffsets && (expectOffsets != 0))
640     {
641         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
642             log_err("did not get the expected offsets. %s\n",gNuConvTestName);
643             log_err("Got:      ");
644             for(p=junkout;p<targ;p++) {
645                 log_err("%d,", junokout[p-junkout]);
646             }
647             log_err("\n");
648             log_err("Expected: ");
649             for(i=0; i<(targ-junkout); i++) {
650                 log_err("%d,", expectOffsets[i]);
651             }
652             log_err("\n");
653             log_err("output:   ");
654             for(i=0; i<(targ-junkout); i++) {
655                 log_err("%X,", junkout[i]);
656             }
657             log_err("\n");
658             log_err("input:    ");
659             for(i=0; i<(src-(const char *)source); i++) {
660                 log_err("%X,", (unsigned char)source[i]);
661             }
662             log_err("\n");
663         }
664     }
665 
666     if(!memcmp(junkout, expect, expectlen*2))
667     {
668         log_verbose("Matches!\n");
669         return TC_OK;
670     }
671     else
672     {
673         log_err("String does not match. %s\n", gNuConvTestName);
674         log_verbose("String does not match. %s\n", gNuConvTestName);
675         printf("\nGot:");
676         printUSeqErr(junkout, expectlen);
677         printf("\nExpected:");
678         printUSeqErr(expect, expectlen);
679         return TC_MISMATCH;
680     }
681 }
682 
683 
TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )684 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
685 {
686 /** test chars #1 */
687     /*  1 2 3  1Han 2Han 3Han .  */
688     static const UChar   sampleText[] =
689      { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
690     static const UChar sampleTextRoundTripUnmappable[] =
691     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
692 
693 
694     static const uint8_t expectedUTF8[] =
695      { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
696     static const int32_t toUTF8Offs[] =
697      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
698     static const int32_t fmUTF8Offs[] =
699      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
700 
701 #ifdef U_ENABLE_GENERIC_ISO_2022
702     /* Same as UTF8, but with ^[%B preceding */
703     static const const uint8_t expectedISO2022[] =
704      { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
705     static const int32_t toISO2022Offs[]     =
706      { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
707        0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
708     static const int32_t fmISO2022Offs[] =
709      { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
710 #endif
711 
712     /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
713     static const uint8_t expectedIBM930[] =
714      { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
715     static const int32_t toIBM930Offs[] =
716      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
717     static const int32_t fmIBM930Offs[] =
718      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
719 
720     /* 1 2 3 0 h1 h2 h3 . MBCS*/
721     static const uint8_t expectedIBM943[] =
722      {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
723     static const int32_t toIBM943Offs    [] =
724      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
725     static const int32_t fmIBM943Offs[] =
726      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
727 
728     /* 1 2 3 0 h1 h2 h3 . DBCS*/
729     static const uint8_t expectedIBM9027[] =
730      {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
731     static const int32_t toIBM9027Offs    [] =
732      {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
733 
734      /* 1 2 3 0 <?> <?> <?> . SBCS*/
735     static const uint8_t expectedIBM920[] =
736      {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
737     static const int32_t toIBM920Offs    [] =
738      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
739 
740     /* 1 2 3 0 <?> <?> <?> . SBCS*/
741     static const uint8_t expectedISO88593[] =
742      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
743     static const int32_t toISO88593Offs[]     =
744      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
745 
746     /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
747     static const uint8_t expectedLATIN1[] =
748      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
749     static const int32_t toLATIN1Offs[]     =
750      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
751 
752 
753     /*  etc */
754     static const uint8_t expectedUTF16BE[] =
755      { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
756     static const int32_t toUTF16BEOffs[]=
757      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
758     static const int32_t fmUTF16BEOffs[] =
759      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
760 
761     static const uint8_t expectedUTF16LE[] =
762      { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
763     static const int32_t toUTF16LEOffs[]=
764      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
765     static const int32_t fmUTF16LEOffs[] =
766      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
767 
768     static const uint8_t expectedUTF32BE[] =
769      { 0x00, 0x00, 0x00, 0x31,
770        0x00, 0x00, 0x00, 0x32,
771        0x00, 0x00, 0x00, 0x33,
772        0x00, 0x00, 0x00, 0x00,
773        0x00, 0x00, 0x4e, 0x00,
774        0x00, 0x00, 0x4e, 0x8c,
775        0x00, 0x00, 0x4e, 0x09,
776        0x00, 0x00, 0x00, 0x2e,
777        0x00, 0x02, 0x00, 0x21 };
778     static const int32_t toUTF32BEOffs[]=
779      { 0x00, 0x00, 0x00, 0x00,
780        0x01, 0x01, 0x01, 0x01,
781        0x02, 0x02, 0x02, 0x02,
782        0x03, 0x03, 0x03, 0x03,
783        0x04, 0x04, 0x04, 0x04,
784        0x05, 0x05, 0x05, 0x05,
785        0x06, 0x06, 0x06, 0x06,
786        0x07, 0x07, 0x07, 0x07,
787        0x08, 0x08, 0x08, 0x08,
788        0x08, 0x08, 0x08, 0x08 };
789     static const int32_t fmUTF32BEOffs[] =
790      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
791 
792     static const uint8_t expectedUTF32LE[] =
793      { 0x31, 0x00, 0x00, 0x00,
794        0x32, 0x00, 0x00, 0x00,
795        0x33, 0x00, 0x00, 0x00,
796        0x00, 0x00, 0x00, 0x00,
797        0x00, 0x4e, 0x00, 0x00,
798        0x8c, 0x4e, 0x00, 0x00,
799        0x09, 0x4e, 0x00, 0x00,
800        0x2e, 0x00, 0x00, 0x00,
801        0x21, 0x00, 0x02, 0x00 };
802     static const int32_t toUTF32LEOffs[]=
803      { 0x00, 0x00, 0x00, 0x00,
804        0x01, 0x01, 0x01, 0x01,
805        0x02, 0x02, 0x02, 0x02,
806        0x03, 0x03, 0x03, 0x03,
807        0x04, 0x04, 0x04, 0x04,
808        0x05, 0x05, 0x05, 0x05,
809        0x06, 0x06, 0x06, 0x06,
810        0x07, 0x07, 0x07, 0x07,
811        0x08, 0x08, 0x08, 0x08,
812        0x08, 0x08, 0x08, 0x08 };
813     static const int32_t fmUTF32LEOffs[] =
814      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
815 
816 
817 
818 
819 /** Test chars #2 **/
820 
821     /* Sahha [health],  slashed h's */
822     static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
823     static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
824 
825     /* LMBCS */
826     static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
827     static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
828     static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
829     static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
830     /*********************************** START OF CODE finally *************/
831 
832     gInBufferSize = insize;
833     gOutBufferSize = outsize;
834 
835     log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
836 
837 
838     /*UTF-8*/
839     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
840         expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,false );
841 
842     log_verbose("Test surrogate behaviour for UTF8\n");
843     {
844         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
845         static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
846                            0xf0, 0x90, 0x90, 0x81,
847                            0xef, 0xbf, 0xbd
848         };
849         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
850         testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
851                          expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,false );
852 
853 
854     }
855 
856 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
857     /*ISO-2022*/
858     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
859         expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,false );
860 #endif
861 
862     /*UTF16 LE*/
863     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
864         expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,false );
865     /*UTF16 BE*/
866     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
867         expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,false );
868     /*UTF32 LE*/
869     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
870         expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,false );
871     /*UTF32 BE*/
872     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
873         expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,false );
874 
875     /*LATIN_1*/
876     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
877         expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,false );
878 
879 #if !UCONFIG_NO_LEGACY_CONVERSION
880     /*EBCDIC_STATEFUL*/
881     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
882         expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,false );
883 
884     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
885         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,false );
886 
887     /*MBCS*/
888 
889     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
890         expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,false );
891     /*DBCS*/
892     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
893         expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,false );
894     /*SBCS*/
895     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
896         expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,false );
897     /*SBCS*/
898     testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
899         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,false );
900 #endif
901 
902 
903 /****/
904 
905     /*UTF-8*/
906     testConvertToU(expectedUTF8, sizeof(expectedUTF8),
907         sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,false);
908 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
909     /*ISO-2022*/
910     testConvertToU(expectedISO2022, sizeof(expectedISO2022),
911         sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,false);
912 #endif
913 
914     /*UTF16 LE*/
915     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
916         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,false);
917     /*UTF16 BE*/
918     testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
919         sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,false);
920     /*UTF32 LE*/
921     testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
922         sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,false);
923     /*UTF32 BE*/
924     testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
925         sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,false);
926 
927 #if !UCONFIG_NO_LEGACY_CONVERSION
928     /*EBCDIC_STATEFUL*/
929     testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
930             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,false);
931     /*MBCS*/
932     testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
933             UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,false);
934 #endif
935 
936     /* Try it again to make sure it still works */
937     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
938         sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,false);
939 
940 #if !UCONFIG_NO_LEGACY_CONVERSION
941     testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
942         malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,false);
943 
944     testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
945         expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,false );
946 
947     /*LMBCS*/
948     testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
949         expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,false );
950     testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
951         LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,false);
952 #endif
953 
954     /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
955     {
956         /* encode directly set D and set O */
957         static const uint8_t utf7[] = {
958             /*
959                 Hi Mom -+Jjo--!
960                 A+ImIDkQ.
961                 +-
962                 +ZeVnLIqe-
963             */
964             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
965             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
966             0x2b, 0x2d,
967             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
968         };
969         static const UChar unicode[] = {
970             /*
971                 Hi Mom -<WHITE SMILING FACE>-!
972                 A<NOT IDENTICAL TO><ALPHA>.
973                 +
974                 [Japanese word "nihongo"]
975             */
976             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
977             0x41, 0x2262, 0x0391, 0x2e,
978             0x2b,
979             0x65e5, 0x672c, 0x8a9e
980         };
981         static const int32_t toUnicodeOffsets[] = {
982             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
983             15, 17, 19, 23,
984             24,
985             27, 29, 32
986         };
987         static const int32_t fromUnicodeOffsets[] = {
988             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
989             11, 12, 12, 12, 13, 13, 13, 13, 14,
990             15, 15,
991             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
992         };
993 
994         /* same but escaping set O (the exclamation mark) */
995         static const uint8_t utf7Restricted[] = {
996             /*
997                 Hi Mom -+Jjo--+ACE-
998                 A+ImIDkQ.
999                 +-
1000                 +ZeVnLIqe-
1001             */
1002             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1003             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1004             0x2b, 0x2d,
1005             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1006         };
1007         static const int32_t toUnicodeOffsetsR[] = {
1008             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1009             19, 21, 23, 27,
1010             28,
1011             31, 33, 36
1012         };
1013         static const int32_t fromUnicodeOffsetsR[] = {
1014             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1015             11, 12, 12, 12, 13, 13, 13, 13, 14,
1016             15, 15,
1017             16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1018         };
1019 
1020         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,false);
1021 
1022         testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,false);
1023 
1024         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,false);
1025 
1026         testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,false);
1027     }
1028 
1029     /*
1030      * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1031      * modified according to RFC 2060,
1032      * and supplemented with the one example in RFC 2060 itself.
1033      */
1034     {
1035         static const uint8_t imap[] = {
1036             /*  Hi Mom -&Jjo--!
1037                 A&ImIDkQ-.
1038                 &-
1039                 &ZeVnLIqe-
1040                 \
1041                 ~peter
1042                 /mail
1043                 /&ZeVnLIqe-
1044                 /&U,BTFw-
1045             */
1046             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1047             0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1048             0x26, 0x2d,
1049             0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1050             0x5c,
1051             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1052             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1053             0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1054             0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1055         };
1056         static const UChar unicode[] = {
1057             /*  Hi Mom -<WHITE SMILING FACE>-!
1058                 A<NOT IDENTICAL TO><ALPHA>.
1059                 &
1060                 [Japanese word "nihongo"]
1061                 \
1062                 ~peter
1063                 /mail
1064                 /<65e5, 672c, 8a9e>
1065                 /<53f0, 5317>
1066             */
1067             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1068             0x41, 0x2262, 0x0391, 0x2e,
1069             0x26,
1070             0x65e5, 0x672c, 0x8a9e,
1071             0x5c,
1072             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1073             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1074             0x2f, 0x65e5, 0x672c, 0x8a9e,
1075             0x2f, 0x53f0, 0x5317
1076         };
1077         static const int32_t toUnicodeOffsets[] = {
1078             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1079             15, 17, 19, 24,
1080             25,
1081             28, 30, 33,
1082             37,
1083             38, 39, 40, 41, 42, 43,
1084             44, 45, 46, 47, 48,
1085             49, 51, 53, 56,
1086             60, 62, 64
1087         };
1088         static const int32_t fromUnicodeOffsets[] = {
1089             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1090             11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1091             15, 15,
1092             16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1093             19,
1094             20, 21, 22, 23, 24, 25,
1095             26, 27, 28, 29, 30,
1096             31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1097             35, 36, 36, 36, 37, 37, 37, 37, 37
1098         };
1099 
1100         testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,false);
1101 
1102         testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,false);
1103     }
1104 
1105     /* Test UTF-8 bad data handling*/
1106     {
1107         static const uint8_t utf8[]={
1108             0x61,
1109             0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1110             0x00,
1111             0x62,
1112             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1113             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1114             0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1115             0xdf, 0xbf,                     /* 7ff */
1116             0xbf,                           /* truncated tail */
1117             0xf4, 0x90, 0x80, 0x80,         /* 110000 */
1118             0x02
1119         };
1120 
1121         static const uint16_t utf8Expected[]={
1122             0x0061,
1123             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1124             0x0000,
1125             0x0062,
1126             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1127             0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1128             0xdbff, 0xdfff,
1129             0x07ff,
1130             0xfffd,
1131             0xfffd, 0xfffd, 0xfffd, 0xfffd,
1132             0x0002
1133         };
1134 
1135         static const int32_t utf8Offsets[]={
1136             0,
1137             1, 2, 3, 4,
1138             5,
1139             6,
1140             7, 8, 9, 10, 11,
1141             12, 13, 14, 15, 16,
1142             17, 17,
1143             21,
1144             23,
1145             24, 25, 26, 27,
1146             28
1147         };
1148         testConvertToU(utf8, sizeof(utf8),
1149                        utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,false);
1150 
1151     }
1152 
1153     /* Test UTF-32BE bad data handling*/
1154     {
1155         static const uint8_t utf32[]={
1156             0x00, 0x00, 0x00, 0x61,
1157             0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1158             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1159             0x00, 0x00, 0x00, 0x62,
1160             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1161             0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1162             0x00, 0x00, 0x01, 0x62,
1163             0x00, 0x00, 0x02, 0x62
1164         };
1165         static const uint16_t utf32Expected[]={
1166             0x0061,
1167             0xfffd,         /* 0x110000 out of range */
1168             0xDBFF,         /* 0x10FFFF in range */
1169             0xDFFF,
1170             0x0062,
1171             0xfffd,         /* 0xffffffff out of range */
1172             0xfffd,         /* 0x7fffffff out of range */
1173             0x0162,
1174             0x0262
1175         };
1176         static const int32_t utf32Offsets[]={
1177             0, 4, 8, 8, 12, 16, 20, 24, 28
1178         };
1179         static const uint8_t utf32ExpectedBack[]={
1180             0x00, 0x00, 0x00, 0x61,
1181             0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1182             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1183             0x00, 0x00, 0x00, 0x62,
1184             0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1185             0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1186             0x00, 0x00, 0x01, 0x62,
1187             0x00, 0x00, 0x02, 0x62
1188         };
1189         static const int32_t utf32OffsetsBack[]={
1190             0,0,0,0,
1191             1,1,1,1,
1192             2,2,2,2,
1193             4,4,4,4,
1194             5,5,5,5,
1195             6,6,6,6,
1196             7,7,7,7,
1197             8,8,8,8
1198         };
1199 
1200         testConvertToU(utf32, sizeof(utf32),
1201                        utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,false);
1202         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1203             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, false);
1204     }
1205 
1206     /* Test UTF-32LE bad data handling*/
1207     {
1208         static const uint8_t utf32[]={
1209             0x61, 0x00, 0x00, 0x00,
1210             0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1211             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1212             0x62, 0x00, 0x00, 0x00,
1213             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1214             0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1215             0x62, 0x01, 0x00, 0x00,
1216             0x62, 0x02, 0x00, 0x00,
1217         };
1218 
1219         static const uint16_t utf32Expected[]={
1220             0x0061,
1221             0xfffd,         /* 0x110000 out of range */
1222             0xDBFF,         /* 0x10FFFF in range */
1223             0xDFFF,
1224             0x0062,
1225             0xfffd,         /* 0xffffffff out of range */
1226             0xfffd,         /* 0x7fffffff out of range */
1227             0x0162,
1228             0x0262
1229         };
1230         static const int32_t utf32Offsets[]={
1231             0, 4, 8, 8, 12, 16, 20, 24, 28
1232         };
1233         static const uint8_t utf32ExpectedBack[]={
1234             0x61, 0x00, 0x00, 0x00,
1235             0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1236             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1237             0x62, 0x00, 0x00, 0x00,
1238             0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1239             0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1240             0x62, 0x01, 0x00, 0x00,
1241             0x62, 0x02, 0x00, 0x00
1242         };
1243         static const int32_t utf32OffsetsBack[]={
1244             0,0,0,0,
1245             1,1,1,1,
1246             2,2,2,2,
1247             4,4,4,4,
1248             5,5,5,5,
1249             6,6,6,6,
1250             7,7,7,7,
1251             8,8,8,8
1252         };
1253         testConvertToU(utf32, sizeof(utf32),
1254             utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,false );
1255         testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1256             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, false);
1257     }
1258 }
1259 
TestCoverageMBCSnull1260 static void TestCoverageMBCS(){
1261 #if 0
1262     UErrorCode status = U_ZERO_ERROR;
1263     const char *directory = loadTestData(&status);
1264     char* tdpath = NULL;
1265     char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1266     int len = strlen(directory);
1267     char* index=NULL;
1268 
1269     tdpath = (char*) malloc(sizeof(char) * (len * 2));
1270     uprv_strcpy(saveDirectory,u_getDataDirectory());
1271     log_verbose("Retrieved data directory %s \n",saveDirectory);
1272     uprv_strcpy(tdpath,directory);
1273     index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1274 
1275     if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1276             *(index+1)=0;
1277     }
1278     u_setDataDirectory(tdpath);
1279     log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1280 #endif
1281 
1282     /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1283       which is test file for MBCS conversion with single-byte codepage data.*/
1284     {
1285 
1286         /* MBCS with single byte codepage data test1.ucm*/
1287         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1288         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1289         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1290 
1291         /*from Unicode*/
1292         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1293             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,false );
1294     }
1295 
1296     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1297       which is test file for MBCS conversion with three-byte codepage data.*/
1298     {
1299 
1300         /* MBCS with three byte codepage data test3.ucm*/
1301         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1302         const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1303         int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1304 
1305         const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1306         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1307         int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1308 
1309         /*from Unicode*/
1310         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1311             expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,false );
1312 
1313         /*to Unicode*/
1314         testConvertToU(test3input, sizeof(test3input),
1315             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,false);
1316 
1317     }
1318 
1319     /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1320       which is test file for MBCS conversion with four-byte codepage data.*/
1321     {
1322 
1323         /* MBCS with three byte codepage data test4.ucm*/
1324         static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1325         static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1326         static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1327 
1328         static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1329         static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1330         static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1331 
1332         /*from Unicode*/
1333         testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1334             expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,false );
1335 
1336         /*to Unicode*/
1337         testConvertToU(test4input, sizeof(test4input),
1338             expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,false );
1339 
1340     }
1341 #if 0
1342     free(tdpath);
1343     /* restore the original data directory */
1344     log_verbose("Setting the data directory to %s \n", saveDirectory);
1345     u_setDataDirectory(saveDirectory);
1346     free(saveDirectory);
1347 #endif
1348 
1349 }
1350 
/**
 * Opens the converter named convName and checks that ucnv_getType()
 * reports convType for it.
 *
 * @param convName  Converter name passed to my_ucnv_open().
 * @param convType  Converter type that ucnv_getType() is expected to return.
 *
 * If the converter cannot be opened, the problem is logged with
 * log_data_err() and the check is skipped.  A type mismatch is logged with
 * log_err(), reporting both the value actually returned and the expected one.
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UErrorCode err = U_ZERO_ERROR;
    UConverter *myConverter = my_ucnv_open(convName, &err);
    UConverterType actualType;

    if (U_FAILURE(err)) {
        log_data_err("Failed to create an %s converter\n", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /*
         * Print the value ucnv_getType() really returned; echoing only the
         * expected value gives no information about what went wrong.
         * The casts make the arguments match the %X conversion.
         */
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X, expected 0x%X\n",
            convName, (unsigned int)actualType, (unsigned int)convType);
    }
    else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }

    ucnv_close(myConverter);
}
1373 
TestConverterTypesAndStartersnull1374 static void TestConverterTypesAndStarters()
1375 {
1376 #if !UCONFIG_NO_LEGACY_CONVERSION
1377     UConverter* myConverter;
1378     UErrorCode err = U_ZERO_ERROR;
1379     UBool mystarters[256];
1380 
1381 /*    const UBool expectedKSCstarters[256] = {
1382         false, false, false, false, false, false, false, false, false, false,
1383         false, false, false, false, false, false, false, false, false, false,
1384         false, false, false, false, false, false, false, false, false, false,
1385         false, false, false, false, false, false, false, false, false, false,
1386         false, false, false, false, false, false, false, false, false, false,
1387         false, false, false, false, false, false, false, false, false, false,
1388         false, false, false, false, false, false, false, false, false, false,
1389         false, false, false, false, false, false, false, false, false, false,
1390         false, false, false, false, false, false, false, false, false, false,
1391         false, false, false, false, false, false, false, false, false, false,
1392         false, false, false, false, false, false, false, false, false, false,
1393         false, false, false, false, false, false, false, false, false, false,
1394         false, false, false, false, false, false, false, false, false, false,
1395         false, false, false, false, false, false, false, false, false, false,
1396         false, false, false, true, true, true, true, true, true, true,
1397         true, true, true, true, true, true, true, true, true, true,
1398         true, true, true, true, true, true, true, true, true, true,
1399         true, true, true, false, false, true, true, true, true, true,
1400         true, true, true, true, true, true, true, true, true, true,
1401         true, true, true, true, true, true, true, true, true, true,
1402         true, true, true, true, true, true, true, true, true, true,
1403         true, true, true, true, true, true, true, true, true, true,
1404         true, true, true, true, true, true, true, true, true, true,
1405         true, true, true, true, true, true, true, true, true, true,
1406         true, true, true, true, true, true, true, true, true, true,
1407         true, true, true, true, true, true};*/
1408 
1409 
1410     log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1411 
1412     myConverter = ucnv_open("ksc", &err);
1413     if (U_FAILURE(err)) {
1414       log_data_err("Failed to create an ibm-ksc converter\n");
1415       return;
1416     }
1417     else
1418     {
1419         if (ucnv_getType(myConverter)!=UCNV_MBCS)
1420             log_err("ucnv_getType Failed for ibm-949\n");
1421         else
1422             log_verbose("ucnv_getType ibm-949 ok\n");
1423 
1424         if(myConverter!=NULL)
1425             ucnv_getStarters(myConverter, mystarters, &err);
1426 
1427         /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1428           log_err("Failed ucnv_getStarters for ksc\n");
1429           else
1430           log_verbose("ucnv_getStarters ok\n");*/
1431 
1432     }
1433     ucnv_close(myConverter);
1434 
1435     TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1436     TestConverterType("ibm-878", UCNV_SBCS);
1437 #endif
1438 
1439     TestConverterType("iso-8859-1", UCNV_LATIN_1);
1440 
1441     TestConverterType("ibm-1208", UCNV_UTF8);
1442 
1443     TestConverterType("utf-8", UCNV_UTF8);
1444     TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1445     TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1446     TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1447     TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1448 
1449 #if !UCONFIG_NO_LEGACY_CONVERSION
1450 
1451 #if defined(U_ENABLE_GENERIC_ISO_2022)
1452     TestConverterType("iso-2022", UCNV_ISO_2022);
1453 #endif
1454 
1455     TestConverterType("hz", UCNV_HZ);
1456 #endif
1457 
1458     TestConverterType("scsu", UCNV_SCSU);
1459 
1460 #if !UCONFIG_NO_LEGACY_CONVERSION
1461     TestConverterType("x-iscii-de", UCNV_ISCII);
1462 #endif
1463 
1464     TestConverterType("ascii", UCNV_US_ASCII);
1465     TestConverterType("utf-7", UCNV_UTF7);
1466     TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1467     TestConverterType("bocu-1", UCNV_BOCU1);
1468 }
1469 
1470 static void
TestAmbiguousConverter(UConverter *cnv)1471 TestAmbiguousConverter(UConverter *cnv) {
1472     static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1473     UChar outUnicode[20]={ 0, 0, 0, 0 };
1474 
1475     const char *s;
1476     UChar *u;
1477     UErrorCode errorCode;
1478     UBool isAmbiguous;
1479 
1480     /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1481     errorCode=U_ZERO_ERROR;
1482     s=inBytes;
1483     u=outUnicode;
1484     ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, true, &errorCode);
1485     if(U_FAILURE(errorCode)) {
1486         /* we do not care about general failures in this test; the input may just not be mappable */
1487         return;
1488     }
1489 
1490     if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1491         /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1492         /* There are some encodings that are partially ASCII based,
1493         like the ISO-7 and GSM series of codepages, which we ignore. */
1494         return;
1495     }
1496 
1497     isAmbiguous=ucnv_isAmbiguous(cnv);
1498 
1499     /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1500     if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1501         log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1502             ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1503         return;
1504     }
1505 
1506     if(outUnicode[2]!=0x5c) {
1507         /* needs fixup, fix it */
1508         ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1509         if(outUnicode[2]!=0x5c) {
1510             /* the fix failed */
1511             log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1512             return;
1513         }
1514     }
1515 }
1516 
TestAmbiguousnull1517 static void TestAmbiguous()
1518 {
1519     UErrorCode status = U_ZERO_ERROR;
1520     UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1521     static const char target[] = {
1522         /* "\\usr\\local\\share\\data\\icutest.txt" */
1523         0x5c, 0x75, 0x73, 0x72,
1524         0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1525         0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1526         0x5c, 0x64, 0x61, 0x74, 0x61,
1527         0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1528         0
1529     };
1530     UChar asciiResult[200], sjisResult[200];
1531     int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1532     const char *name;
1533 
1534     /* enumerate all converters */
1535     status=U_ZERO_ERROR;
1536     for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1537         cnv=ucnv_open(name, &status);
1538         if(U_SUCCESS(status)) {
1539             TestAmbiguousConverter(cnv);
1540             ucnv_close(cnv);
1541         } else {
1542             log_err("error: unable to open available converter \"%s\"\n", name);
1543             status=U_ZERO_ERROR;
1544         }
1545     }
1546 
1547 #if !UCONFIG_NO_LEGACY_CONVERSION
1548     sjis_cnv = ucnv_open("ibm-943", &status);
1549     if (U_FAILURE(status))
1550     {
1551         log_data_err("Failed to create a SJIS converter\n");
1552         return;
1553     }
1554     ascii_cnv = ucnv_open("LATIN-1", &status);
1555     if (U_FAILURE(status))
1556     {
1557         log_data_err("Failed to create a LATIN-1 converter\n");
1558         ucnv_close(sjis_cnv);
1559         return;
1560     }
1561     /* convert target from SJIS to Unicode */
1562     sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1563     if (U_FAILURE(status))
1564     {
1565         log_err("Failed to convert the SJIS string.\n");
1566         ucnv_close(sjis_cnv);
1567         ucnv_close(ascii_cnv);
1568         return;
1569     }
1570     /* convert target from Latin-1 to Unicode */
1571     /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1572     if (U_FAILURE(status))
1573     {
1574         log_err("Failed to convert the Latin-1 string.\n");
1575         ucnv_close(sjis_cnv);
1576         ucnv_close(ascii_cnv);
1577         return;
1578     }
1579     if (!ucnv_isAmbiguous(sjis_cnv))
1580     {
1581         log_err("SJIS converter should contain ambiguous character mappings.\n");
1582         ucnv_close(sjis_cnv);
1583         ucnv_close(ascii_cnv);
1584         return;
1585     }
1586     if (u_strcmp(sjisResult, asciiResult) == 0)
1587     {
1588         log_err("File separators for SJIS don't need to be fixed.\n");
1589     }
1590     ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1591     if (u_strcmp(sjisResult, asciiResult) != 0)
1592     {
1593         log_err("Fixing file separator for SJIS failed.\n");
1594     }
1595     ucnv_close(sjis_cnv);
1596     ucnv_close(ascii_cnv);
1597 #endif
1598 }
1599 
1600 static void
TestSignatureDetectionnull1601 TestSignatureDetection(){
1602     /* with null terminated strings */
1603     {
1604         static const char* data[] = {
1605                 "\xFE\xFF\x00\x00",     /* UTF-16BE */
1606                 "\xFF\xFE\x00\x00",     /* UTF-16LE */
1607                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1608                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1609 
1610                 "\xFE\xFF",             /* UTF-16BE */
1611                 "\xFF\xFE",             /* UTF-16LE */
1612                 "\xEF\xBB\xBF",         /* UTF-8    */
1613                 "\x0E\xFE\xFF",         /* SCSU     */
1614 
1615                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1616                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1617                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1618                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1619 
1620                 "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1621                 "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1622                 "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1623                 "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1624                 "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1625 
1626                 "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1627         };
1628         static const char* expected[] = {
1629                 "UTF-16BE",
1630                 "UTF-16LE",
1631                 "UTF-8",
1632                 "SCSU",
1633 
1634                 "UTF-16BE",
1635                 "UTF-16LE",
1636                 "UTF-8",
1637                 "SCSU",
1638 
1639                 "UTF-16BE",
1640                 "UTF-16LE",
1641                 "UTF-8",
1642                 "SCSU",
1643 
1644                 "UTF-7",
1645                 "UTF-7",
1646                 "UTF-7",
1647                 "UTF-7",
1648                 "UTF-7",
1649                 "UTF-EBCDIC"
1650         };
1651         static const int32_t expectedLength[] ={
1652             2,
1653             2,
1654             3,
1655             3,
1656 
1657             2,
1658             2,
1659             3,
1660             3,
1661 
1662             2,
1663             2,
1664             3,
1665             3,
1666 
1667             5,
1668             4,
1669             4,
1670             4,
1671             4,
1672             4
1673         };
1674         int i=0;
1675         UErrorCode err;
1676         int32_t signatureLength = -1;
1677         const char* source = NULL;
1678         const char* enc = NULL;
1679         for( ; i<UPRV_LENGTHOF(data); i++){
1680             err = U_ZERO_ERROR;
1681             source = data[i];
1682             enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1683             if(U_FAILURE(err)){
1684                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1685                 continue;
1686             }
1687             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1688                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1689                 continue;
1690             }
1691             if(signatureLength != expectedLength[i]){
1692                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1693             }
1694         }
1695     }
1696     {
1697         static const char* data[] = {
1698                 "\xFE\xFF\x00",         /* UTF-16BE */
1699                 "\xFF\xFE\x00",         /* UTF-16LE */
1700                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1701                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1702                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1703                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1704                 "\xFE\xFF",             /* UTF-16BE */
1705                 "\xFF\xFE",             /* UTF-16LE */
1706                 "\xEF\xBB\xBF",         /* UTF-8    */
1707                 "\x0E\xFE\xFF",         /* SCSU     */
1708                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1709                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1710                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1711                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1712                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1713                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1714                 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1715                 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1716                 "\xFB\xEE\x28",         /* BOCU-1   */
1717                 "\xFF\x41\x42"          /* NULL     */
1718         };
1719         static const int len[] = {
1720             3,
1721             3,
1722             4,
1723             4,
1724             4,
1725             4,
1726             2,
1727             2,
1728             3,
1729             3,
1730             4,
1731             4,
1732             4,
1733             4,
1734             4,
1735             4,
1736             5,
1737             5,
1738             3,
1739             3
1740         };
1741 
1742         static const char* expected[] = {
1743                 "UTF-16BE",
1744                 "UTF-16LE",
1745                 "UTF-8",
1746                 "SCSU",
1747                 "UTF-32BE",
1748                 "UTF-32LE",
1749                 "UTF-16BE",
1750                 "UTF-16LE",
1751                 "UTF-8",
1752                 "SCSU",
1753                 "UTF-32BE",
1754                 "UTF-32LE",
1755                 "UTF-16BE",
1756                 "UTF-16LE",
1757                 "UTF-8",
1758                 "SCSU",
1759                 "UTF-32BE",
1760                 "UTF-32LE",
1761                 "BOCU-1",
1762                 NULL
1763         };
1764         static const int32_t expectedLength[] ={
1765             2,
1766             2,
1767             3,
1768             3,
1769             4,
1770             4,
1771             2,
1772             2,
1773             3,
1774             3,
1775             4,
1776             4,
1777             2,
1778             2,
1779             3,
1780             3,
1781             4,
1782             4,
1783             3,
1784             0
1785         };
1786         int i=0;
1787         UErrorCode err;
1788         int32_t signatureLength = -1;
1789         int32_t sourceLength=-1;
1790         const char* source = NULL;
1791         const char* enc = NULL;
1792         for( ; i<UPRV_LENGTHOF(data); i++){
1793             err = U_ZERO_ERROR;
1794             source = data[i];
1795             sourceLength = len[i];
1796             enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1797             if(U_FAILURE(err)){
1798                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1799                 continue;
1800             }
1801             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1802                 if(expected[i] !=NULL){
1803                  log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1804                  continue;
1805                 }
1806             }
1807             if(signatureLength != expectedLength[i]){
1808                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1809             }
1810         }
1811     }
1812 }
1813 
TestUTF7null1814 static void TestUTF7() {
1815     /* test input */
1816     static const uint8_t in[]={
1817         /* H - +Jjo- - ! +- +2AHcAQ */
1818         0x48,
1819         0x2d,
1820         0x2b, 0x4a, 0x6a, 0x6f,
1821         0x2d, 0x2d,
1822         0x21,
1823         0x2b, 0x2d,
1824         0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1825     };
1826 
1827     /* expected test results */
1828     static const int32_t results[]={
1829         /* number of bytes read, code point */
1830         1, 0x48,
1831         1, 0x2d,
1832         4, 0x263a, /* <WHITE SMILING FACE> */
1833         2, 0x2d,
1834         1, 0x21,
1835         2, 0x2b,
1836         7, 0x10401
1837     };
1838 
1839     const char *cnvName;
1840     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1841     UErrorCode errorCode=U_ZERO_ERROR;
1842     UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1843     if(U_FAILURE(errorCode)) {
1844         log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1845         return;
1846     }
1847     TestNextUChar(cnv, source, limit, results, "UTF-7");
1848     /* Test the condition when source >= sourceLimit */
1849     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1850     cnvName = ucnv_getName(cnv, &errorCode);
1851     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1852         log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1853     }
1854     ucnv_close(cnv);
1855 }
1856 
TestIMAPnull1857 static void TestIMAP() {
1858     /* test input */
1859     static const uint8_t in[]={
1860         /* H - &Jjo- - ! &- &2AHcAQ- \ */
1861         0x48,
1862         0x2d,
1863         0x26, 0x4a, 0x6a, 0x6f,
1864         0x2d, 0x2d,
1865         0x21,
1866         0x26, 0x2d,
1867         0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1868     };
1869 
1870     /* expected test results */
1871     static const int32_t results[]={
1872         /* number of bytes read, code point */
1873         1, 0x48,
1874         1, 0x2d,
1875         4, 0x263a, /* <WHITE SMILING FACE> */
1876         2, 0x2d,
1877         1, 0x21,
1878         2, 0x26,
1879         7, 0x10401
1880     };
1881 
1882     const char *cnvName;
1883     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1884     UErrorCode errorCode=U_ZERO_ERROR;
1885     UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1886     if(U_FAILURE(errorCode)) {
1887         log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1888         return;
1889     }
1890     TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1891     /* Test the condition when source >= sourceLimit */
1892     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1893     cnvName = ucnv_getName(cnv, &errorCode);
1894     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1895         log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1896     }
1897     ucnv_close(cnv);
1898 }
1899 
TestUTF8null1900 static void TestUTF8() {
1901     /* test input */
1902     static const uint8_t in[]={
1903         0x61,
1904         0xc2, 0x80,
1905         0xe0, 0xa0, 0x80,
1906         0xf0, 0x90, 0x80, 0x80,
1907         0xf4, 0x84, 0x8c, 0xa1,
1908         0xf0, 0x90, 0x90, 0x81
1909     };
1910 
1911     /* expected test results */
1912     static const int32_t results[]={
1913         /* number of bytes read, code point */
1914         1, 0x61,
1915         2, 0x80,
1916         3, 0x800,
1917         4, 0x10000,
1918         4, 0x104321,
1919         4, 0x10401
1920     };
1921 
1922     /* error test input */
1923     static const uint8_t in2[]={
1924         0x61,
1925         0xc0, 0x80,                     /* illegal non-shortest form */
1926         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1927         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1928         0xc0, 0xc0,                     /* illegal trail byte */
1929         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1930         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1931         0xfe,                           /* illegal byte altogether */
1932         0x62
1933     };
1934 
1935     /* expected error test results */
1936     static const int32_t results2[]={
1937         /* number of bytes read, code point */
1938         1, 0x61,
1939         22, 0x62
1940     };
1941 
1942     UConverterToUCallback cb;
1943     const void *p;
1944 
1945     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1946     UErrorCode errorCode=U_ZERO_ERROR;
1947     UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1948     if(U_FAILURE(errorCode)) {
1949         log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1950         return;
1951     }
1952     TestNextUChar(cnv, source, limit, results, "UTF-8");
1953     /* Test the condition when source >= sourceLimit */
1954     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1955 
1956     /* test error behavior with a skip callback */
1957     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1958     source=(const char *)in2;
1959     limit=(const char *)(in2+sizeof(in2));
1960     TestNextUChar(cnv, source, limit, results2, "UTF-8");
1961 
1962     ucnv_close(cnv);
1963 }
1964 
TestCESU8null1965 static void TestCESU8() {
1966     /* test input */
1967     static const uint8_t in[]={
1968         0x61,
1969         0xc2, 0x80,
1970         0xe0, 0xa0, 0x80,
1971         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1972         0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1973         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1974         0xef, 0xbf, 0xbc
1975     };
1976 
1977     /* expected test results */
1978     static const int32_t results[]={
1979         /* number of bytes read, code point */
1980         1, 0x61,
1981         2, 0x80,
1982         3, 0x800,
1983         6, 0x10000,
1984         3, 0xdc01,
1985         -1,0xd802,  /* may read 3 or 6 bytes */
1986         -1,0x10ffff,/* may read 0 or 3 bytes */
1987         3, 0xfffc
1988     };
1989 
1990     /* error test input */
1991     static const uint8_t in2[]={
1992         0x61,
1993         0xc0, 0x80,                     /* illegal non-shortest form */
1994         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1995         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1996         0xc0, 0xc0,                     /* illegal trail byte */
1997         0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1998         0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1999         0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
2000         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
2001         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
2002         0xfe,                           /* illegal byte altogether */
2003         0x62
2004     };
2005 
2006     /* expected error test results */
2007     static const int32_t results2[]={
2008         /* number of bytes read, code point */
2009         1, 0x61,
2010         34, 0x62
2011     };
2012 
2013     UConverterToUCallback cb;
2014     const void *p;
2015 
2016     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2017     UErrorCode errorCode=U_ZERO_ERROR;
2018     UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2019     if(U_FAILURE(errorCode)) {
2020         log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2021         return;
2022     }
2023     TestNextUChar(cnv, source, limit, results, "CESU-8");
2024     /* Test the condition when source >= sourceLimit */
2025     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2026 
2027     /* test error behavior with a skip callback */
2028     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2029     source=(const char *)in2;
2030     limit=(const char *)(in2+sizeof(in2));
2031     TestNextUChar(cnv, source, limit, results2, "CESU-8");
2032 
2033     ucnv_close(cnv);
2034 }
2035 
TestUTF16null2036 static void TestUTF16() {
2037     /* test input */
2038     static const uint8_t in1[]={
2039         0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2040     };
2041     static const uint8_t in2[]={
2042         0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2043     };
2044     static const uint8_t in3[]={
2045         0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2046     };
2047 
2048     /* expected test results */
2049     static const int32_t results1[]={
2050         /* number of bytes read, code point */
2051         4, 0x4e00,
2052         2, 0xfeff
2053     };
2054     static const int32_t results2[]={
2055         /* number of bytes read, code point */
2056         4, 0x004e,
2057         2, 0xfffe
2058     };
2059     static const int32_t results3[]={
2060         /* number of bytes read, code point */
2061         2, 0xfefe,
2062         2, 0x4e00,
2063         2, 0xfeff,
2064         4, 0x20001
2065     };
2066 
2067     const char *source, *limit;
2068 
2069     UErrorCode errorCode=U_ZERO_ERROR;
2070     UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2071     if(U_FAILURE(errorCode)) {
2072         log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2073         return;
2074     }
2075 
2076     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2077     TestNextUChar(cnv, source, limit, results1, "UTF-16");
2078 
2079     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2080     ucnv_resetToUnicode(cnv);
2081     TestNextUChar(cnv, source, limit, results2, "UTF-16");
2082 
2083     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2084     ucnv_resetToUnicode(cnv);
2085     TestNextUChar(cnv, source, limit, results3, "UTF-16");
2086 
2087     /* Test the condition when source >= sourceLimit */
2088     ucnv_resetToUnicode(cnv);
2089     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2090 
2091     ucnv_close(cnv);
2092 }
2093 
TestUTF16BEnull2094 static void TestUTF16BE() {
2095     /* test input */
2096     static const uint8_t in[]={
2097         0x00, 0x61,
2098         0x00, 0xc0,
2099         0x00, 0x31,
2100         0x00, 0xf4,
2101         0xce, 0xfe,
2102         0xd8, 0x01, 0xdc, 0x01
2103     };
2104 
2105     /* expected test results */
2106     static const int32_t results[]={
2107         /* number of bytes read, code point */
2108         2, 0x61,
2109         2, 0xc0,
2110         2, 0x31,
2111         2, 0xf4,
2112         2, 0xcefe,
2113         4, 0x10401
2114     };
2115 
2116     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2117     UErrorCode errorCode=U_ZERO_ERROR;
2118     UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2119     if(U_FAILURE(errorCode)) {
2120         log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2121         return;
2122     }
2123     TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2124     /* Test the condition when source >= sourceLimit */
2125     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2126     /*Test for the condition where there is an invalid character*/
2127     {
2128         static const uint8_t source2[]={0x61};
2129         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2130         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2131     }
2132 #if 0
2133     /*
2134      * Test disabled because currently the UTF-16BE/LE converters are supposed
2135      * to not set errors for unpaired surrogates.
2136      * This may change with
2137      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2138      */
2139 
2140     /*Test for the condition where there is a surrogate pair*/
2141     {
2142         const uint8_t source2[]={0xd8, 0x01};
2143         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2144     }
2145 #endif
2146     ucnv_close(cnv);
2147 }
2148 
2149 static void
TestUTF16LEnull2150 TestUTF16LE() {
2151     /* test input */
2152     static const uint8_t in[]={
2153         0x61, 0x00,
2154         0x31, 0x00,
2155         0x4e, 0x2e,
2156         0x4e, 0x00,
2157         0x01, 0xd8, 0x01, 0xdc
2158     };
2159 
2160     /* expected test results */
2161     static const int32_t results[]={
2162         /* number of bytes read, code point */
2163         2, 0x61,
2164         2, 0x31,
2165         2, 0x2e4e,
2166         2, 0x4e,
2167         4, 0x10401
2168     };
2169 
2170     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2171     UErrorCode errorCode=U_ZERO_ERROR;
2172     UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2173     if(U_FAILURE(errorCode)) {
2174         log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2175         return;
2176     }
2177     TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2178     /* Test the condition when source >= sourceLimit */
2179     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2180     /*Test for the condition where there is an invalid character*/
2181     {
2182         static const uint8_t source2[]={0x61};
2183         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2184         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2185     }
2186 #if 0
2187     /*
2188      * Test disabled because currently the UTF-16BE/LE converters are supposed
2189      * to not set errors for unpaired surrogates.
2190      * This may change with
2191      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2192      */
2193 
2194     /*Test for the condition where there is a surrogate character*/
2195     {
2196         static const uint8_t source2[]={0x01, 0xd8};
2197         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2198     }
2199 #endif
2200 
2201     ucnv_close(cnv);
2202 }
2203 
TestUTF32null2204 static void TestUTF32() {
2205     /* test input */
2206     static const uint8_t in1[]={
2207         0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2208     };
2209     static const uint8_t in2[]={
2210         0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2211     };
2212     static const uint8_t in3[]={
2213         0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2214     };
2215 
2216     /* expected test results */
2217     static const int32_t results1[]={
2218         /* number of bytes read, code point */
2219         8, 0x100f00,
2220         4, 0xfeff
2221     };
2222     static const int32_t results2[]={
2223         /* number of bytes read, code point */
2224         8, 0x0f1000,
2225         4, 0xfffe
2226     };
2227     static const int32_t results3[]={
2228         /* number of bytes read, code point */
2229         4, 0xfefe,
2230         4, 0x100f00,
2231         4, 0xfffd, /* unmatched surrogate */
2232         4, 0xfffd  /* unmatched surrogate */
2233     };
2234 
2235     const char *source, *limit;
2236 
2237     UErrorCode errorCode=U_ZERO_ERROR;
2238     UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2239     if(U_FAILURE(errorCode)) {
2240         log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2241         return;
2242     }
2243 
2244     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2245     TestNextUChar(cnv, source, limit, results1, "UTF-32");
2246 
2247     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2248     ucnv_resetToUnicode(cnv);
2249     TestNextUChar(cnv, source, limit, results2, "UTF-32");
2250 
2251     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2252     ucnv_resetToUnicode(cnv);
2253     TestNextUChar(cnv, source, limit, results3, "UTF-32");
2254 
2255     /* Test the condition when source >= sourceLimit */
2256     ucnv_resetToUnicode(cnv);
2257     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2258 
2259     ucnv_close(cnv);
2260 }
2261 
2262 static void
TestUTF32BEnull2263 TestUTF32BE() {
2264     /* test input */
2265     static const uint8_t in[]={
2266         0x00, 0x00, 0x00, 0x61,
2267         0x00, 0x00, 0x30, 0x61,
2268         0x00, 0x00, 0xdc, 0x00,
2269         0x00, 0x00, 0xd8, 0x00,
2270         0x00, 0x00, 0xdf, 0xff,
2271         0x00, 0x00, 0xff, 0xfe,
2272         0x00, 0x10, 0xab, 0xcd,
2273         0x00, 0x10, 0xff, 0xff
2274     };
2275 
2276     /* expected test results */
2277     static const int32_t results[]={
2278         /* number of bytes read, code point */
2279         4, 0x61,
2280         4, 0x3061,
2281         4, 0xfffd,
2282         4, 0xfffd,
2283         4, 0xfffd,
2284         4, 0xfffe,
2285         4, 0x10abcd,
2286         4, 0x10ffff
2287     };
2288 
2289     /* error test input */
2290     static const uint8_t in2[]={
2291         0x00, 0x00, 0x00, 0x61,
2292         0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2293         0x00, 0x00, 0x00, 0x62,
2294         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2295         0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2296         0x00, 0x00, 0x01, 0x62,
2297         0x00, 0x00, 0x02, 0x62
2298     };
2299 
2300     /* expected error test results */
2301     static const int32_t results2[]={
2302         /* number of bytes read, code point */
2303         4,  0x61,
2304         8,  0x62,
2305         12, 0x162,
2306         4,  0x262
2307     };
2308 
2309     UConverterToUCallback cb;
2310     const void *p;
2311 
2312     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2313     UErrorCode errorCode=U_ZERO_ERROR;
2314     UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2315     if(U_FAILURE(errorCode)) {
2316         log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2317         return;
2318     }
2319     TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2320 
2321     /* Test the condition when source >= sourceLimit */
2322     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2323 
2324     /* test error behavior with a skip callback */
2325     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2326     source=(const char *)in2;
2327     limit=(const char *)(in2+sizeof(in2));
2328     TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2329 
2330     ucnv_close(cnv);
2331 }
2332 
2333 static void
TestUTF32LEnull2334 TestUTF32LE() {
2335     /* test input */
2336     static const uint8_t in[]={
2337         0x61, 0x00, 0x00, 0x00,
2338         0x61, 0x30, 0x00, 0x00,
2339         0x00, 0xdc, 0x00, 0x00,
2340         0x00, 0xd8, 0x00, 0x00,
2341         0xff, 0xdf, 0x00, 0x00,
2342         0xfe, 0xff, 0x00, 0x00,
2343         0xcd, 0xab, 0x10, 0x00,
2344         0xff, 0xff, 0x10, 0x00
2345     };
2346 
2347     /* expected test results */
2348     static const int32_t results[]={
2349         /* number of bytes read, code point */
2350         4, 0x61,
2351         4, 0x3061,
2352         4, 0xfffd,
2353         4, 0xfffd,
2354         4, 0xfffd,
2355         4, 0xfffe,
2356         4, 0x10abcd,
2357         4, 0x10ffff
2358     };
2359 
2360     /* error test input */
2361     static const uint8_t in2[]={
2362         0x61, 0x00, 0x00, 0x00,
2363         0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2364         0x62, 0x00, 0x00, 0x00,
2365         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2366         0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2367         0x62, 0x01, 0x00, 0x00,
2368         0x62, 0x02, 0x00, 0x00,
2369     };
2370 
2371     /* expected error test results */
2372     static const int32_t results2[]={
2373         /* number of bytes read, code point */
2374         4,  0x61,
2375         8,  0x62,
2376         12, 0x162,
2377         4,  0x262,
2378     };
2379 
2380     UConverterToUCallback cb;
2381     const void *p;
2382 
2383     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2384     UErrorCode errorCode=U_ZERO_ERROR;
2385     UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2386     if(U_FAILURE(errorCode)) {
2387         log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2388         return;
2389     }
2390     TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2391 
2392     /* Test the condition when source >= sourceLimit */
2393     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2394 
2395     /* test error behavior with a skip callback */
2396     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2397     source=(const char *)in2;
2398     limit=(const char *)(in2+sizeof(in2));
2399     TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2400 
2401     ucnv_close(cnv);
2402 }
2403 
2404 static void
TestLATIN1null2405 TestLATIN1() {
2406     /* test input */
2407     static const uint8_t in[]={
2408        0x61,
2409        0x31,
2410        0x32,
2411        0xc0,
2412        0xf0,
2413        0xf4,
2414     };
2415 
2416     /* expected test results */
2417     static const int32_t results[]={
2418         /* number of bytes read, code point */
2419         1, 0x61,
2420         1, 0x31,
2421         1, 0x32,
2422         1, 0xc0,
2423         1, 0xf0,
2424         1, 0xf4,
2425     };
2426     static const uint16_t in1[] = {
2427         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2428         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2429         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2430         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2431         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2432         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2433         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2434         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2435         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2436         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2437         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2438         0xcb, 0x82
2439     };
2440     static const uint8_t out1[] = {
2441         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2442         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2443         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2444         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2445         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2446         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2447         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2448         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2449         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2450         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2451         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2452         0xcb, 0x82
2453     };
2454     static const uint16_t in2[]={
2455         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2456         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2457         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2458         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2459         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2460         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2461         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2462         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2463         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2464         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2465         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2466         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2467         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2468         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2469         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2470         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2471         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2472         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2473         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2474         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2475         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2476         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2477         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2478         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2479         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2480         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2481         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2482         0x37, 0x20, 0x2A, 0x2F,
2483     };
2484     static const unsigned char out2[]={
2485         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2486         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2487         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2488         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2489         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2490         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2491         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2492         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2493         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2494         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2495         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2496         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2497         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2498         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2499         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2500         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2501         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2502         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2503         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2504         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2505         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2506         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2507         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2508         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2509         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2510         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2511         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2512         0x37, 0x20, 0x2A, 0x2F,
2513     };
2514     const char *source=(const char *)in;
2515     const char *limit=(const char *)in+sizeof(in);
2516 
2517     UErrorCode errorCode=U_ZERO_ERROR;
2518     UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2519     if(U_FAILURE(errorCode)) {
2520         log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2521         return;
2522     }
2523     TestNextUChar(cnv, source, limit, results, "LATIN_1");
2524     /* Test the condition when source >= sourceLimit */
2525     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2526     TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2527     TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2528 
2529     ucnv_close(cnv);
2530 }
2531 
2532 static void
TestSBCSnull2533 TestSBCS() {
2534     /* test input */
2535     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2536     /* expected test results */
2537     static const int32_t results[]={
2538         /* number of bytes read, code point */
2539         1, 0x61,
2540         1, 0xbf,
2541         1, 0xc4,
2542         1, 0x2021,
2543         1, 0xf8ff,
2544         1, 0x00d9
2545     };
2546 
2547     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2548     UErrorCode errorCode=U_ZERO_ERROR;
2549     UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2550     if(U_FAILURE(errorCode)) {
2551         log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2552         return;
2553     }
2554     TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2555     /* Test the condition when source >= sourceLimit */
2556     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2557     /*Test for Illegal character */ /*
2558     {
2559     static const uint8_t input1[]={ 0xA1 };
2560     const char* illegalsource=(const char*)input1;
2561     TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal character");
2562     }
2563    */
2564     ucnv_close(cnv);
2565 }
2566 
2567 static void
TestDBCSnull2568 TestDBCS() {
2569     /* test input */
2570     static const uint8_t in[]={
2571         0x44, 0x6a,
2572         0xc4, 0x9c,
2573         0x7a, 0x74,
2574         0x46, 0xab,
2575         0x42, 0x5b,
2576 
2577     };
2578 
2579     /* expected test results */
2580     static const int32_t results[]={
2581         /* number of bytes read, code point */
2582         2, 0x00a7,
2583         2, 0xe1d2,
2584         2, 0x6962,
2585         2, 0xf842,
2586         2, 0xffe5,
2587     };
2588 
2589     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2590     UErrorCode errorCode=U_ZERO_ERROR;
2591 
2592     UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2593     if(U_FAILURE(errorCode)) {
2594         log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2595         return;
2596     }
2597     TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2598     /* Test the condition when source >= sourceLimit */
2599     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2600     /*Test for the condition where there is an invalid character*/
2601     {
2602         static const uint8_t source2[]={0x1a, 0x1b};
2603         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2604     }
2605     /*Test for the condition where we have a truncated char*/
2606     {
2607         static const uint8_t source1[]={0xc4};
2608         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2609         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2610     }
2611     ucnv_close(cnv);
2612 }
2613 
2614 static void
TestMBCSnull2615 TestMBCS() {
2616     /* test input */
2617     static const uint8_t in[]={
2618         0x01,
2619         0xa6, 0xa3,
2620         0x00,
2621         0xa6, 0xa1,
2622         0x08,
2623         0xc2, 0x76,
2624         0xc2, 0x78,
2625 
2626     };
2627 
2628     /* expected test results */
2629     static const int32_t results[]={
2630         /* number of bytes read, code point */
2631         1, 0x0001,
2632         2, 0x250c,
2633         1, 0x0000,
2634         2, 0x2500,
2635         1, 0x0008,
2636         2, 0xd60c,
2637         2, 0xd60e,
2638     };
2639 
2640     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2641     UErrorCode errorCode=U_ZERO_ERROR;
2642 
2643     UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2644     if(U_FAILURE(errorCode)) {
2645         log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2646         return;
2647     }
2648     TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2649     /* Test the condition when source >= sourceLimit */
2650     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2651     /*Test for the condition where there is an invalid character*/
2652     {
2653         static const uint8_t source2[]={0xa1, 0x80};
2654         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2655     }
2656     /*Test for the condition where we have a truncated char*/
2657     {
2658         static const uint8_t source1[]={0xc4};
2659         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2660         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2661     }
2662     ucnv_close(cnv);
2663 
2664 }
2665 
2666 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2667 static void
TestICCRunoutnull2668 TestICCRunout() {
2669 /*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2670 
2671     const char *cnvName = "ibm-1363";
2672     UErrorCode status = U_ZERO_ERROR;
2673     const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2674     /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2675     const char *source = sourceData;
2676     const char *sourceLim = sourceData+sizeof(sourceData);
2677     UChar c1, c2, c3;
2678     UConverter *cnv=ucnv_open(cnvName, &status);
2679     if(U_FAILURE(status)) {
2680         log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2681 	return;
2682     }
2683 
2684 #if 0
2685     {
2686     UChar   targetBuf[256];
2687     UChar   *target = targetBuf;
2688     UChar   *targetLim = target+256;
2689     ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, true, &status);
2690 
2691     log_info("After convert: target@%d, source@%d, status%s\n",
2692 	     target-targetBuf, source-sourceData, u_errorName(status));
2693 
2694     if(U_FAILURE(status)) {
2695 	log_err("Failed to convert: %s\n", u_errorName(status));
2696     } else {
2697 
2698     }
2699     }
2700 #endif
2701 
2702     c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2703     log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2704 
2705     c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2706     log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2707 
2708     c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2709     log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2710 
2711     if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2712 	log_verbose("OK\n");
2713     } else {
2714 	log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2715     }
2716 
2717     ucnv_close(cnv);
2718 
2719 }
2720 #endif
2721 
2722 #ifdef U_ENABLE_GENERIC_ISO_2022
2723 
2724 static void
TestISO_2022null2725 TestISO_2022() {
2726     /* test input */
2727     static const uint8_t in[]={
2728         0x1b, 0x25, 0x42,
2729         0x31,
2730         0x32,
2731         0x61,
2732         0xc2, 0x80,
2733         0xe0, 0xa0, 0x80,
2734         0xf0, 0x90, 0x80, 0x80
2735     };
2736 
2737 
2738 
2739     /* expected test results */
2740     static const int32_t results[]={
2741         /* number of bytes read, code point */
2742         4, 0x0031,  /* 4 bytes including the escape sequence */
2743         1, 0x0032,
2744         1, 0x61,
2745         2, 0x80,
2746         3, 0x800,
2747         4, 0x10000
2748     };
2749 
2750     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2751     UErrorCode errorCode=U_ZERO_ERROR;
2752     UConverter *cnv;
2753 
2754     cnv=ucnv_open("ISO_2022", &errorCode);
2755     if(U_FAILURE(errorCode)) {
2756         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2757         return;
2758     }
2759     TestNextUChar(cnv, source, limit, results, "ISO_2022");
2760 
2761     /* Test the condition when source >= sourceLimit */
2762     TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2763     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2764     /*Test for the condition where we have a truncated char*/
2765     {
2766         static const uint8_t source1[]={0xc4};
2767         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2768         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2769     }
2770     /*Test for the condition where there is an invalid character*/
2771     {
2772         static const uint8_t source2[]={0xa1, 0x01};
2773         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2774     }
2775     ucnv_close(cnv);
2776 }
2777 
2778 #endif
2779 
2780 static void
TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv)2781 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2782     const UChar* uSource;
2783     const UChar* uSourceLimit;
2784     const char* cSource;
2785     const char* cSourceLimit;
2786     UChar *uTargetLimit =NULL;
2787     UChar *uTarget;
2788     char *cTarget;
2789     const char *cTargetLimit;
2790     char *cBuf;
2791     UChar *uBuf; /*,*test;*/
2792     int32_t uBufSize = 120;
2793     int len=0;
2794     int i=2;
2795     UErrorCode errorCode=U_ZERO_ERROR;
2796     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2797     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2798     ucnv_reset(cnv);
2799     for(;--i>0; ){
2800         uSource = (UChar*) source;
2801         uSourceLimit=(const UChar*)sourceLimit;
2802         cTarget = cBuf;
2803         uTarget = uBuf;
2804         cSource = cBuf;
2805         cTargetLimit = cBuf;
2806         uTargetLimit = uBuf;
2807 
2808         do{
2809 
2810             cTargetLimit = cTargetLimit+ i;
2811             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,false, &errorCode);
2812             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2813                errorCode=U_ZERO_ERROR;
2814                 continue;
2815             }
2816 
2817             if(U_FAILURE(errorCode)){
2818                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2819                 return;
2820             }
2821 
2822         }while (uSource<uSourceLimit);
2823 
2824         cSourceLimit =cTarget;
2825         do{
2826             uTargetLimit=uTargetLimit+i;
2827             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,false,&errorCode);
2828             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2829                errorCode=U_ZERO_ERROR;
2830                 continue;
2831             }
2832             if(U_FAILURE(errorCode)){
2833                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2834                     return;
2835             }
2836         }while(cSource<cSourceLimit);
2837 
2838         uSource = source;
2839         /*test =uBuf;*/
2840         for(len=0;len<(int)(source - sourceLimit);len++){
2841             if(uBuf[len]!=uSource[len]){
2842                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2843             }
2844         }
2845     }
2846     free(uBuf);
2847     free(cBuf);
2848 }
2849 /* Test for Jitterbug 778 */
TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv)2850 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2851     const UChar* uSource;
2852     const UChar* uSourceLimit;
2853     const char* cSource;
2854     UChar *uTargetLimit =NULL;
2855     UChar *uTarget;
2856     char *cTarget;
2857     const char *cTargetLimit;
2858     char *cBuf;
2859     UChar *uBuf,*test;
2860     int32_t uBufSize = 120;
2861     int numCharsInTarget=0;
2862     UErrorCode errorCode=U_ZERO_ERROR;
2863     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2864     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2865     uSource = source;
2866     uSourceLimit=sourceLimit;
2867     cTarget = cBuf;
2868     cTargetLimit = cBuf +uBufSize*5;
2869     uTarget = uBuf;
2870     uTargetLimit = uBuf+ uBufSize*5;
2871     ucnv_reset(cnv);
2872     numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2873     if(U_FAILURE(errorCode)){
2874         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2875         return;
2876     }
2877     cSource = cBuf;
2878     test =uBuf;
2879     ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2880     if(U_FAILURE(errorCode)){
2881         log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2882         return;
2883     }
2884     uSource = source;
2885     while(uSource<uSourceLimit){
2886         if(*test!=*uSource){
2887 
2888             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2889         }
2890         uSource++;
2891         test++;
2892     }
2893     free(uBuf);
2894     free(cBuf);
2895 }
2896 
TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv)2897 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2898     const UChar* uSource;
2899     const UChar* uSourceLimit;
2900     const char* cSource;
2901     const char* cSourceLimit;
2902     UChar *uTargetLimit =NULL;
2903     UChar *uTarget;
2904     char *cTarget;
2905     const char *cTargetLimit;
2906     char *cBuf;
2907     UChar *uBuf; /*,*test;*/
2908     int32_t uBufSize = 120;
2909     int len=0;
2910     int i=2;
2911     const UChar *temp = sourceLimit;
2912     UErrorCode errorCode=U_ZERO_ERROR;
2913     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2914     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2915 
2916     ucnv_reset(cnv);
2917     for(;--i>0;){
2918         uSource = (UChar*) source;
2919         cTarget = cBuf;
2920         uTarget = uBuf;
2921         cSource = cBuf;
2922         cTargetLimit = cBuf;
2923         uTargetLimit = uBuf+uBufSize*5;
2924         cTargetLimit = cTargetLimit+uBufSize*10;
2925         uSourceLimit=uSource;
2926         do{
2927 
2928             if (uSourceLimit < sourceLimit) {
2929                 uSourceLimit = uSourceLimit+1;
2930             }
2931             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,false, &errorCode);
2932             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2933                errorCode=U_ZERO_ERROR;
2934                 continue;
2935             }
2936 
2937             if(U_FAILURE(errorCode)){
2938                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2939                 return;
2940             }
2941 
2942         }while (uSource<temp);
2943 
2944         cSourceLimit =cBuf;
2945         do{
2946             if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2947                 cSourceLimit = cSourceLimit+1;
2948             }
2949             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,false,&errorCode);
2950             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2951                errorCode=U_ZERO_ERROR;
2952                 continue;
2953             }
2954             if(U_FAILURE(errorCode)){
2955                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2956                     return;
2957             }
2958         }while(cSource<cTarget);
2959 
2960         uSource = source;
2961         /*test =uBuf;*/
2962         for(;len<(int)(source - sourceLimit);len++){
2963             if(uBuf[len]!=uSource[len]){
2964                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2965             }
2966         }
2967     }
2968     free(uBuf);
2969     free(cBuf);
2970 }
2971 static void
TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, const uint16_t results[], const char* message)2972 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2973                      const uint16_t results[], const char* message){
2974 /*     const char* s0; */
2975      const char* s=(char*)source;
2976      const uint16_t *r=results;
2977      UErrorCode errorCode=U_ZERO_ERROR;
2978      uint32_t c,exC;
2979      ucnv_reset(cnv);
2980      while(s<limit) {
2981 	 /* s0=s; */
2982         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2983         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2984             break; /* no more significant input */
2985         } else if(U_FAILURE(errorCode)) {
2986             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2987             break;
2988         } else {
2989             if(U16_IS_LEAD(*r)){
2990                 int i =0, len = 2;
2991                 U16_NEXT(r, i, len, exC);
2992                 r++;
2993             }else{
2994                 exC = *r;
2995             }
2996             if(c!=(uint32_t)(exC))
2997                 log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2998         }
2999         r++;
3000     }
3001 }
3002 
TestJitterbug930(const char* enc)3003 static int TestJitterbug930(const char* enc){
3004     UErrorCode err = U_ZERO_ERROR;
3005     UConverter*converter;
3006     char out[80];
3007     char*target = out;
3008     UChar in[4];
3009     const UChar*source = in;
3010     int32_t off[80];
3011     int32_t* offsets = off;
3012     int numOffWritten=0;
3013     UBool flush = 0;
3014     converter = my_ucnv_open(enc, &err);
3015 
3016     in[0] = 0x41;     /* 0x4E00;*/
3017     in[1] = 0x4E01;
3018     in[2] = 0x4E02;
3019     in[3] = 0x4E03;
3020 
3021     memset(off, '*', sizeof(off));
3022 
3023     ucnv_fromUnicode (converter,
3024             &target,
3025             target+2,
3026             &source,
3027             source+3,
3028             offsets,
3029             flush,
3030             &err);
3031 
3032         /* writes three bytes into the output buffer: 41 1B 24
3033         * but offsets contains 0 1 1
3034     */
3035     while(*offsets< off[10]){
3036         numOffWritten++;
3037         offsets++;
3038     }
3039     log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3040     if(numOffWritten!= (int)(target-out)){
3041         log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3042     }
3043 
3044     err = U_ZERO_ERROR;
3045 
3046     memset(off,'*' , sizeof(off));
3047 
3048     flush = 1;
3049     offsets=off;
3050     ucnv_fromUnicode (converter,
3051             &target,
3052             target+4,
3053             &source,
3054             source,
3055             offsets,
3056             flush,
3057             &err);
3058     numOffWritten=0;
3059     while(*offsets< off[10]){
3060         numOffWritten++;
3061         if(*offsets!= -1){
3062             log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3063         }
3064         offsets++;
3065     }
3066 
3067     /* writes 42 43 7A into output buffer,
3068      * offsets contains -1 -1 -1
3069      */
3070     ucnv_close(converter);
3071     return 0;
3072 }
3073 
3074 static void
TestHZnull3075 TestHZ() {
3076     /* test input */
3077     static const uint16_t in[]={
3078             0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3079             0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3080             0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3081             0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3082             0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3083             0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3084             0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3085             0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3086             0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3087             0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3088             0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3089             0x005A, 0x005B, 0x005C, 0x000A
3090       };
3091     const UChar* uSource;
3092     const UChar* uSourceLimit;
3093     const char* cSource;
3094     const char* cSourceLimit;
3095     UChar *uTargetLimit =NULL;
3096     UChar *uTarget;
3097     char *cTarget;
3098     const char *cTargetLimit;
3099     char *cBuf = NULL;
3100     UChar *uBuf = NULL;
3101     UChar *test;
3102     int32_t uBufSize = 120;
3103     UErrorCode errorCode=U_ZERO_ERROR;
3104     UConverter *cnv = NULL;
3105     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3106     int32_t* myOff= offsets;
3107     cnv=ucnv_open("HZ", &errorCode);
3108     if(U_FAILURE(errorCode)) {
3109         log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3110         goto cleanup;
3111     }
3112 
3113     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3114     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3115     uSource = (const UChar*)in;
3116     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3117     cTarget = cBuf;
3118     cTargetLimit = cBuf +uBufSize*5;
3119     uTarget = uBuf;
3120     uTargetLimit = uBuf+ uBufSize*5;
3121     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3122     if(U_FAILURE(errorCode)){
3123         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3124         goto cleanup;
3125     }
3126     cSource = cBuf;
3127     cSourceLimit =cTarget;
3128     test =uBuf;
3129     myOff=offsets;
3130     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3131     if(U_FAILURE(errorCode)){
3132         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3133         goto cleanup;
3134     }
3135     uSource = (const UChar*)in;
3136     while(uSource<uSourceLimit){
3137         if(*test!=*uSource){
3138 
3139             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3140         }
3141         uSource++;
3142         test++;
3143     }
3144     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3145     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3146     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3147     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3148     TestJitterbug930("csISO2022JP");
3149 
3150 cleanup:
3151     ucnv_close(cnv);
3152     free(offsets);
3153     free(uBuf);
3154     free(cBuf);
3155 }
3156 
3157 static void
TestISCIInull3158 TestISCII(){
3159         /* test input */
3160     static const uint16_t in[]={
3161         /* test full range of Devanagari */
3162         0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3163         0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3164         0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3165         0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3166         0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3167         0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3168         0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3169         0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3170         0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3171         0x096D,0x096E,0x096F,
3172         /* test Soft halant*/
3173         0x0915,0x094d, 0x200D,
3174         /* test explicit halant */
3175         0x0915,0x094d, 0x200c,
3176         /* test double danda */
3177         0x965,
3178         /* test ASCII */
3179         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3180         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3181         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3182         /* tests from Lotus */
3183         0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3184         0x0930,0x094D,0x200D,
3185         0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3186         0x0915,0x0921,0x002B,0x095F,
3187         /* tamil range */
3188         0x0B86, 0xB87, 0xB88,
3189         /* telugu range */
3190         0x0C05, 0x0C02, 0x0C03,0x0c31,
3191         /* kannada range */
3192         0x0C85, 0xC82, 0x0C83,
3193         /* test Abbr sign and Anudatta */
3194         0x0970, 0x952,
3195        /* 0x0958,
3196         0x0959,
3197         0x095A,
3198         0x095B,
3199         0x095C,
3200         0x095D,
3201         0x095E,
3202         0x095F,*/
3203         0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3204         0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3205         0x090C ,
3206         0x0962,
3207         0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3208         0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3209         0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3210         0x093D /* Avagraha  0xEA, 0xE9*/,
3211         0x0958,
3212         0x0959,
3213         0x095A,
3214         0x095B,
3215         0x095C,
3216         0x095D,
3217         0x095E,
3218         0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3219       };
3220     static const unsigned char byteArr[]={
3221 
3222         0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3223         0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3224         0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3225         0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3226         0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3227         0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3228         0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3229         0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3230         0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3231         0xf8,0xf9,0xfa,
3232         /* test soft halant */
3233         0xb3, 0xE8, 0xE9,
3234         /* test explicit halant */
3235         0xb3, 0xE8, 0xE8,
3236         /* test double danda */
3237         0xea, 0xea,
3238         /* test ASCII */
3239         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3240         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3241         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3242         /* test ATR code */
3243 
3244         /* tests from Lotus */
3245         0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3246         0xEF,0x42,0xCF,0xE8,0xD9,
3247         0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3248         0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3249         /* tamil range */
3250         0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3251         /* telugu range */
3252         0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3253         /* kannada range */
3254         0xEF, 0x48,0xa4, 0xa2, 0xa3,
3255         /* anudatta and abbreviation sign */
3256         0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3257 
3258 
3259         0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3260 
3261         0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3262 
3263         0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3264 
3265         0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3266 
3267         0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3268 
3269         0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3270 
3271         0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3272 
3273         0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3274 
3275         0xB3, 0xE9, /* Ka + NUKTA */
3276 
3277         0xB4, 0xE9, /* Kha + NUKTA */
3278 
3279         0xB5, 0xE9, /* Ga + NUKTA */
3280 
3281         0xBA, 0xE9,
3282 
3283         0xBF, 0xE9,
3284 
3285         0xC0, 0xE9,
3286 
3287         0xC9, 0xE9,
3288         /* INV halant RA    */
3289         0xD9, 0xE8, 0xCF,
3290         0x00, 0x00A0,
3291         /* just consume unhandled codepoints */
3292         0xEF, 0x30,
3293 
3294     };
3295     testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,true);
3296     TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3297 
3298 }
3299 
3300 static void
TestISO_2022_JPnull3301 TestISO_2022_JP() {
3302     /* test input */
3303     static const uint16_t in[]={
3304         0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3305         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3306         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3307         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3308         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3309         0x201D, 0x3014, 0x000D, 0x000A,
3310         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3311         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3312         };
3313     const UChar* uSource;
3314     const UChar* uSourceLimit;
3315     const char* cSource;
3316     const char* cSourceLimit;
3317     UChar *uTargetLimit =NULL;
3318     UChar *uTarget;
3319     char *cTarget;
3320     const char *cTargetLimit;
3321     char *cBuf = NULL;
3322     UChar *uBuf = NULL;
3323     UChar *test;
3324     int32_t uBufSize = 120;
3325     UErrorCode errorCode=U_ZERO_ERROR;
3326     UConverter *cnv = NULL;
3327     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3328     int32_t* myOff= offsets;
3329     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3330     if(U_FAILURE(errorCode)) {
3331         log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3332         goto cleanup;
3333     }
3334 
3335     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3336     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3337     uSource = (const UChar*)in;
3338     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3339     cTarget = cBuf;
3340     cTargetLimit = cBuf +uBufSize*5;
3341     uTarget = uBuf;
3342     uTargetLimit = uBuf+ uBufSize*5;
3343     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3344     if(U_FAILURE(errorCode)){
3345         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3346         goto cleanup;
3347     }
3348     cSource = cBuf;
3349     cSourceLimit =cTarget;
3350     test =uBuf;
3351     myOff=offsets;
3352     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3353     if(U_FAILURE(errorCode)){
3354         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3355         goto cleanup;
3356     }
3357 
3358     uSource = (const UChar*)in;
3359     while(uSource<uSourceLimit){
3360         if(*test!=*uSource){
3361 
3362             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3363         }
3364         uSource++;
3365         test++;
3366     }
3367 
3368     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3369     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3370     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3371     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3372     TestJitterbug930("csISO2022JP");
3373 
3374 cleanup:
3375     ucnv_close(cnv);
3376     free(uBuf);
3377     free(cBuf);
3378     free(offsets);
3379 }
3380 
TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen)3381 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3382     const UChar* uSource;
3383     const UChar* uSourceLimit;
3384     const char* cSource;
3385     const char* cSourceLimit;
3386     UChar *uTargetLimit =NULL;
3387     UChar *uTarget;
3388     char *cTarget;
3389     const char *cTargetLimit;
3390     char *cBuf;
3391     UChar *uBuf,*test;
3392     int32_t uBufSize = 120*10;
3393     UErrorCode errorCode=U_ZERO_ERROR;
3394     UConverter *cnv;
3395     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3396     int32_t* myOff= offsets;
3397     cnv=my_ucnv_open(conv, &errorCode);
3398     if(U_FAILURE(errorCode)) {
3399         log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3400         return;
3401     }
3402 
3403     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3404     cBuf =(char*)malloc(uBufSize * sizeof(char));
3405     uSource = (const UChar*)in;
3406     uSourceLimit=uSource+len;
3407     cTarget = cBuf;
3408     cTargetLimit = cBuf +uBufSize;
3409     uTarget = uBuf;
3410     uTargetLimit = uBuf+ uBufSize;
3411     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3412     if(U_FAILURE(errorCode)){
3413         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3414         return;
3415     }
3416     /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3417     cSource = cBuf;
3418     cSourceLimit =cTarget;
3419     test =uBuf;
3420     myOff=offsets;
3421     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3422     if(U_FAILURE(errorCode)){
3423         log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3424         return;
3425     }
3426 
3427     uSource = (const UChar*)in;
3428     while(uSource<uSourceLimit){
3429         if(*test!=*uSource){
3430             log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3431         }
3432         uSource++;
3433         test++;
3434     }
3435     TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3436     TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3437     TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3438     if(byteArr && byteArrLen!=0){
3439         TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3440         TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3441         {
3442             cSource = byteArr;
3443             cSourceLimit = cSource+byteArrLen;
3444             test=uBuf;
3445             myOff = offsets;
3446             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3447             if(U_FAILURE(errorCode)){
3448                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3449                 return;
3450             }
3451 
3452             uSource = (const UChar*)in;
3453             while(uSource<uSourceLimit){
3454                 if(*test!=*uSource){
3455                     log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3456                 }
3457                 uSource++;
3458                 test++;
3459             }
3460         }
3461     }
3462 
3463     ucnv_close(cnv);
3464     free(uBuf);
3465     free(cBuf);
3466     free(offsets);
3467 }
3468 static UChar U_CALLCONV
_charAt(int32_t offset, void *context)3469 _charAt(int32_t offset, void *context) {
3470     return ((char*)context)[offset];
3471 }
3472 
3473 static int32_t
unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status)3474 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3475     int32_t srcIndex=0;
3476     int32_t dstIndex=0;
3477     if(U_FAILURE(*status)){
3478         return 0;
3479     }
3480     if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3481         *status = U_ILLEGAL_ARGUMENT_ERROR;
3482         return 0;
3483     }
3484     if(srcLen==-1){
3485         srcLen = (int32_t)uprv_strlen(src);
3486     }
3487 
3488     for (; srcIndex<srcLen; ) {
3489         UChar32 c = src[srcIndex++];
3490         if (c == 0x005C /*'\\'*/) {
3491             c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3492             if (c == (UChar32)0xFFFFFFFF) {
3493                 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3494                 break; /* invalid escape sequence */
3495             }
3496         }
3497         if(dstIndex < dstLen){
3498             if(c>0xFFFF){
3499                dst[dstIndex++] = U16_LEAD(c);
3500                if(dstIndex<dstLen){
3501                     dst[dstIndex]=U16_TRAIL(c);
3502                }else{
3503                    *status=U_BUFFER_OVERFLOW_ERROR;
3504                }
3505             }else{
3506                 dst[dstIndex]=(UChar)c;
3507             }
3508 
3509         }else{
3510             *status = U_BUFFER_OVERFLOW_ERROR;
3511         }
3512         dstIndex++; /* for preflighting */
3513     }
3514     return dstIndex;
3515 }
3516 
3517 static void
TestFullRoundtrip(const char* cp)3518 TestFullRoundtrip(const char* cp){
3519     UChar usource[10] ={0};
3520     UChar nsrc[10] = {0};
3521     uint32_t i=1;
3522     int len=0, ulen;
3523     nsrc[0]=0x0061;
3524     /* Test codepoint 0 */
3525     TestConv(usource,1,cp,"",NULL,0);
3526     TestConv(usource,2,cp,"",NULL,0);
3527     nsrc[2]=0x5555;
3528     TestConv(nsrc,3,cp,"",NULL,0);
3529 
3530     for(;i<=0x10FFFF;i++){
3531         if(i==0xD800){
3532             i=0xDFFF;
3533             continue;
3534         }
3535         if(i<=0xFFFF){
3536             usource[0] =(UChar) i;
3537             len=1;
3538         }else{
3539             usource[0]=U16_LEAD(i);
3540             usource[1]=U16_TRAIL(i);
3541             len=2;
3542         }
3543         ulen=len;
3544         if(i==0x80) {
3545             usource[2]=0;
3546         }
3547         /* Test only single code points */
3548         TestConv(usource,ulen,cp,"",NULL,0);
3549         /* Test codepoint repeated twice */
3550         usource[ulen]=usource[0];
3551         usource[ulen+1]=usource[1];
3552         ulen+=len;
3553         TestConv(usource,ulen,cp,"",NULL,0);
3554         /* Test codepoint repeated 3 times */
3555         usource[ulen]=usource[0];
3556         usource[ulen+1]=usource[1];
3557         ulen+=len;
3558         TestConv(usource,ulen,cp,"",NULL,0);
3559         /* Test codepoint in between 2 codepoints */
3560         nsrc[1]=usource[0];
3561         nsrc[2]=usource[1];
3562         nsrc[len+1]=0x5555;
3563         TestConv(nsrc,len+2,cp,"",NULL,0);
3564         uprv_memset(usource,0,sizeof(UChar)*10);
3565     }
3566 }
3567 
3568 static void
TestRoundTrippingAllUTF(void)3569 TestRoundTrippingAllUTF(void){
3570     if(!getTestOption(QUICK_OPTION)){
3571         log_verbose("Running exhaustive round trip test for BOCU-1\n");
3572         TestFullRoundtrip("BOCU-1");
3573         log_verbose("Running exhaustive round trip test for SCSU\n");
3574         TestFullRoundtrip("SCSU");
3575         log_verbose("Running exhaustive round trip test for UTF-8\n");
3576         TestFullRoundtrip("UTF-8");
3577         log_verbose("Running exhaustive round trip test for CESU-8\n");
3578         TestFullRoundtrip("CESU-8");
3579         log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3580         TestFullRoundtrip("UTF-16BE");
3581         log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3582         TestFullRoundtrip("UTF-16LE");
3583         log_verbose("Running exhaustive round trip test for UTF-16\n");
3584         TestFullRoundtrip("UTF-16");
3585         log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3586         TestFullRoundtrip("UTF-32BE");
3587         log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3588         TestFullRoundtrip("UTF-32LE");
3589         log_verbose("Running exhaustive round trip test for UTF-32\n");
3590         TestFullRoundtrip("UTF-32");
3591         log_verbose("Running exhaustive round trip test for UTF-7\n");
3592         TestFullRoundtrip("UTF-7");
3593         log_verbose("Running exhaustive round trip test for UTF-7\n");
3594         TestFullRoundtrip("UTF-7,version=1");
3595         log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3596         TestFullRoundtrip("IMAP-mailbox-name");
3597         /*
3598          *
3599          * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3600          * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3601          * The old mappings remain as fallbacks.
3602          * This test may be reintroduced at a later time.
3603          *
3604          * 110118 - mow
3605          */
3606          /*
3607          log_verbose("Running exhaustive round trip test for GB18030\n");
3608          TestFullRoundtrip("GB18030");
3609          */
3610     }
3611 }
3612 
3613 static void
TestSCSUnull3614 TestSCSU() {
3615 
3616     static const uint16_t germanUTF16[]={
3617         0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3618     };
3619 
3620     static const uint8_t germanSCSU[]={
3621         0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3622     };
3623 
3624     static const uint16_t russianUTF16[]={
3625         0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3626     };
3627 
3628     static const uint8_t russianSCSU[]={
3629         0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3630     };
3631 
3632     static const uint16_t japaneseUTF16[]={
3633         0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3634         0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3635         0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3636         0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3637         0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3638         0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3639         0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3640         0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3641         0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3642         0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3643         0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3644         0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3645         0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3646         0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3647         0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3648     };
3649 
3650     /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3651      it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3652     static const uint8_t japaneseSCSU[]={
3653         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3654         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3655         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3656         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3657         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3658         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3659         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3660         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3661         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3662         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3663         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3664         0xcb, 0x82
3665     };
3666 
3667     static const uint16_t allFeaturesUTF16[]={
3668         0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3669         0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3670         0x01df, 0xf000, 0xdbff, 0xdfff
3671     };
3672 
3673     /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3674      * result here (34B vs. 35B)
3675      */
3676     static const uint8_t allFeaturesSCSU[]={
3677         0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3678         0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3679         0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3680         0xdf, 0x14, 0x80, 0x15, 0xff
3681     };
3682     static const uint16_t monkeyIn[]={
3683         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3684         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3685         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3686         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3687         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3688         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3689         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3690         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3691         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3692         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3693         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3694         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3695         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3696         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3697         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3698         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3699         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3700         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3701         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3702         /* test non-BMP code points */
3703         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3704         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3705         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3706         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3707         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3708         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3709         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3710         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3711         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3712         0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3713         0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3714 
3715 
3716         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3717         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3718         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3719         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3720         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3721     };
3722     static const char *fTestCases [] = {
3723           "\\ud800\\udc00", /* smallest surrogate*/
3724           "\\ud8ff\\udcff",
3725           "\\udBff\\udFff", /* largest surrogate pair*/
3726           "\\ud834\\udc00",
3727           "\\U0010FFFF",
3728           "Hello \\u9292 \\u9192 World!",
3729           "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3730           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3731 
3732           "\\u0648\\u06c8", /* catch missing reset*/
3733           "\\u0648\\u06c8",
3734 
3735           "\\u4444\\uE001", /* lowest quotable*/
3736           "\\u4444\\uf2FF", /* highest quotable*/
3737           "\\u4444\\uf188\\u4444",
3738           "\\u4444\\uf188\\uf288",
3739           "\\u4444\\uf188abc\\u0429\\uf288",
3740           "\\u9292\\u2222",
3741           "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3742           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3743           "Hello World!123456",
3744           "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3745 
3746           "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3747           "abc\\u4411d",      /* uses SQU*/
3748           "abc\\u4411\\u4412d",/* uses SCU*/
3749           "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3750           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3751           "\\u9292\\u2222",
3752           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3753           "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3754           "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3755 
3756           "", /* empty input*/
3757           "\\u0000", /* smallest BMP character*/
3758           "\\uFFFF", /* largest BMP character*/
3759 
3760           /* regression tests*/
3761           "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3762           "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3763           "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3764           "\\u0041\\u00df\\u0401\\u015f",
3765           "\\u9066\\u2123abc",
3766           "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3767           "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3768     };
3769     int i=0;
3770     for(;i<UPRV_LENGTHOF(fTestCases);i++){
3771         const char* cSrc = fTestCases[i];
3772         UErrorCode status = U_ZERO_ERROR;
3773         int32_t cSrcLen,srcLen;
3774         UChar* src;
3775         /* UConverter* cnv = ucnv_open("SCSU",&status); */
3776         cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3777         src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3778         srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3779         log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3780         TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3781         free(src);
3782     }
3783     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3784     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3785     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3786     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3787     TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3788     TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3789     TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3790 }
3791 
3792 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug2346null3793 static void TestJitterbug2346(){
3794     char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3795                       0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3796     uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3797 
3798     UChar uTarget[500]={'\0'};
3799     UChar* utarget=uTarget;
3800     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3801 
3802     char cTarget[500]={'\0'};
3803     char* ctarget=cTarget;
3804     char* ctargetLimit=cTarget+sizeof(cTarget);
3805     const char* csource=source;
3806     UChar* temp = expected;
3807     UErrorCode err=U_ZERO_ERROR;
3808 
3809     UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3810     if(U_FAILURE(err)) {
3811         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3812         return;
3813     }
3814     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,true,&err);
3815     if(U_FAILURE(err)) {
3816         log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3817         return;
3818     }
3819     utargetLimit=utarget;
3820     utarget = uTarget;
3821     while(utarget<utargetLimit){
3822         if(*temp!=*utarget){
3823 
3824             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3825         }
3826         utarget++;
3827         temp++;
3828     }
3829     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,true,&err);
3830     if(U_FAILURE(err)) {
3831         log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3832         return;
3833     }
3834     ctargetLimit=ctarget;
3835     ctarget =cTarget;
3836     ucnv_close(conv);
3837 
3838 
3839 }
3840 
3841 static void
TestISO_2022_JP_1null3842 TestISO_2022_JP_1() {
3843     /* test input */
3844     static const uint16_t in[]={
3845         0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3846         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3847         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3848         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3849         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3850         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3851         0x201D, 0x000D, 0x000A,
3852         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3853         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3854         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3855         0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3856         0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3857         0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3858       };
3859     const UChar* uSource;
3860     const UChar* uSourceLimit;
3861     const char* cSource;
3862     const char* cSourceLimit;
3863     UChar *uTargetLimit =NULL;
3864     UChar *uTarget;
3865     char *cTarget;
3866     const char *cTargetLimit;
3867     char *cBuf;
3868     UChar *uBuf,*test;
3869     int32_t uBufSize = 120;
3870     UErrorCode errorCode=U_ZERO_ERROR;
3871     UConverter *cnv;
3872 
3873     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3874     if(U_FAILURE(errorCode)) {
3875         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3876         return;
3877     }
3878 
3879     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3880     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3881     uSource = (const UChar*)in;
3882     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3883     cTarget = cBuf;
3884     cTargetLimit = cBuf +uBufSize*5;
3885     uTarget = uBuf;
3886     uTargetLimit = uBuf+ uBufSize*5;
3887     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,true, &errorCode);
3888     if(U_FAILURE(errorCode)){
3889         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3890         return;
3891     }
3892     cSource = cBuf;
3893     cSourceLimit =cTarget;
3894     test =uBuf;
3895     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,true,&errorCode);
3896     if(U_FAILURE(errorCode)){
3897         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3898         return;
3899     }
3900     uSource = (const UChar*)in;
3901     while(uSource<uSourceLimit){
3902         if(*test!=*uSource){
3903 
3904             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3905         }
3906         uSource++;
3907         test++;
3908     }
3909     /*ucnv_close(cnv);
3910     cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3911     /*Test for the condition where there is an invalid character*/
3912     ucnv_reset(cnv);
3913     {
3914         static const uint8_t source2[]={0x0e,0x24,0x053};
3915         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3916     }
3917     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3918     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3919     ucnv_close(cnv);
3920     free(uBuf);
3921     free(cBuf);
3922 }
3923 
3924 static void
TestISO_2022_JP_2null3925 TestISO_2022_JP_2() {
3926     /* test input */
3927     static const uint16_t in[]={
3928         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3929         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3930         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3931         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3932         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3933         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3934         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3935         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3936         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3937         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3938         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3939         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3940         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3941         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3942         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3943         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3944         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3945         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3946         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3947       };
3948     const UChar* uSource;
3949     const UChar* uSourceLimit;
3950     const char* cSource;
3951     const char* cSourceLimit;
3952     UChar *uTargetLimit =NULL;
3953     UChar *uTarget;
3954     char *cTarget;
3955     const char *cTargetLimit;
3956     char *cBuf = NULL;
3957     UChar *uBuf = NULL;
3958     UChar *test;
3959     int32_t uBufSize = 120;
3960     UErrorCode errorCode=U_ZERO_ERROR;
3961     UConverter *cnv = NULL;
3962     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3963     int32_t* myOff= offsets;
3964     cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3965     if(U_FAILURE(errorCode)) {
3966         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3967         goto cleanup;
3968     }
3969 
3970     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3971     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3972     uSource = (const UChar*)in;
3973     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3974     cTarget = cBuf;
3975     cTargetLimit = cBuf +uBufSize*5;
3976     uTarget = uBuf;
3977     uTargetLimit = uBuf+ uBufSize*5;
3978     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3979     if(U_FAILURE(errorCode)){
3980         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3981         goto cleanup;
3982     }
3983     cSource = cBuf;
3984     cSourceLimit =cTarget;
3985     test =uBuf;
3986     myOff=offsets;
3987     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3988     if(U_FAILURE(errorCode)){
3989         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3990         goto cleanup;
3991     }
3992     uSource = (const UChar*)in;
3993     while(uSource<uSourceLimit){
3994         if(*test!=*uSource){
3995 
3996             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3997         }
3998         uSource++;
3999         test++;
4000     }
4001     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4002     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4003     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4004     /*Test for the condition where there is an invalid character*/
4005     ucnv_reset(cnv);
4006     {
4007         static const uint8_t source2[]={0x0e,0x24,0x053};
4008         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4009     }
4010 
4011 cleanup:
4012     ucnv_close(cnv);
4013     free(uBuf);
4014     free(cBuf);
4015     free(offsets);
4016 }
4017 
4018 static void
TestISO_2022_KRnull4019 TestISO_2022_KR() {
4020     /* test input */
4021     static const uint16_t in[]={
4022                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4023                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4024                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4025                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4026                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4027                    ,0x53E3,0x53E4,0x000A,0x000D};
4028     const UChar* uSource;
4029     const UChar* uSourceLimit;
4030     const char* cSource;
4031     const char* cSourceLimit;
4032     UChar *uTargetLimit =NULL;
4033     UChar *uTarget;
4034     char *cTarget;
4035     const char *cTargetLimit;
4036     char *cBuf = NULL;
4037     UChar *uBuf = NULL;
4038     UChar *test;
4039     int32_t uBufSize = 120;
4040     UErrorCode errorCode=U_ZERO_ERROR;
4041     UConverter *cnv = NULL;
4042     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4043     int32_t* myOff= offsets;
4044     cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4045     if(U_FAILURE(errorCode)) {
4046         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4047         goto cleanup;
4048     }
4049 
4050     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4051     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4052     uSource = (const UChar*)in;
4053     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4054     cTarget = cBuf;
4055     cTargetLimit = cBuf +uBufSize*5;
4056     uTarget = uBuf;
4057     uTargetLimit = uBuf+ uBufSize*5;
4058     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4059     if(U_FAILURE(errorCode)){
4060         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4061         goto cleanup;
4062     }
4063     cSource = cBuf;
4064     cSourceLimit =cTarget;
4065     test =uBuf;
4066     myOff=offsets;
4067     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4068     if(U_FAILURE(errorCode)){
4069         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4070         goto cleanup;
4071     }
4072     uSource = (const UChar*)in;
4073     while(uSource<uSourceLimit){
4074         if(*test!=*uSource){
4075             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4076         }
4077         uSource++;
4078         test++;
4079     }
4080     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4081     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4082     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4083     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4084     TestJitterbug930("csISO2022KR");
4085     /*Test for the condition where there is an invalid character*/
4086     ucnv_reset(cnv);
4087     {
4088         static const uint8_t source2[]={0x1b,0x24,0x053};
4089         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4090         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4091     }
4092 
4093 cleanup:
4094     ucnv_close(cnv);
4095     free(uBuf);
4096     free(cBuf);
4097     free(offsets);
4098 }
4099 
4100 static void
TestISO_2022_KR_1null4101 TestISO_2022_KR_1() {
4102     /* test input */
4103     static const uint16_t in[]={
4104                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4105                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4106                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4107                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4108                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4109                    ,0x53E3,0x53E4,0x000A,0x000D};
4110     const UChar* uSource;
4111     const UChar* uSourceLimit;
4112     const char* cSource;
4113     const char* cSourceLimit;
4114     UChar *uTargetLimit =NULL;
4115     UChar *uTarget;
4116     char *cTarget;
4117     const char *cTargetLimit;
4118     char *cBuf = NULL;
4119     UChar *uBuf = NULL;
4120     UChar *test;
4121     int32_t uBufSize = 120;
4122     UErrorCode errorCode=U_ZERO_ERROR;
4123     UConverter *cnv = NULL;
4124     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4125     int32_t* myOff= offsets;
4126     cnv=ucnv_open("ibm-25546", &errorCode);
4127     if(U_FAILURE(errorCode)) {
4128         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4129         goto cleanup;
4130     }
4131 
4132     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4133     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4134     uSource = (const UChar*)in;
4135     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4136     cTarget = cBuf;
4137     cTargetLimit = cBuf +uBufSize*5;
4138     uTarget = uBuf;
4139     uTargetLimit = uBuf+ uBufSize*5;
4140     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4141     if(U_FAILURE(errorCode)){
4142         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4143         goto cleanup;
4144     }
4145     cSource = cBuf;
4146     cSourceLimit =cTarget;
4147     test =uBuf;
4148     myOff=offsets;
4149     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4150     if(U_FAILURE(errorCode)){
4151         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4152         goto cleanup;
4153     }
4154     uSource = (const UChar*)in;
4155     while(uSource<uSourceLimit){
4156         if(*test!=*uSource){
4157             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4158         }
4159         uSource++;
4160         test++;
4161     }
4162     ucnv_reset(cnv);
4163     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4164     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4165     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4166     ucnv_reset(cnv);
4167     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4168         /*Test for the condition where there is an invalid character*/
4169     ucnv_reset(cnv);
4170     {
4171         static const uint8_t source2[]={0x1b,0x24,0x053};
4172         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4173         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4174     }
4175 
4176 cleanup:
4177     ucnv_close(cnv);
4178     free(uBuf);
4179     free(cBuf);
4180     free(offsets);
4181 }
4182 
TestJitterbug2411null4183 static void TestJitterbug2411(){
4184     static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4185                          "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4186     UConverter* kr=NULL, *kr1=NULL;
4187     UErrorCode errorCode = U_ZERO_ERROR;
4188     UChar tgt[100]={'\0'};
4189     UChar* target = tgt;
4190     UChar* targetLimit = target+100;
4191     kr=ucnv_open("iso-2022-kr", &errorCode);
4192     if(U_FAILURE(errorCode)) {
4193         log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4194         return;
4195     }
4196     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,true,&errorCode);
4197     if(U_FAILURE(errorCode)) {
4198         log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4199         return;
4200     }
4201     kr1 = ucnv_open("ibm-25546", &errorCode);
4202     if(U_FAILURE(errorCode)) {
4203         log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4204         return;
4205     }
4206     target = tgt;
4207     targetLimit = target+100;
4208     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,true,&errorCode);
4209 
4210     if(U_FAILURE(errorCode)) {
4211         log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4212         return;
4213     }
4214 
4215     ucnv_close(kr);
4216     ucnv_close(kr1);
4217 
4218 }
4219 
/*
 * To-Unicode checks for the converters named "JIS", "JIS7" and "JIS8".
 *
 * Each scope below holds one byte sequence, the UTF-16 text expected from it
 * and the expected offsets, and passes them to testConvertToU().
 */
static void
TestJIS(){
    /* From Unicode moved to testdata/conversion.txt */
    /*To Unicode*/

    /* "JIS" */
    {
        static const uint8_t jisBytes[] = {
            0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
        };
        static const uint16_t jisText[] = {
            0x0041, 0x0042,
            0xFF81, 0xFF82,
            0x3000
        };
        static const int32_t jisOffsets[]={
            3,4,
            8,9,
            16
        };

        testConvertToU(jisBytes,sizeof(jisBytes),jisText,
            UPRV_LENGTHOF(jisText),"JIS", jisOffsets,true);
    }

    /* "JIS7" */
    {
        static const uint8_t jis7Bytes[] = {
            0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
            0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
            0x1b,0x24,0x42,0x21,0x21,
            0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
            0x21,0x22,
            0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
        };
        static const uint16_t jis7Text[] = {
            0x0041, 0x0042,
            0xFF81, 0xFF82,
            0x3000,
            0xFF81, 0xFF82,
            0x3001,
            0x3000
        };
        static const int32_t jis7Offsets[]={
            3,4,
            8,9,
            13,16,
            17,
            19,27
        };

        testConvertToU(jis7Bytes,sizeof(jis7Bytes),jis7Text,
            UPRV_LENGTHOF(jis7Text),"JIS7", jis7Offsets,true);
    }

    /* "JIS8" */
    {
        static const uint8_t jis8Bytes[] = {
            0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
            0xa1,0xc8,0xd9,/*Katakana Set*/
            0x1b,0x28,0x42,
            0x41,0x42,
            0xb1,0xc3, /*Katakana Set*/
            0x1b,0x24,0x42,0x21,0x21
        };
        static const uint16_t jis8Text[] = {
            0x0041, 0x0042,
            0xff61, 0xff88, 0xff99,
            0x0041, 0x0042,
            0xff71, 0xff83,
            0x3000
        };
        static const int32_t jis8Offsets[]={
            3, 4,  5,  6,
            7, 11, 12, 13,
            14, 18,
        };

        testConvertToU(jis8Bytes,sizeof(jis8Bytes),jis8Text,
            UPRV_LENGTHOF(jis8Text),"JIS8", jis8Offsets,true);
    }
}
4294 
4295 
4296 #if 0
/* ICU 4.4 (ticket #7314) removed the mappings for CNS 11643 planes 3..7, so the code up to the matching #endif is disabled. */
4298 
4299 static void TestJitterbug915(){
4300 /* tests for roundtripping of the below sequence
4301 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4302 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4303 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4304 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4305 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4306 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4307 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4308 */
4309     static const char cSource[]={
4310         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4311         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4312         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4313         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4314         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4315         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4316         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4317         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4318         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4319         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4320         0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4321         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4322         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4323         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4324         0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4325         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4326         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4327         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4328         0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4329         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4330         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4331         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4332         0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4333         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4334         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4335         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4336         0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4337         0x37, 0x20, 0x2A, 0x2F
4338     };
4339     UChar uTarget[500]={'\0'};
4340     UChar* utarget=uTarget;
4341     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4342 
4343     char cTarget[500]={'\0'};
4344     char* ctarget=cTarget;
4345     char* ctargetLimit=cTarget+sizeof(cTarget);
4346     const char* csource=cSource;
4347     const char* tempSrc = cSource;
4348     UErrorCode err=U_ZERO_ERROR;
4349 
4350     UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4351     if(U_FAILURE(err)) {
4352         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4353         return;
4354     }
4355     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,true,&err);
4356     if(U_FAILURE(err)) {
4357         log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4358         return;
4359     }
4360     utargetLimit=utarget;
4361     utarget = uTarget;
4362     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,true,&err);
4363     if(U_FAILURE(err)) {
4364         log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4365         return;
4366     }
4367     ctargetLimit=ctarget;
4368     ctarget =cTarget;
4369     while(ctarget<ctargetLimit){
4370         if(*ctarget != *tempSrc){
4371             log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4372         }
4373         ++ctarget;
4374         ++tempSrc;
4375     }
4376 
4377     ucnv_close(conv);
4378 }
4379 
4380 static void
4381 TestISO_2022_CN_EXT() {
4382     /* test input */
4383     static const uint16_t in[]={
4384                 /* test Non-BMP code points */
4385          0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4386          0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4387          0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4388          0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4389          0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4390          0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4391          0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4392          0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4393          0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4394          0xD869, 0xDED5,
4395 
4396          0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4397          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4398          0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4399          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4400          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4401          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4402          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4403          0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4404          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4405          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4406          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4407          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4408          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4409          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4410          0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4411          0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4412          0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4413          0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4414 
4415          0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4416 
4417       };
4418 
4419     const UChar* uSource;
4420     const UChar* uSourceLimit;
4421     const char* cSource;
4422     const char* cSourceLimit;
4423     UChar *uTargetLimit =NULL;
4424     UChar *uTarget;
4425     char *cTarget;
4426     const char *cTargetLimit;
4427     char *cBuf = NULL;
4428     UChar *uBuf = NULL;
4429     UChar *test;
4430     int32_t uBufSize = 180;
4431     UErrorCode errorCode=U_ZERO_ERROR;
4432     UConverter *cnv = NULL;
4433     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4434     int32_t* myOff= offsets;
4435     cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4436     if(U_FAILURE(errorCode)) {
4437         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4438         goto cleanup;
4439     }
4440 
4441     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4442     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4443     uSource = (const UChar*)in;
4444     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4445     cTarget = cBuf;
4446     cTargetLimit = cBuf +uBufSize*5;
4447     uTarget = uBuf;
4448     uTargetLimit = uBuf+ uBufSize*5;
4449     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4450     if(U_FAILURE(errorCode)){
4451         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4452         goto cleanup;
4453     }
4454     cSource = cBuf;
4455     cSourceLimit =cTarget;
4456     test =uBuf;
4457     myOff=offsets;
4458     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4459     if(U_FAILURE(errorCode)){
4460         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4461         goto cleanup;
4462     }
4463     uSource = (const UChar*)in;
4464     while(uSource<uSourceLimit){
4465         if(*test!=*uSource){
4466             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4467         }
4468         else{
4469             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4470         }
4471         uSource++;
4472         test++;
4473     }
4474     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4475     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4476     /*Test for the condition where there is an invalid character*/
4477     ucnv_reset(cnv);
4478     {
4479         static const uint8_t source2[]={0x0e,0x24,0x053};
4480         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4481     }
4482 
4483 cleanup:
4484     ucnv_close(cnv);
4485     free(uBuf);
4486     free(cBuf);
4487     free(offsets);
4488 }
4489 #endif
4490 
4491 static void
TestISO_2022_CNnull4492 TestISO_2022_CN() {
4493     /* test input */
4494     static const uint16_t in[]={
4495          /* jitterbug 951 */
4496          0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4497          0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4498          0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4499          0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4500          0x0020, 0x0045, 0x004e, 0x0044,
4501          /**/
4502          0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4503          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4504          0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4505          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4506          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4507          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4508          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4509          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4510          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4511          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4512          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4513          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4514          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4515          0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4516          0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4517          0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4518          0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4519 
4520       };
4521     const UChar* uSource;
4522     const UChar* uSourceLimit;
4523     const char* cSource;
4524     const char* cSourceLimit;
4525     UChar *uTargetLimit =NULL;
4526     UChar *uTarget;
4527     char *cTarget;
4528     const char *cTargetLimit;
4529     char *cBuf = NULL;
4530     UChar *uBuf = NULL;
4531     UChar *test;
4532     int32_t uBufSize = 180;
4533     UErrorCode errorCode=U_ZERO_ERROR;
4534     UConverter *cnv = NULL;
4535     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4536     int32_t* myOff= offsets;
4537     cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4538     if(U_FAILURE(errorCode)) {
4539         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4540         goto cleanup;
4541     }
4542 
4543     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4544     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4545     uSource = (const UChar*)in;
4546     uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4547     cTarget = cBuf;
4548     cTargetLimit = cBuf +uBufSize*5;
4549     uTarget = uBuf;
4550     uTargetLimit = uBuf+ uBufSize*5;
4551     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4552     if(U_FAILURE(errorCode)){
4553         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4554         goto cleanup;
4555     }
4556     cSource = cBuf;
4557     cSourceLimit =cTarget;
4558     test =uBuf;
4559     myOff=offsets;
4560     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4561     if(U_FAILURE(errorCode)){
4562         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4563         goto cleanup;
4564     }
4565     uSource = (const UChar*)in;
4566     while(uSource<uSourceLimit){
4567         if(*test!=*uSource){
4568             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4569         }
4570         else{
4571             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4572         }
4573         uSource++;
4574         test++;
4575     }
4576     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4577     TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4578     TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4579     TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4580     TestJitterbug930("csISO2022CN");
4581     /*Test for the condition where there is an invalid character*/
4582     ucnv_reset(cnv);
4583     {
4584         static const uint8_t source2[]={0x0e,0x24,0x053};
4585         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4586     }
4587 
4588 cleanup:
4589     ucnv_close(cnv);
4590     free(uBuf);
4591     free(cBuf);
4592     free(offsets);
4593 }
4594 
/* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/* One entry of the test table walked by TestJitterbug6175: a converter name plus the bytes to decode with it. */
typedef struct {
    const char *    converterName;      /* name handed to ucnv_open(); NULL marks the table terminator */
    const char *    inputText;          /* start of the input bytes passed to ucnv_toUnicode() */
    int             inputTextLength;    /* number of bytes in inputText (the table fills it with sizeof) */
} EmptySegmentTest;
4601 
4602 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, int32_t length, UConverterCallbackReason reason, UErrorCode * err )4603 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4604                                              int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4605     // suppress compiler warnings about unused variables
4606     (void)context;
4607     (void)codeUnits;
4608     (void)length;
4609     if (reason > UCNV_IRREGULAR) {
4610         return;
4611     }
4612     if (reason != UCNV_IRREGULAR) {
4613         log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4614     }
4615     /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4616     *err = U_ZERO_ERROR;
4617     ucnv_cbToUWriteSub(toArgs,0,err);
4618 }
4619 
4620 enum { kEmptySegmentToUCharsMax = 64 };
TestJitterbug6175(void)4621 static void TestJitterbug6175(void) {
4622     static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4623     static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4624     static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4625     static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4626     static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4627     static const EmptySegmentTest emptySegmentTests[] = {
4628         /* converterName inputText    inputTextLength */
4629         { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4630         { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4631         { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4632         { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4633         { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4634         /* terminator: */
4635         { NULL,          NULL,        0,                  }
4636     };
4637     const EmptySegmentTest * testPtr;
4638     for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4639         UErrorCode   err = U_ZERO_ERROR;
4640         UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4641         if (U_FAILURE(err)) {
4642             log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4643             return;
4644         }
4645         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4646         if (U_FAILURE(err)) {
4647             log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4648             ucnv_close(cnv);
4649             return;
4650         }
4651         {
4652             UChar         toUChars[kEmptySegmentToUCharsMax];
4653             UChar *       toUCharsPtr = toUChars;
4654             const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4655             const char *  inCharsPtr = testPtr->inputText;
4656             const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4657             ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, true, &err);
4658         }
4659         ucnv_close(cnv);
4660     }
4661 }
4662 
4663 static void
TestEBCDIC_STATEFULnull4664 TestEBCDIC_STATEFUL() {
4665     /* test input */
4666     static const uint8_t in[]={
4667         0x61,
4668         0x1a,
4669         0x0f, 0x4b,
4670         0x42,
4671         0x40,
4672         0x36,
4673     };
4674 
4675     /* expected test results */
4676     static const int32_t results[]={
4677         /* number of bytes read, code point */
4678         1, 0x002f,
4679         1, 0x0092,
4680         2, 0x002e,
4681         1, 0xff62,
4682         1, 0x0020,
4683         1, 0x0096,
4684 
4685     };
4686     static const uint8_t in2[]={
4687         0x0f,
4688         0xa1,
4689         0x01
4690     };
4691 
4692     /* expected test results */
4693     static const int32_t results2[]={
4694         /* number of bytes read, code point */
4695         2, 0x203E,
4696         1, 0x0001,
4697     };
4698 
4699     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4700     UErrorCode errorCode=U_ZERO_ERROR;
4701     UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4702     if(U_FAILURE(errorCode)) {
4703         log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4704         return;
4705     }
4706     TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4707     ucnv_reset(cnv);
4708      /* Test the condition when source >= sourceLimit */
4709     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4710     ucnv_reset(cnv);
4711     /*Test for the condition where source > sourcelimit after consuming the shift character */
4712     {
4713         static const uint8_t source1[]={0x0f};
4714         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4715     }
4716     /*Test for the condition where there is an invalid character*/
4717     ucnv_reset(cnv);
4718     {
4719         static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4720         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4721     }
4722     ucnv_reset(cnv);
4723     source=(const char*)in2;
4724     limit=(const char*)in2+sizeof(in2);
4725     TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4726     ucnv_close(cnv);
4727 
4728 }
4729 
4730 static void
TestGB18030null4731 TestGB18030() {
4732     /* test input */
4733     static const uint8_t in[]={
4734         0x24,
4735         0x7f,
4736         0x81, 0x30, 0x81, 0x30,
4737         0xa8, 0xbf,
4738         0xa2, 0xe3,
4739         0xd2, 0xbb,
4740         0x82, 0x35, 0x8f, 0x33,
4741         0x84, 0x31, 0xa4, 0x39,
4742         0x90, 0x30, 0x81, 0x30,
4743         0xe3, 0x32, 0x9a, 0x35
4744 #if 0
4745         /*
4746          * Feature removed   markus 2000-oct-26
4747          * Only some codepages must match surrogate pairs into supplementary code points -
4748          * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4749          * GB 18030 provides direct encodings for supplementary code points, therefore
4750          * it must not combine two single-encoded surrogates into one code point.
4751          */
4752         0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4753 #endif
4754     };
4755 
4756     /* expected test results */
4757     static const int32_t results[]={
4758         /* number of bytes read, code point */
4759         1, 0x24,
4760         1, 0x7f,
4761         4, 0x80,
4762         2, 0x1f9,
4763         2, 0x20ac,
4764         2, 0x4e00,
4765         4, 0x9fa6,
4766         4, 0xffff,
4767         4, 0x10000,
4768         4, 0x10ffff
4769 #if 0
4770         /* Feature removed. See comment above. */
4771         8, 0x10000
4772 #endif
4773     };
4774 
4775 /*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4776     UErrorCode errorCode=U_ZERO_ERROR;
4777     UConverter *cnv=ucnv_open("gb18030", &errorCode);
4778     if(U_FAILURE(errorCode)) {
4779         log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4780         return;
4781     }
4782     TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4783     ucnv_close(cnv);
4784 }
4785 
4786 static void
TestLMBCSnull4787 TestLMBCS() {
4788     /* LMBCS-1 string */
4789     static const uint8_t pszLMBCS[]={
4790         0x61,
4791         0x01, 0x29,
4792         0x81,
4793         0xA0,
4794         0x0F, 0x27,
4795         0x0F, 0x91,
4796         0x14, 0x0a, 0x74,
4797         0x14, 0xF6, 0x02,
4798         0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4799         0x10, 0x88, 0xA0,
4800     };
4801 
4802     /* Unicode UChar32 equivalents */
4803     static const UChar32 pszUnicode32[]={
4804         /* code point */
4805         0x00000061,
4806         0x00002013,
4807         0x000000FC,
4808         0x000000E1,
4809         0x00000007,
4810         0x00000091,
4811         0x00000a74,
4812         0x00000200,
4813         0x00023456, /* code point for surrogate pair */
4814         0x00005516
4815     };
4816 
4817 /* Unicode UChar equivalents */
4818     static const UChar pszUnicode[]={
4819         /* code point */
4820         0x0061,
4821         0x2013,
4822         0x00FC,
4823         0x00E1,
4824         0x0007,
4825         0x0091,
4826         0x0a74,
4827         0x0200,
4828         0xD84D, /* low surrogate */
4829         0xDC56, /* high surrogate */
4830         0x5516
4831     };
4832 
4833 /* expected test results */
4834     static const int offsets32[]={
4835         /* number of bytes read, code point */
4836         0,
4837         1,
4838         3,
4839         4,
4840         5,
4841         7,
4842         9,
4843         12,
4844         15,
4845         21,
4846         24
4847     };
4848 
4849 /* expected test results */
4850     static const int offsets[]={
4851         /* number of bytes read, code point */
4852         0,
4853         1,
4854         3,
4855         4,
4856         5,
4857         7,
4858         9,
4859         12,
4860         15,
4861         18,
4862         21,
4863         24
4864     };
4865 
4866 
4867     UConverter *cnv;
4868 
4869 #define NAME_LMBCS_1 "LMBCS-1"
4870 #define NAME_LMBCS_2 "LMBCS-2"
4871 
4872 
4873    /* Some basic open/close/property tests on some LMBCS converters */
4874     {
4875 
4876       char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4877       char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4878       char get_subchars [1];
4879       const char * get_name;
4880       UConverter *cnv1;
4881       UConverter *cnv2;
4882 
4883       int8_t len = sizeof(get_subchars);
4884 
4885       UErrorCode errorCode=U_ZERO_ERROR;
4886 
4887       /* Open */
4888       cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4889       if(U_FAILURE(errorCode)) {
4890          log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4891          return;
4892       }
4893       cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4894       if(U_FAILURE(errorCode)) {
4895          log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4896          return;
4897       }
4898 
4899       /* Name */
4900       get_name = ucnv_getName (cnv1, &errorCode);
4901       if (strcmp(NAME_LMBCS_1,get_name)){
4902          log_err("Unexpected converter name: %s\n", get_name);
4903       }
4904       get_name = ucnv_getName (cnv2, &errorCode);
4905       if (strcmp(NAME_LMBCS_2,get_name)){
4906          log_err("Unexpected converter name: %s\n", get_name);
4907       }
4908 
4909       /* substitution chars */
4910       ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4911       if(U_FAILURE(errorCode)) {
4912          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4913       }
4914       if (len!=1){
4915          log_err("Unexpected length of sub chars\n");
4916       }
4917       if (get_subchars[0] != expected_subchars[0]){
4918            log_err("Unexpected value of sub chars\n");
4919       }
4920       ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4921       if(U_FAILURE(errorCode)) {
4922          log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4923       }
4924       ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4925       if(U_FAILURE(errorCode)) {
4926          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4927       }
4928       if (len!=1){
4929          log_err("Unexpected length of sub chars\n");
4930       }
4931       if (get_subchars[0] != new_subchars[0]){
4932            log_err("Unexpected value of sub chars\n");
4933       }
4934       ucnv_close(cnv1);
4935       ucnv_close(cnv2);
4936 
4937     }
4938 
4939     /* LMBCS to Unicode - offsets */
4940     {
4941        UErrorCode errorCode=U_ZERO_ERROR;
4942 
4943        const char * pSource = (const char *)pszLMBCS;
4944        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4945 
4946        UChar Out [sizeof(pszUnicode) + 1];
4947        UChar * pOut = Out;
4948        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4949 
4950        int32_t off [sizeof(offsets)];
4951 
4952       /* last 'offset' in expected results is just the final size.
4953          (Makes other tests easier). Compensate here: */
4954 
4955        off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4956 
4957 
4958 
4959       cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4960       if(U_FAILURE(errorCode)) {
4961            log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4962            return;
4963       }
4964 
4965 
4966 
4967       ucnv_toUnicode (cnv,
4968                       &pOut,
4969                       OutLimit,
4970                       &pSource,
4971                       sourceLimit,
4972                       off,
4973                       true,
4974                       &errorCode);
4975 
4976 
4977        if (memcmp(off,offsets,sizeof(offsets)))
4978        {
4979          log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4980        }
4981        if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4982        {
4983          log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4984        }
4985        ucnv_close(cnv);
4986     }
4987     {
4988    /* LMBCS to Unicode - getNextUChar */
4989       const char * sourceStart;
4990       const char *source=(const char *)pszLMBCS;
4991       const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4992       const UChar32 *results= pszUnicode32;
4993       const int *off = offsets32;
4994 
4995       UErrorCode errorCode=U_ZERO_ERROR;
4996       UChar32 uniChar;
4997 
4998       cnv=ucnv_open("LMBCS-1", &errorCode);
4999       if(U_FAILURE(errorCode)) {
5000            log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5001            return;
5002       }
5003       else
5004       {
5005 
5006          while(source<limit) {
5007             sourceStart=source;
5008             uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
5009             if(U_FAILURE(errorCode)) {
5010                   log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
5011                   break;
5012             } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
5013                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
5014                    uniChar, (source-sourceStart), *results, *off);
5015                break;
5016             }
5017             results++;
5018             off++;
5019          }
5020        }
5021        ucnv_close(cnv);
5022     }
5023     { /* test locale & optimization group operations: Unicode to LMBCS */
5024 
5025       UErrorCode errorCode=U_ZERO_ERROR;
5026       UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5027       UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5028       UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5029       UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5030       const UChar * pUniOut = uniString;
5031       UChar * pUniIn = uniString;
5032       uint8_t lmbcsString [4];
5033       const char * pLMBCSOut = (const char *)lmbcsString;
5034       char * pLMBCSIn = (char *)lmbcsString;
5035 
5036       /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5037       ucnv_fromUnicode (cnv16he,
5038                         &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5039                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5040                         NULL, 1, &errorCode);
5041 
5042       if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5043       {
5044          log_err("LMBCS-16,locale=he gives unexpected translation\n");
5045       }
5046 
5047       pLMBCSIn= (char *)lmbcsString;
5048       pUniOut = uniString;
5049       ucnv_fromUnicode (cnv01us,
5050                         &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5051                         &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5052                         NULL, 1, &errorCode);
5053 
5054       if (lmbcsString[0] != 0x9F)
5055       {
5056          log_err("LMBCS-1,locale=US gives unexpected translation\n");
5057       }
5058 
5059       /* single byte char from mbcs char set */
5060       lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5061       pLMBCSOut = (const char *)lmbcsString;
5062       pUniIn = uniString;
5063       ucnv_toUnicode (cnv16jp,
5064                         &pUniIn, pUniIn + 1,
5065                         &pLMBCSOut, (pLMBCSOut + 1),
5066                         NULL, 1, &errorCode);
5067       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5068       {
5069            log_err("Unexpected results from LMBCS-16 single byte char\n");
5070       }
5071       /* convert to group 1: should be 3 bytes */
5072       pLMBCSIn = (char *)lmbcsString;
5073       pUniOut = uniString;
5074       ucnv_fromUnicode (cnv01us,
5075                         &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5076                         &pUniOut, pUniOut + 1,
5077                         NULL, 1, &errorCode);
5078       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5079          || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5080       {
5081            log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5082       }
5083       pLMBCSOut = (const char *)lmbcsString;
5084       pUniIn = uniString;
5085       ucnv_toUnicode (cnv01us,
5086                         &pUniIn, pUniIn + 1,
5087                         &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5088                         NULL, 1, &errorCode);
5089       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5090       {
5091            log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5092       }
5093       pLMBCSIn = (char *)lmbcsString;
5094       pUniOut = uniString;
5095       ucnv_fromUnicode (cnv16jp,
5096                         &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5097                         &pUniOut, pUniOut + 1,
5098                         NULL, 1, &errorCode);
5099       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5100       {
5101            log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5102       }
5103       ucnv_close(cnv16he);
5104       ucnv_close(cnv16jp);
5105       ucnv_close(cnv01us);
5106     }
5107     {
5108        /* Small source buffer testing, LMBCS -> Unicode */
5109 
5110        UErrorCode errorCode=U_ZERO_ERROR;
5111 
5112        const char * pSource = (const char *)pszLMBCS;
5113        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5114        int codepointCount = 0;
5115 
5116        UChar Out [sizeof(pszUnicode) + 1];
5117        UChar * pOut = Out;
5118        UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5119 
5120 
5121        cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5122        if(U_FAILURE(errorCode)) {
5123            log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5124            return;
5125        }
5126 
5127 
5128        while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5129        {
5130            ucnv_toUnicode (cnv,
5131                &pOut,
5132                OutLimit,
5133                &pSource,
5134                (pSource+1), /* claim that this is a 1- byte buffer */
5135                NULL,
5136                false,    /* false means there might be more chars in the next buffer */
5137                &errorCode);
5138 
5139            if (U_SUCCESS (errorCode))
5140            {
5141                if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5142                {
5143                    /* we are on to the next code point: check value */
5144 
5145                    if (Out[0] != pszUnicode[codepointCount]){
5146                        log_err("LMBCS->Uni result %lx should have been %lx \n",
5147                            Out[0], pszUnicode[codepointCount]);
5148                    }
5149 
5150                    pOut = Out; /* reset for accumulating next code point */
5151                    codepointCount++;
5152                }
5153            }
5154            else
5155            {
5156                log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5157            }
5158        }
5159        {
5160          /* limits & surrogate error testing */
5161          char LIn [sizeof(pszLMBCS)];
5162          const char * pLIn = LIn;
5163 
5164          char LOut [sizeof(pszLMBCS)];
5165          char * pLOut = LOut;
5166 
5167          UChar UOut [sizeof(pszUnicode)];
5168          UChar * pUOut = UOut;
5169 
5170          UChar UIn [sizeof(pszUnicode)];
5171          const UChar * pUIn = UIn;
5172 
5173          int32_t off [sizeof(offsets)];
5174          UChar32 uniChar;
5175 
5176          errorCode=U_ZERO_ERROR;
5177 
5178          /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5179          pUIn++;
5180          ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, false, &errorCode);
5181          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5182          {
5183             log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5184          }
5185          pUIn--;
5186 
5187          errorCode=U_ZERO_ERROR;
5188          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,false, &errorCode);
5189          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5190          {
5191             log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5192          }
5193          errorCode=U_ZERO_ERROR;
5194 
5195          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5196          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5197          {
5198             log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5199          }
5200          errorCode=U_ZERO_ERROR;
5201 
5202          /* 0 byte source request - no error, no pointer movement */
5203          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,false, &errorCode);
5204          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,false, &errorCode);
5205          if(U_FAILURE(errorCode)) {
5206             log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5207          }
5208          if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5209          {
5210               log_err("Unexpected pointer move in 0 byte source request \n");
5211          }
5212          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5213          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5214          if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5215          {
5216             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5217          }
5218          if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5219          {
5220             log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5221          }
5222          errorCode = U_ZERO_ERROR;
5223 
5224          /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5225 
5226          pUIn = pszUnicode;
5227          ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,false, &errorCode);
5228          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5229          {
5230             log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5231          }
5232 
5233          errorCode = U_ZERO_ERROR;
5234 
5235          pLIn = (const char *)pszLMBCS;
5236          ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,false, &errorCode);
5237          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5238          {
5239             log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5240          }
5241 
5242          /* unpaired or chopped LMBCS surrogates */
5243 
5244          /* OK high surrogate, Low surrogate is chopped */
5245          LIn [0] = (char)0x14;
5246          LIn [1] = (char)0xD8;
5247          LIn [2] = (char)0x01;
5248          LIn [3] = (char)0x14;
5249          LIn [4] = (char)0xDC;
5250          pLIn = LIn;
5251          errorCode = U_ZERO_ERROR;
5252          pUOut = UOut;
5253 
5254          ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5255          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5256          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5257          {
5258             log_err("Unexpected results on chopped low surrogate\n");
5259          }
5260 
5261          /* chopped at surrogate boundary */
5262          LIn [0] = (char)0x14;
5263          LIn [1] = (char)0xD8;
5264          LIn [2] = (char)0x01;
5265          pLIn = LIn;
5266          errorCode = U_ZERO_ERROR;
5267          pUOut = UOut;
5268 
5269          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,true, &errorCode);
5270          if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5271          {
5272             log_err("Unexpected results on chopped at surrogate boundary \n");
5273          }
5274 
5275          /* unpaired surrogate plus valid Unichar */
5276          LIn [0] = (char)0x14;
5277          LIn [1] = (char)0xD8;
5278          LIn [2] = (char)0x01;
5279          LIn [3] = (char)0x14;
5280          LIn [4] = (char)0xC9;
5281          LIn [5] = (char)0xD0;
5282          pLIn = LIn;
5283          errorCode = U_ZERO_ERROR;
5284          pUOut = UOut;
5285 
5286          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,true, &errorCode);
5287          if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5288          {
5289             log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5290          }
5291 
5292       /* unpaired surrogate plus chopped Unichar */
5293          LIn [0] = (char)0x14;
5294          LIn [1] = (char)0xD8;
5295          LIn [2] = (char)0x01;
5296          LIn [3] = (char)0x14;
5297          LIn [4] = (char)0xC9;
5298 
5299          pLIn = LIn;
5300          errorCode = U_ZERO_ERROR;
5301          pUOut = UOut;
5302 
5303          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5304          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5305          {
5306             log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5307          }
5308 
5309          /* unpaired surrogate plus valid non-Unichar */
5310          LIn [0] = (char)0x14;
5311          LIn [1] = (char)0xD8;
5312          LIn [2] = (char)0x01;
5313          LIn [3] = (char)0x0F;
5314          LIn [4] = (char)0x3B;
5315 
5316          pLIn = LIn;
5317          errorCode = U_ZERO_ERROR;
5318          pUOut = UOut;
5319 
5320          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5321          if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5322          {
5323             log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5324          }
5325 
5326          /* unpaired surrogate plus chopped non-Unichar */
5327          LIn [0] = (char)0x14;
5328          LIn [1] = (char)0xD8;
5329          LIn [2] = (char)0x01;
5330          LIn [3] = (char)0x0F;
5331 
5332          pLIn = LIn;
5333          errorCode = U_ZERO_ERROR;
5334          pUOut = UOut;
5335 
5336          ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,true, &errorCode);
5337 
5338          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5339          {
5340             log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5341          }
5342        }
5343     }
5344    ucnv_close(cnv);  /* final cleanup */
5345 }
5346 
5347 
TestJitterbug255null5348 static void TestJitterbug255()
5349 {
5350     static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5351     const char *testBuffer = (const char *)testBytes;
5352     const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5353     UErrorCode status = U_ZERO_ERROR;
5354     /*UChar32 result;*/
5355     UConverter *cnv = 0;
5356 
5357     cnv = ucnv_open("shift-jis", &status);
5358     if (U_FAILURE(status) || cnv == 0) {
5359         log_data_err("Failed to open the converter for SJIS.\n");
5360                 return;
5361     }
5362     while (testBuffer != testEnd)
5363     {
5364         /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5365         if (U_FAILURE(status))
5366         {
5367             log_err("Failed to convert the next UChar for SJIS.\n");
5368             break;
5369         }
5370     }
5371     ucnv_close(cnv);
5372 }
5373 
TestEBCDICUS4XMLnull5374 static void TestEBCDICUS4XML()
5375 {
5376     UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5377     static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5378     static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5379     static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5380     char target_x[] = {0x00, 0x00, 0x00, 0x00};
5381     UChar *unicodes = unicodes_x;
5382     const UChar *toUnicodeMaps = toUnicodeMaps_x;
5383     char *target = target_x;
5384     const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5385     UErrorCode status = U_ZERO_ERROR;
5386     UConverter *cnv = 0;
5387 
5388     cnv = ucnv_open("ebcdic-xml-us", &status);
5389     if (U_FAILURE(status) || cnv == 0) {
5390         log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5391         return;
5392     }
5393     ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, true, &status);
5394     if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5395         log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5396             u_errorName(status));
5397         printUSeqErr(unicodes_x, 3);
5398         printUSeqErr(toUnicodeMaps, 3);
5399     }
5400     status = U_ZERO_ERROR;
5401     ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, true, &status);
5402     if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5403         log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5404             u_errorName(status));
5405         printSeqErr((const unsigned char*)target_x, 3);
5406         printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5407     }
5408     ucnv_close(cnv);
5409 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5411 
5412 #if !UCONFIG_NO_COLLATION
5413 
TestJitterbug981null5414 static void TestJitterbug981(){
5415     const UChar* rules;
5416     int32_t rules_length, target_cap, bytes_needed, buff_size;
5417     UErrorCode status = U_ZERO_ERROR;
5418     UConverter *utf8cnv;
5419     UCollator* myCollator;
5420     char *buff;
5421     int numNeeded=0;
5422     utf8cnv = ucnv_open ("utf8", &status);
5423     if(U_FAILURE(status)){
5424         log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5425         return;
5426     }
5427     myCollator = ucol_open("zh", &status);
5428     if(U_FAILURE(status)){
5429         log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5430         ucnv_close(utf8cnv);
5431         return;
5432     }
5433 
5434     rules = ucol_getRules(myCollator, &rules_length);
5435     if(rules_length == 0) {
5436         log_data_err("missing zh tailoring rule string\n");
5437         ucol_close(myCollator);
5438         ucnv_close(utf8cnv);
5439         return;
5440     }
5441     buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5442     buff = malloc(buff_size);
5443 
5444     target_cap = 0;
5445     do {
5446         ucnv_reset(utf8cnv);
5447         status = U_ZERO_ERROR;
5448         if(target_cap >= buff_size) {
5449             log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5450             break;
5451         }
5452         bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5453             rules, rules_length, &status);
5454         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5455         if(numNeeded!=0 && numNeeded!= bytes_needed){
5456             log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5457             break;
5458         }
5459         numNeeded = bytes_needed;
5460     } while (status == U_BUFFER_OVERFLOW_ERROR);
5461     ucol_close(myCollator);
5462     ucnv_close(utf8cnv);
5463     free(buff);
5464 }
5465 
5466 #endif
5467 
5468 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug1293null5469 static void TestJitterbug1293(){
5470     static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5471     char target[256];
5472     UErrorCode status = U_ZERO_ERROR;
5473     UConverter* conv=NULL;
5474     int32_t target_cap, bytes_needed, numNeeded = 0;
5475     conv = ucnv_open("shift-jis",&status);
5476     if(U_FAILURE(status)){
5477       log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5478       return;
5479     }
5480 
5481     do{
5482         target_cap =0;
5483         bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5484         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5485         if(numNeeded!=0 && numNeeded!= bytes_needed){
5486           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5487         }
5488         numNeeded = bytes_needed;
5489     } while (status == U_BUFFER_OVERFLOW_ERROR);
5490     if(U_FAILURE(status)){
5491       log_err("An error occurred in ucnv_fromUChars. Error: %s", u_errorName(status));
5492       return;
5493     }
5494     ucnv_close(conv);
5495 }
5496 #endif
5497 
TestJB5275_1null5498 static void TestJB5275_1(){
5499 
5500     static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5501                                 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5502                                 /* Switch script: */
5503                                 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5504                                 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5505                                 "\xEF\x40\x3B\xB3\x0A";
5506     static const UChar expected[] ={
5507             0x003b, 0x0a15, 0x000a, /* Easy characters */
5508             0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5509             0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5510             0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5511             0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5512     };
5513 
5514     UErrorCode status = U_ZERO_ERROR;
5515     UConverter* conv = ucnv_open("iscii-gur", &status);
5516     UChar dest[100] = {'\0'};
5517     UChar* target = dest;
5518     UChar* targetLimit = dest+100;
5519     const char* source = data;
5520     const char* sourceLimit = data+strlen(data);
5521     const UChar* exp = expected;
5522 
5523     if (U_FAILURE(status)) {
5524         log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5525         return;
5526     }
5527 
5528     log_verbose("Testing switching back to default script when new line is encountered.\n");
5529     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, true, &status);
5530     if(U_FAILURE(status)){
5531         log_err("conversion failed: %s \n", u_errorName(status));
5532     }
5533     targetLimit = target;
5534     target = dest;
5535     printUSeq(target, (int)(targetLimit-target));
5536     while(target<targetLimit){
5537         if(*exp!=*target){
5538             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5539         }
5540         target++;
5541         exp++;
5542     }
5543     ucnv_close(conv);
5544 }
5545 
TestJB5275null5546 static void TestJB5275(){
5547     static const char* data =
5548     /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5549     /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5550     /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5551         "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5552         "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5553         "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5554         "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5555         "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5556         "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5557         /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5558     static const UChar expected[] ={
5559         0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5560         0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5561         0x0038, 0x0C95, 0x000A, /* Kannada test */
5562         0x0039, 0x0D15, 0x000A, /* Malayalam test */
5563         0x003A, 0x0A95, 0x000A, /* Gujarati test */
5564         0x003B, 0x0A15, 0x000A, /* Punjabi test */
5565     };
5566 
5567     UErrorCode status = U_ZERO_ERROR;
5568     UConverter* conv = ucnv_open("iscii", &status);
5569     UChar dest[100] = {'\0'};
5570     UChar* target = dest;
5571     UChar* targetLimit = dest+100;
5572     const char* source = data;
5573     const char* sourceLimit = data+strlen(data);
5574     const UChar* exp = expected;
5575     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, true, &status);
5576     if(U_FAILURE(status)){
5577         log_data_err("conversion failed: %s \n", u_errorName(status));
5578     }
5579     targetLimit = target;
5580     target = dest;
5581 
5582     printUSeq(target, (int)(targetLimit-target));
5583 
5584     while(target<targetLimit){
5585         if(*exp!=*target){
5586             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5587         }
5588         target++;
5589         exp++;
5590     }
5591     ucnv_close(conv);
5592 }
5593 
5594 static void
TestIsFixedWidthnull5595 TestIsFixedWidth() {
5596     UErrorCode status = U_ZERO_ERROR;
5597     UConverter *cnv = NULL;
5598     int32_t i;
5599 
5600     const char *fixedWidth[] = {
5601             "US-ASCII",
5602             "UTF32",
5603             "ibm-5478_P100-1995"
5604     };
5605 
5606     const char *notFixedWidth[] = {
5607             "GB18030",
5608             "UTF8",
5609             "windows-949-2000",
5610             "UTF16"
5611     };
5612 
5613     for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5614         cnv = ucnv_open(fixedWidth[i], &status);
5615         if (cnv == NULL || U_FAILURE(status)) {
5616             log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5617             continue;
5618         }
5619 
5620         if (!ucnv_isFixedWidth(cnv, &status)) {
5621             log_err("%s is a fixedWidth converter but returned false.\n", fixedWidth[i]);
5622         }
5623         ucnv_close(cnv);
5624     }
5625 
5626     for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5627         cnv = ucnv_open(notFixedWidth[i], &status);
5628         if (cnv == NULL || U_FAILURE(status)) {
5629             log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5630             continue;
5631         }
5632 
5633         if (ucnv_isFixedWidth(cnv, &status)) {
5634             log_err("%s is NOT a fixedWidth converter but returned true.\n", notFixedWidth[i]);
5635         }
5636         ucnv_close(cnv);
5637     }
5638 }
5639