1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
9 *
10 * File nucnvtst.c
11 *
12 * Modification History:
13 * Name Description
14 * Steven R. Loomis 7/8/1999 Adding input buffer test
15 ********************************************************************************
16 */
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include "cstring.h"
20 #include "unicode/uloc.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
24 #include "cintltst.h"
25 #include "unicode/utypes.h"
26 #include "unicode/ustring.h"
27 #include "unicode/ucol.h"
28 #include "unicode/utf16.h"
29 #include "cmemory.h"
30 #include "nucnvtst.h"
31
32 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
33 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
34 #if !UCONFIG_NO_COLLATION
35 static void TestJitterbug981(void);
36 #endif
37 #if !UCONFIG_NO_LEGACY_CONVERSION
38 static void TestJitterbug1293(void);
39 #endif
40 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
41 static void TestConverterTypesAndStarters(void);
42 static void TestAmbiguous(void);
43 static void TestSignatureDetection(void);
44 static void TestUTF7(void);
45 static void TestIMAP(void);
46 static void TestUTF8(void);
47 static void TestCESU8(void);
48 static void TestUTF16(void);
49 static void TestUTF16BE(void);
50 static void TestUTF16LE(void);
51 static void TestUTF32(void);
52 static void TestUTF32BE(void);
53 static void TestUTF32LE(void);
54 static void TestLATIN1(void);
55
56 #if !UCONFIG_NO_LEGACY_CONVERSION
57 static void TestSBCS(void);
58 static void TestDBCS(void);
59 static void TestMBCS(void);
60 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
61 static void TestICCRunout(void);
62 #endif
63
64 #ifdef U_ENABLE_GENERIC_ISO_2022
65 static void TestISO_2022(void);
66 #endif
67
68 static void TestISO_2022_JP(void);
69 static void TestISO_2022_JP_1(void);
70 static void TestISO_2022_JP_2(void);
71 static void TestISO_2022_KR(void);
72 static void TestISO_2022_KR_1(void);
73 static void TestISO_2022_CN(void);
74 #if 0
75 /*
76 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
77 */
78 static void TestISO_2022_CN_EXT(void);
79 #endif
80 static void TestJIS(void);
81 static void TestHZ(void);
82 #endif
83
84 static void TestSCSU(void);
85
86 #if !UCONFIG_NO_LEGACY_CONVERSION
87 static void TestEBCDIC_STATEFUL(void);
88 static void TestGB18030(void);
89 static void TestLMBCS(void);
90 static void TestJitterbug255(void);
91 static void TestEBCDICUS4XML(void);
92 #if 0
93 /*
94 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
95 */
96 static void TestJitterbug915(void);
97 #endif
98 static void TestISCII(void);
99
100 static void TestCoverageMBCS(void);
101 static void TestJitterbug2346(void);
102 static void TestJitterbug2411(void);
103 static void TestJB5275(void);
104 static void TestJB5275_1(void);
105 static void TestJitterbug6175(void);
106
107 static void TestIsFixedWidth(void);
108 #endif
109
110 static void TestInBufSizes(void);
111
112 static void TestRoundTrippingAllUTF(void);
113 static void TestConv(const uint16_t in[],
114 int len,
115 const char* conv,
116 const char* lang,
117 char byteArr[],
118 int byteArrLen);
119
120 /* open a converter, using test data if it begins with '@' */
121 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
122
123
/* Number of elements in the scratch output/offset buffers of the conversion helpers below. */
#define NEW_MAX_BUFFER 999

/*
 * Per-call chunk limits used by testConvertFromU()/testConvertToU():
 * at most gInBufferSize source units are offered and at most gOutBufferSize
 * target units are made available to the converter in one call.
 */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Label written by setNuConvTestName() and appended to the log messages. */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y.
 * Both arguments and the whole expansion are parenthesized so that operands
 * containing low-precedence operators (e.g. a|b) compare and expand correctly.
 * The selected argument is evaluated twice; do not pass expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
131
my_ucnv_open(const char *cnv, UErrorCode *err)132 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
133 {
134 if(cnv && cnv[0] == '@') {
135 return ucnv_openPackage(loadTestData(err), cnv+1, err);
136 } else {
137 return ucnv_open(cnv, err);
138 }
139 }
140
/* Writes the first len bytes of a to the verbose log as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
149
/* Writes the first len UChars of a to the verbose log as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for(idx = 0; idx < len; ++idx) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
157
/* Writes the first len bytes of a to stderr as "{0xNN 0xNN ... }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
166
/* Writes the first len UChars of a to stderr as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for(idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
175
176 static void
TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)177 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
178 {
179 const char* s0;
180 const char* s=(char*)source;
181 const int32_t *r=results;
182 UErrorCode errorCode=U_ZERO_ERROR;
183 UChar32 c;
184
185 while(s<limit) {
186 s0=s;
187 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
188 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
189 break; /* no more significant input */
190 } else if(U_FAILURE(errorCode)) {
191 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
192 break;
193 } else if(
194 /* test the expected number of input bytes only if >=0 */
195 (*r>=0 && (int32_t)(s-s0)!=*r) ||
196 c!=*(r+1)
197 ) {
198 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
199 message, c, (s-s0), *(r+1), *r);
200 break;
201 }
202 r+=2;
203 }
204 }
205
206 static void
TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)207 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
208 {
209 const char* s=(char*)source;
210 UErrorCode errorCode=U_ZERO_ERROR;
211 uint32_t c;
212 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
213 if(errorCode != expected){
214 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
215 }
216 if(c != 0xFFFD && c != 0xffff){
217 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
218 }
219
220 }
221
TestInBufSizes(void)222 static void TestInBufSizes(void)
223 {
224 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
225 #if 1
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
230 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
231 TestNewConvertWithBufferSizes(1,1);
232 TestNewConvertWithBufferSizes(2,3);
233 TestNewConvertWithBufferSizes(3,2);
234 #endif
235 }
236
TestOutBufSizes(void)237 static void TestOutBufSizes(void)
238 {
239 #if 1
240 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
241 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
242 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
243 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
244 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
245 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
246
247 #endif
248 }
249
250
/*
 * Registers the converter tests of this file below "tsconv/nucnvtst/".
 * The set that gets registered depends on the build switches
 * UCONFIG_NO_FILE_IO, UCONFIG_NO_LEGACY_CONVERSION, UCONFIG_NO_COLLATION
 * and U_ENABLE_GENERIC_ISO_2022.
 */
void addTestNewConvert(TestNode** root)
{
#if !UCONFIG_NO_FILE_IO
    addTest(root, TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
    addTest(root, TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
#endif
    addTest(root, TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
    addTest(root, TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
    addTest(root, TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
    addTest(root, TestUTF7, "tsconv/nucnvtst/TestUTF7");
    addTest(root, TestIMAP, "tsconv/nucnvtst/TestIMAP");
    addTest(root, TestUTF8, "tsconv/nucnvtst/TestUTF8");

    /* ucnv_getNextUChar() tests for charsets that encode single surrogates with complete byte sequences */
    addTest(root, TestCESU8, "tsconv/nucnvtst/TestCESU8");
    addTest(root, TestUTF16, "tsconv/nucnvtst/TestUTF16");
    addTest(root, TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
    addTest(root, TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
    addTest(root, TestUTF32, "tsconv/nucnvtst/TestUTF32");
    addTest(root, TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
    addTest(root, TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
#endif

    addTest(root, TestLATIN1, "tsconv/nucnvtst/TestLATIN1");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestSBCS, "tsconv/nucnvtst/TestSBCS");
#if !UCONFIG_NO_FILE_IO
    addTest(root, TestDBCS, "tsconv/nucnvtst/TestDBCS");
    addTest(root, TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
#endif
    addTest(root, TestMBCS, "tsconv/nucnvtst/TestMBCS");

#ifdef U_ENABLE_GENERIC_ISO_2022
    addTest(root, TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
#endif

    addTest(root, TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
    addTest(root, TestJIS, "tsconv/nucnvtst/TestJIS");
    addTest(root, TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
    addTest(root, TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
    addTest(root, TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
    addTest(root, TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
    addTest(root, TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
    /*
     * Not registered: ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
     *   TestISO_2022_CN_EXT  "tsconv/nucnvtst/TestISO_2022_CN_EXT"
     *   TestJitterbug915     "tsconv/nucnvtst/TestJitterbug915"
     */
    addTest(root, TestHZ, "tsconv/nucnvtst/TestHZ");
#endif

    addTest(root, TestSCSU, "tsconv/nucnvtst/TestSCSU");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
    addTest(root, TestGB18030, "tsconv/nucnvtst/TestGB18030");
    addTest(root, TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
    addTest(root, TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
    addTest(root, TestISCII, "tsconv/nucnvtst/TestISCII");
    addTest(root, TestJB5275, "tsconv/nucnvtst/TestJB5275");
    addTest(root, TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
#if !UCONFIG_NO_COLLATION
    addTest(root, TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
#endif

    addTest(root, TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
#endif


#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
    addTest(root, TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
#endif

    addTest(root, TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
    addTest(root, TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
    addTest(root, TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");

    addTest(root, TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth");
#endif
}
338
339
340 /* Note that this test already makes use of statics, so it's not really
341 multithread safe.
342 This convenience function lets us make the error messages actually useful.
343 */
344
setNuConvTestName(const char *codepage, const char *direction)345 static void setNuConvTestName(const char *codepage, const char *direction)
346 {
347 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
348 codepage,
349 direction,
350 (int)gInBufferSize,
351 (int)gOutBufferSize);
352 }
353
/* Result codes returned by testConvertFromU() and testConvertToU(). */
typedef enum
{
    /* test was OK */
    TC_OK,
    /* Match failed - err was printed */
    TC_MISMATCH,
    /* Test failed, don't print an err because it was already printed. */
    TC_FAIL
} ETestConvertResult;
360
361 /* Note: This function uses global variables and it will not do offset
362 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, const char *codepage, const int32_t *expectOffsets , UBool useFallback)363 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
364 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
365 {
366 UErrorCode status = U_ZERO_ERROR;
367 UConverter *conv = 0;
368 char junkout[NEW_MAX_BUFFER]; /* FIX */
369 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
370 char *p;
371 const UChar *src;
372 char *end;
373 char *targ;
374 int32_t *offs;
375 int i;
376 int32_t realBufferSize;
377 char *realBufferEnd;
378 const UChar *realSourceEnd;
379 const UChar *sourceLimit;
380 UBool checkOffsets = true;
381 UBool doFlush;
382
383 for(i=0;i<NEW_MAX_BUFFER;i++)
384 junkout[i] = (char)0xF0;
385 for(i=0;i<NEW_MAX_BUFFER;i++)
386 junokout[i] = 0xFF;
387
388 setNuConvTestName(codepage, "FROM");
389
390 log_verbose("\n========= %s\n", gNuConvTestName);
391
392 conv = my_ucnv_open(codepage, &status);
393
394 if(U_FAILURE(status))
395 {
396 log_data_err("Couldn't open converter %s\n",codepage);
397 return TC_FAIL;
398 }
399 if(useFallback){
400 ucnv_setFallback(conv,useFallback);
401 }
402
403 log_verbose("Converter opened..\n");
404
405 src = source;
406 targ = junkout;
407 offs = junokout;
408
409 realBufferSize = UPRV_LENGTHOF(junkout);
410 realBufferEnd = junkout + realBufferSize;
411 realSourceEnd = source + sourceLen;
412
413 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
414 checkOffsets = false;
415
416 do
417 {
418 end = nct_min(targ + gOutBufferSize, realBufferEnd);
419 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
420
421 doFlush = (UBool)(sourceLimit == realSourceEnd);
422
423 if(targ == realBufferEnd) {
424 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
425 return TC_FAIL;
426 }
427 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
428
429
430 status = U_ZERO_ERROR;
431
432 ucnv_fromUnicode (conv,
433 &targ,
434 end,
435 &src,
436 sourceLimit,
437 checkOffsets ? offs : NULL,
438 doFlush, /* flush if we're at the end of the input data */
439 &status);
440 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
441
442 if(U_FAILURE(status)) {
443 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
444 return TC_FAIL;
445 }
446
447 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
448 sourceLen, targ-junkout);
449
450 if(getTestOption(VERBOSITY_OPTION))
451 {
452 char junk[9999];
453 char offset_str[9999];
454 char *ptr;
455
456 junk[0] = 0;
457 offset_str[0] = 0;
458 for(ptr = junkout;ptr<targ;ptr++) {
459 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
460 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
461 }
462
463 log_verbose(junk);
464 printSeq((const uint8_t *)expect, expectLen);
465 if ( checkOffsets ) {
466 log_verbose("\nOffsets:");
467 log_verbose(offset_str);
468 }
469 log_verbose("\n");
470 }
471 ucnv_close(conv);
472
473 if(expectLen != targ-junkout) {
474 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
475 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
476 fprintf(stderr, "Got:\n");
477 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
478 fprintf(stderr, "Expected:\n");
479 printSeqErr((const unsigned char*)expect, expectLen);
480 return TC_MISMATCH;
481 }
482
483 if (checkOffsets && (expectOffsets != 0) ) {
484 log_verbose("comparing %d offsets..\n", targ-junkout);
485 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
486 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
487 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
488 log_err("\n");
489 log_err("Got : ");
490 for(p=junkout;p<targ;p++) {
491 log_err("%d,", junokout[p-junkout]);
492 }
493 log_err("\n");
494 log_err("Expected: ");
495 for(i=0; i<(targ-junkout); i++) {
496 log_err("%d,", expectOffsets[i]);
497 }
498 log_err("\n");
499 }
500 }
501
502 log_verbose("comparing..\n");
503 if(!memcmp(junkout, expect, expectLen)) {
504 log_verbose("Matches!\n");
505 return TC_OK;
506 } else {
507 log_err("String does not match u->%s\n", gNuConvTestName);
508 printUSeqErr(source, sourceLen);
509 fprintf(stderr, "Got:\n");
510 printSeqErr((const unsigned char *)junkout, expectLen);
511 fprintf(stderr, "Expected:\n");
512 printSeqErr((const unsigned char *)expect, expectLen);
513
514 return TC_MISMATCH;
515 }
516 }
517
518 /* Note: This function uses global variables and it will not do offset
519 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, const char *codepage, const int32_t *expectOffsets, UBool useFallback)520 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
521 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
522 {
523 UErrorCode status = U_ZERO_ERROR;
524 UConverter *conv = 0;
525 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
526 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
527 const char *src;
528 const char *realSourceEnd;
529 const char *srcLimit;
530 UChar *p;
531 UChar *targ;
532 UChar *end;
533 int32_t *offs;
534 int i;
535 UBool checkOffsets = true;
536
537 int32_t realBufferSize;
538 UChar *realBufferEnd;
539
540
541 for(i=0;i<NEW_MAX_BUFFER;i++)
542 junkout[i] = 0xFFFE;
543
544 for(i=0;i<NEW_MAX_BUFFER;i++)
545 junokout[i] = -1;
546
547 setNuConvTestName(codepage, "TO");
548
549 log_verbose("\n========= %s\n", gNuConvTestName);
550
551 conv = my_ucnv_open(codepage, &status);
552
553 if(U_FAILURE(status))
554 {
555 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
556 return TC_FAIL;
557 }
558 if(useFallback){
559 ucnv_setFallback(conv,useFallback);
560 }
561 log_verbose("Converter opened..\n");
562
563 src = (const char *)source;
564 targ = junkout;
565 offs = junokout;
566
567 realBufferSize = UPRV_LENGTHOF(junkout);
568 realBufferEnd = junkout + realBufferSize;
569 realSourceEnd = src + sourcelen;
570
571 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
572 checkOffsets = false;
573
574 do
575 {
576 end = nct_min( targ + gOutBufferSize, realBufferEnd);
577 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
578
579 if(targ == realBufferEnd)
580 {
581 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
582 return TC_FAIL;
583 }
584 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
585
586 /* oldTarg = targ; */
587
588 status = U_ZERO_ERROR;
589
590 ucnv_toUnicode (conv,
591 &targ,
592 end,
593 &src,
594 srcLimit,
595 checkOffsets ? offs : NULL,
596 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
597 &status);
598
599 /* offs += (targ-oldTarg); */
600
601 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
602
603 if(U_FAILURE(status))
604 {
605 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
606 return TC_FAIL;
607 }
608
609 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
610 sourcelen, targ-junkout);
611 if(getTestOption(VERBOSITY_OPTION))
612 {
613 char junk[9999];
614 char offset_str[9999];
615 UChar *ptr;
616
617 junk[0] = 0;
618 offset_str[0] = 0;
619
620 for(ptr = junkout;ptr<targ;ptr++)
621 {
622 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
623 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
624 }
625
626 log_verbose(junk);
627 printUSeq(expect, expectlen);
628 if ( checkOffsets )
629 {
630 log_verbose("\nOffsets:");
631 log_verbose(offset_str);
632 }
633 log_verbose("\n");
634 }
635 ucnv_close(conv);
636
637 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
638
639 if (checkOffsets && (expectOffsets != 0))
640 {
641 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
642 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
643 log_err("Got: ");
644 for(p=junkout;p<targ;p++) {
645 log_err("%d,", junokout[p-junkout]);
646 }
647 log_err("\n");
648 log_err("Expected: ");
649 for(i=0; i<(targ-junkout); i++) {
650 log_err("%d,", expectOffsets[i]);
651 }
652 log_err("\n");
653 log_err("output: ");
654 for(i=0; i<(targ-junkout); i++) {
655 log_err("%X,", junkout[i]);
656 }
657 log_err("\n");
658 log_err("input: ");
659 for(i=0; i<(src-(const char *)source); i++) {
660 log_err("%X,", (unsigned char)source[i]);
661 }
662 log_err("\n");
663 }
664 }
665
666 if(!memcmp(junkout, expect, expectlen*2))
667 {
668 log_verbose("Matches!\n");
669 return TC_OK;
670 }
671 else
672 {
673 log_err("String does not match. %s\n", gNuConvTestName);
674 log_verbose("String does not match. %s\n", gNuConvTestName);
675 printf("\nGot:");
676 printUSeqErr(junkout, expectlen);
677 printf("\nExpected:");
678 printUSeqErr(expect, expectlen);
679 return TC_MISMATCH;
680 }
681 }
682
683
TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )684 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
685 {
686 /** test chars #1 */
687 /* 1 2 3 1Han 2Han 3Han . */
688 static const UChar sampleText[] =
689 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
690 static const UChar sampleTextRoundTripUnmappable[] =
691 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
692
693
694 static const uint8_t expectedUTF8[] =
695 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
696 static const int32_t toUTF8Offs[] =
697 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
698 static const int32_t fmUTF8Offs[] =
699 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
700
701 #ifdef U_ENABLE_GENERIC_ISO_2022
702 /* Same as UTF8, but with ^[%B preceding */
703 static const const uint8_t expectedISO2022[] =
704 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
705 static const int32_t toISO2022Offs[] =
706 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
707 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
708 static const int32_t fmISO2022Offs[] =
709 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
710 #endif
711
712 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
713 static const uint8_t expectedIBM930[] =
714 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
715 static const int32_t toIBM930Offs[] =
716 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
717 static const int32_t fmIBM930Offs[] =
718 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
719
720 /* 1 2 3 0 h1 h2 h3 . MBCS*/
721 static const uint8_t expectedIBM943[] =
722 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
723 static const int32_t toIBM943Offs [] =
724 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
725 static const int32_t fmIBM943Offs[] =
726 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
727
728 /* 1 2 3 0 h1 h2 h3 . DBCS*/
729 static const uint8_t expectedIBM9027[] =
730 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
731 static const int32_t toIBM9027Offs [] =
732 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
733
734 /* 1 2 3 0 <?> <?> <?> . SBCS*/
735 static const uint8_t expectedIBM920[] =
736 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
737 static const int32_t toIBM920Offs [] =
738 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
739
740 /* 1 2 3 0 <?> <?> <?> . SBCS*/
741 static const uint8_t expectedISO88593[] =
742 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
743 static const int32_t toISO88593Offs[] =
744 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
745
746 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
747 static const uint8_t expectedLATIN1[] =
748 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
749 static const int32_t toLATIN1Offs[] =
750 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
751
752
753 /* etc */
754 static const uint8_t expectedUTF16BE[] =
755 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
756 static const int32_t toUTF16BEOffs[]=
757 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
758 static const int32_t fmUTF16BEOffs[] =
759 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
760
761 static const uint8_t expectedUTF16LE[] =
762 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
763 static const int32_t toUTF16LEOffs[]=
764 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
765 static const int32_t fmUTF16LEOffs[] =
766 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
767
768 static const uint8_t expectedUTF32BE[] =
769 { 0x00, 0x00, 0x00, 0x31,
770 0x00, 0x00, 0x00, 0x32,
771 0x00, 0x00, 0x00, 0x33,
772 0x00, 0x00, 0x00, 0x00,
773 0x00, 0x00, 0x4e, 0x00,
774 0x00, 0x00, 0x4e, 0x8c,
775 0x00, 0x00, 0x4e, 0x09,
776 0x00, 0x00, 0x00, 0x2e,
777 0x00, 0x02, 0x00, 0x21 };
778 static const int32_t toUTF32BEOffs[]=
779 { 0x00, 0x00, 0x00, 0x00,
780 0x01, 0x01, 0x01, 0x01,
781 0x02, 0x02, 0x02, 0x02,
782 0x03, 0x03, 0x03, 0x03,
783 0x04, 0x04, 0x04, 0x04,
784 0x05, 0x05, 0x05, 0x05,
785 0x06, 0x06, 0x06, 0x06,
786 0x07, 0x07, 0x07, 0x07,
787 0x08, 0x08, 0x08, 0x08,
788 0x08, 0x08, 0x08, 0x08 };
789 static const int32_t fmUTF32BEOffs[] =
790 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
791
792 static const uint8_t expectedUTF32LE[] =
793 { 0x31, 0x00, 0x00, 0x00,
794 0x32, 0x00, 0x00, 0x00,
795 0x33, 0x00, 0x00, 0x00,
796 0x00, 0x00, 0x00, 0x00,
797 0x00, 0x4e, 0x00, 0x00,
798 0x8c, 0x4e, 0x00, 0x00,
799 0x09, 0x4e, 0x00, 0x00,
800 0x2e, 0x00, 0x00, 0x00,
801 0x21, 0x00, 0x02, 0x00 };
802 static const int32_t toUTF32LEOffs[]=
803 { 0x00, 0x00, 0x00, 0x00,
804 0x01, 0x01, 0x01, 0x01,
805 0x02, 0x02, 0x02, 0x02,
806 0x03, 0x03, 0x03, 0x03,
807 0x04, 0x04, 0x04, 0x04,
808 0x05, 0x05, 0x05, 0x05,
809 0x06, 0x06, 0x06, 0x06,
810 0x07, 0x07, 0x07, 0x07,
811 0x08, 0x08, 0x08, 0x08,
812 0x08, 0x08, 0x08, 0x08 };
813 static const int32_t fmUTF32LEOffs[] =
814 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
815
816
817
818
819 /** Test chars #2 **/
820
821 /* Sahha [health], slashed h's */
822 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
823 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
824
825 /* LMBCS */
826 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
827 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
828 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
829 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
830 /*********************************** START OF CODE finally *************/
831
832 gInBufferSize = insize;
833 gOutBufferSize = outsize;
834
835 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
836
837
838 /*UTF-8*/
839 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
840 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,false );
841
842 log_verbose("Test surrogate behaviour for UTF8\n");
843 {
844 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
845 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
846 0xf0, 0x90, 0x90, 0x81,
847 0xef, 0xbf, 0xbd
848 };
849 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
850 testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
851 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,false );
852
853
854 }
855
856 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
857 /*ISO-2022*/
858 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
859 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,false );
860 #endif
861
862 /*UTF16 LE*/
863 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
864 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,false );
865 /*UTF16 BE*/
866 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
867 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,false );
868 /*UTF32 LE*/
869 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
870 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,false );
871 /*UTF32 BE*/
872 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
873 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,false );
874
875 /*LATIN_1*/
876 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
877 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,false );
878
879 #if !UCONFIG_NO_LEGACY_CONVERSION
880 /*EBCDIC_STATEFUL*/
881 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
882 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,false );
883
884 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
885 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,false );
886
887 /*MBCS*/
888
889 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
890 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,false );
891 /*DBCS*/
892 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
893 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,false );
894 /*SBCS*/
895 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
896 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,false );
897 /*SBCS*/
898 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
899 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,false );
900 #endif
901
902
903 /****/
904
905 /*UTF-8*/
906 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
907 sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,false);
908 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
909 /*ISO-2022*/
910 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
911 sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,false);
912 #endif
913
914 /*UTF16 LE*/
915 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
916 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,false);
917 /*UTF16 BE*/
918 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
919 sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,false);
920 /*UTF32 LE*/
921 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
922 sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,false);
923 /*UTF32 BE*/
924 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
925 sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,false);
926
927 #if !UCONFIG_NO_LEGACY_CONVERSION
928 /*EBCDIC_STATEFUL*/
929 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
930 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,false);
931 /*MBCS*/
932 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
933 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,false);
934 #endif
935
936 /* Try it again to make sure it still works */
937 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
938 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,false);
939
940 #if !UCONFIG_NO_LEGACY_CONVERSION
941 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
942 malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,false);
943
944 testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
945 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,false );
946
947 /*LMBCS*/
948 testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
949 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,false );
950 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
951 LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,false);
952 #endif
953
954 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
955 {
956 /* encode directly set D and set O */
957 static const uint8_t utf7[] = {
958 /*
959 Hi Mom -+Jjo--!
960 A+ImIDkQ.
961 +-
962 +ZeVnLIqe-
963 */
964 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
965 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
966 0x2b, 0x2d,
967 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
968 };
969 static const UChar unicode[] = {
970 /*
971 Hi Mom -<WHITE SMILING FACE>-!
972 A<NOT IDENTICAL TO><ALPHA>.
973 +
974 [Japanese word "nihongo"]
975 */
976 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
977 0x41, 0x2262, 0x0391, 0x2e,
978 0x2b,
979 0x65e5, 0x672c, 0x8a9e
980 };
981 static const int32_t toUnicodeOffsets[] = {
982 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
983 15, 17, 19, 23,
984 24,
985 27, 29, 32
986 };
987 static const int32_t fromUnicodeOffsets[] = {
988 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
989 11, 12, 12, 12, 13, 13, 13, 13, 14,
990 15, 15,
991 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
992 };
993
994 /* same but escaping set O (the exclamation mark) */
995 static const uint8_t utf7Restricted[] = {
996 /*
997 Hi Mom -+Jjo--+ACE-
998 A+ImIDkQ.
999 +-
1000 +ZeVnLIqe-
1001 */
1002 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1003 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1004 0x2b, 0x2d,
1005 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1006 };
1007 static const int32_t toUnicodeOffsetsR[] = {
1008 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1009 19, 21, 23, 27,
1010 28,
1011 31, 33, 36
1012 };
1013 static const int32_t fromUnicodeOffsetsR[] = {
1014 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1015 11, 12, 12, 12, 13, 13, 13, 13, 14,
1016 15, 15,
1017 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1018 };
1019
1020 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,false);
1021
1022 testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,false);
1023
1024 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,false);
1025
1026 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,false);
1027 }
1028
1029 /*
1030 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1031 * modified according to RFC 2060,
1032 * and supplemented with the one example in RFC 2060 itself.
1033 */
1034 {
1035 static const uint8_t imap[] = {
1036 /* Hi Mom -&Jjo--!
1037 A&ImIDkQ-.
1038 &-
1039 &ZeVnLIqe-
1040 \
1041 ~peter
1042 /mail
1043 /&ZeVnLIqe-
1044 /&U,BTFw-
1045 */
1046 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1047 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1048 0x26, 0x2d,
1049 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1050 0x5c,
1051 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1052 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1053 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1054 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1055 };
1056 static const UChar unicode[] = {
1057 /* Hi Mom -<WHITE SMILING FACE>-!
1058 A<NOT IDENTICAL TO><ALPHA>.
1059 &
1060 [Japanese word "nihongo"]
1061 \
1062 ~peter
1063 /mail
1064 /<65e5, 672c, 8a9e>
1065 /<53f0, 5317>
1066 */
1067 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1068 0x41, 0x2262, 0x0391, 0x2e,
1069 0x26,
1070 0x65e5, 0x672c, 0x8a9e,
1071 0x5c,
1072 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1073 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1074 0x2f, 0x65e5, 0x672c, 0x8a9e,
1075 0x2f, 0x53f0, 0x5317
1076 };
1077 static const int32_t toUnicodeOffsets[] = {
1078 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1079 15, 17, 19, 24,
1080 25,
1081 28, 30, 33,
1082 37,
1083 38, 39, 40, 41, 42, 43,
1084 44, 45, 46, 47, 48,
1085 49, 51, 53, 56,
1086 60, 62, 64
1087 };
1088 static const int32_t fromUnicodeOffsets[] = {
1089 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1090 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1091 15, 15,
1092 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1093 19,
1094 20, 21, 22, 23, 24, 25,
1095 26, 27, 28, 29, 30,
1096 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1097 35, 36, 36, 36, 37, 37, 37, 37, 37
1098 };
1099
1100 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,false);
1101
1102 testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,false);
1103 }
1104
1105 /* Test UTF-8 bad data handling*/
1106 {
1107 static const uint8_t utf8[]={
1108 0x61,
1109 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1110 0x00,
1111 0x62,
1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1113 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1114 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1115 0xdf, 0xbf, /* 7ff */
1116 0xbf, /* truncated tail */
1117 0xf4, 0x90, 0x80, 0x80, /* 110000 */
1118 0x02
1119 };
1120
1121 static const uint16_t utf8Expected[]={
1122 0x0061,
1123 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1124 0x0000,
1125 0x0062,
1126 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1127 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1128 0xdbff, 0xdfff,
1129 0x07ff,
1130 0xfffd,
1131 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1132 0x0002
1133 };
1134
1135 static const int32_t utf8Offsets[]={
1136 0,
1137 1, 2, 3, 4,
1138 5,
1139 6,
1140 7, 8, 9, 10, 11,
1141 12, 13, 14, 15, 16,
1142 17, 17,
1143 21,
1144 23,
1145 24, 25, 26, 27,
1146 28
1147 };
1148 testConvertToU(utf8, sizeof(utf8),
1149 utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,false);
1150
1151 }
1152
1153 /* Test UTF-32BE bad data handling*/
1154 {
1155 static const uint8_t utf32[]={
1156 0x00, 0x00, 0x00, 0x61,
1157 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1158 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1159 0x00, 0x00, 0x00, 0x62,
1160 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1161 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1162 0x00, 0x00, 0x01, 0x62,
1163 0x00, 0x00, 0x02, 0x62
1164 };
1165 static const uint16_t utf32Expected[]={
1166 0x0061,
1167 0xfffd, /* 0x110000 out of range */
1168 0xDBFF, /* 0x10FFFF in range */
1169 0xDFFF,
1170 0x0062,
1171 0xfffd, /* 0xffffffff out of range */
1172 0xfffd, /* 0x7fffffff out of range */
1173 0x0162,
1174 0x0262
1175 };
1176 static const int32_t utf32Offsets[]={
1177 0, 4, 8, 8, 12, 16, 20, 24, 28
1178 };
1179 static const uint8_t utf32ExpectedBack[]={
1180 0x00, 0x00, 0x00, 0x61,
1181 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1182 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1183 0x00, 0x00, 0x00, 0x62,
1184 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1185 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1186 0x00, 0x00, 0x01, 0x62,
1187 0x00, 0x00, 0x02, 0x62
1188 };
1189 static const int32_t utf32OffsetsBack[]={
1190 0,0,0,0,
1191 1,1,1,1,
1192 2,2,2,2,
1193 4,4,4,4,
1194 5,5,5,5,
1195 6,6,6,6,
1196 7,7,7,7,
1197 8,8,8,8
1198 };
1199
1200 testConvertToU(utf32, sizeof(utf32),
1201 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,false);
1202 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1203 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, false);
1204 }
1205
1206 /* Test UTF-32LE bad data handling*/
1207 {
1208 static const uint8_t utf32[]={
1209 0x61, 0x00, 0x00, 0x00,
1210 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1211 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1212 0x62, 0x00, 0x00, 0x00,
1213 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1214 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1215 0x62, 0x01, 0x00, 0x00,
1216 0x62, 0x02, 0x00, 0x00,
1217 };
1218
1219 static const uint16_t utf32Expected[]={
1220 0x0061,
1221 0xfffd, /* 0x110000 out of range */
1222 0xDBFF, /* 0x10FFFF in range */
1223 0xDFFF,
1224 0x0062,
1225 0xfffd, /* 0xffffffff out of range */
1226 0xfffd, /* 0x7fffffff out of range */
1227 0x0162,
1228 0x0262
1229 };
1230 static const int32_t utf32Offsets[]={
1231 0, 4, 8, 8, 12, 16, 20, 24, 28
1232 };
1233 static const uint8_t utf32ExpectedBack[]={
1234 0x61, 0x00, 0x00, 0x00,
1235 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1236 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1237 0x62, 0x00, 0x00, 0x00,
1238 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1239 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1240 0x62, 0x01, 0x00, 0x00,
1241 0x62, 0x02, 0x00, 0x00
1242 };
1243 static const int32_t utf32OffsetsBack[]={
1244 0,0,0,0,
1245 1,1,1,1,
1246 2,2,2,2,
1247 4,4,4,4,
1248 5,5,5,5,
1249 6,6,6,6,
1250 7,7,7,7,
1251 8,8,8,8
1252 };
1253 testConvertToU(utf32, sizeof(utf32),
1254 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,false );
1255 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1256 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, false);
1257 }
1258 }
1259
TestCoverageMBCSnull1260 static void TestCoverageMBCS(){
1261 #if 0
1262 UErrorCode status = U_ZERO_ERROR;
1263 const char *directory = loadTestData(&status);
1264 char* tdpath = NULL;
1265 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1266 int len = strlen(directory);
1267 char* index=NULL;
1268
1269 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1270 uprv_strcpy(saveDirectory,u_getDataDirectory());
1271 log_verbose("Retrieved data directory %s \n",saveDirectory);
1272 uprv_strcpy(tdpath,directory);
1273 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1274
1275 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1276 *(index+1)=0;
1277 }
1278 u_setDataDirectory(tdpath);
1279 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1280 #endif
1281
1282 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1283 which is test file for MBCS conversion with single-byte codepage data.*/
1284 {
1285
1286 /* MBCS with single byte codepage data test1.ucm*/
1287 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1288 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1289 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1290
1291 /*from Unicode*/
1292 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1293 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,false );
1294 }
1295
1296 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1297 which is test file for MBCS conversion with three-byte codepage data.*/
1298 {
1299
1300 /* MBCS with three byte codepage data test3.ucm*/
1301 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1302 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1303 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1304
1305 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1306 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1307 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1308
1309 /*from Unicode*/
1310 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1311 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,false );
1312
1313 /*to Unicode*/
1314 testConvertToU(test3input, sizeof(test3input),
1315 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,false);
1316
1317 }
1318
1319 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1320 which is test file for MBCS conversion with four-byte codepage data.*/
1321 {
1322
1323 /* MBCS with three byte codepage data test4.ucm*/
1324 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1325 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1326 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1327
1328 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1329 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1330 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1331
1332 /*from Unicode*/
1333 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1334 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,false );
1335
1336 /*to Unicode*/
1337 testConvertToU(test4input, sizeof(test4input),
1338 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,false );
1339
1340 }
1341 #if 0
1342 free(tdpath);
1343 /* restore the original data directory */
1344 log_verbose("Setting the data directory to %s \n", saveDirectory);
1345 u_setDataDirectory(saveDirectory);
1346 free(saveDirectory);
1347 #endif
1348
1349 }
1350
/*
 * Opens the converter named convName via my_ucnv_open() and verifies that
 * ucnv_getType() reports convType for it.
 *
 * convName  name of the converter to open
 * convType  the UConverterType the converter is expected to report
 *
 * A converter that cannot be opened is logged as a data error and the type
 * check is skipped.
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UErrorCode err = U_ZERO_ERROR;
    UConverter *myConverter = my_ucnv_open(convName, &err);
    UConverterType actualType;

    if (U_FAILURE(err)) {
        log_data_err("Failed to create an %s converter\n", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /* report the value that was actually returned, next to the expected one */
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X, expected 0x%X\n",
                convName, (unsigned int)actualType, (unsigned int)convType);
    } else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }

    ucnv_close(myConverter);
}
1373
TestConverterTypesAndStartersnull1374 static void TestConverterTypesAndStarters()
1375 {
1376 #if !UCONFIG_NO_LEGACY_CONVERSION
1377 UConverter* myConverter;
1378 UErrorCode err = U_ZERO_ERROR;
1379 UBool mystarters[256];
1380
1381 /* const UBool expectedKSCstarters[256] = {
1382 false, false, false, false, false, false, false, false, false, false,
1383 false, false, false, false, false, false, false, false, false, false,
1384 false, false, false, false, false, false, false, false, false, false,
1385 false, false, false, false, false, false, false, false, false, false,
1386 false, false, false, false, false, false, false, false, false, false,
1387 false, false, false, false, false, false, false, false, false, false,
1388 false, false, false, false, false, false, false, false, false, false,
1389 false, false, false, false, false, false, false, false, false, false,
1390 false, false, false, false, false, false, false, false, false, false,
1391 false, false, false, false, false, false, false, false, false, false,
1392 false, false, false, false, false, false, false, false, false, false,
1393 false, false, false, false, false, false, false, false, false, false,
1394 false, false, false, false, false, false, false, false, false, false,
1395 false, false, false, false, false, false, false, false, false, false,
1396 false, false, false, true, true, true, true, true, true, true,
1397 true, true, true, true, true, true, true, true, true, true,
1398 true, true, true, true, true, true, true, true, true, true,
1399 true, true, true, false, false, true, true, true, true, true,
1400 true, true, true, true, true, true, true, true, true, true,
1401 true, true, true, true, true, true, true, true, true, true,
1402 true, true, true, true, true, true, true, true, true, true,
1403 true, true, true, true, true, true, true, true, true, true,
1404 true, true, true, true, true, true, true, true, true, true,
1405 true, true, true, true, true, true, true, true, true, true,
1406 true, true, true, true, true, true, true, true, true, true,
1407 true, true, true, true, true, true};*/
1408
1409
1410 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1411
1412 myConverter = ucnv_open("ksc", &err);
1413 if (U_FAILURE(err)) {
1414 log_data_err("Failed to create an ibm-ksc converter\n");
1415 return;
1416 }
1417 else
1418 {
1419 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1420 log_err("ucnv_getType Failed for ibm-949\n");
1421 else
1422 log_verbose("ucnv_getType ibm-949 ok\n");
1423
1424 if(myConverter!=NULL)
1425 ucnv_getStarters(myConverter, mystarters, &err);
1426
1427 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1428 log_err("Failed ucnv_getStarters for ksc\n");
1429 else
1430 log_verbose("ucnv_getStarters ok\n");*/
1431
1432 }
1433 ucnv_close(myConverter);
1434
1435 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1436 TestConverterType("ibm-878", UCNV_SBCS);
1437 #endif
1438
1439 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1440
1441 TestConverterType("ibm-1208", UCNV_UTF8);
1442
1443 TestConverterType("utf-8", UCNV_UTF8);
1444 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1445 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1446 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1447 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1448
1449 #if !UCONFIG_NO_LEGACY_CONVERSION
1450
1451 #if defined(U_ENABLE_GENERIC_ISO_2022)
1452 TestConverterType("iso-2022", UCNV_ISO_2022);
1453 #endif
1454
1455 TestConverterType("hz", UCNV_HZ);
1456 #endif
1457
1458 TestConverterType("scsu", UCNV_SCSU);
1459
1460 #if !UCONFIG_NO_LEGACY_CONVERSION
1461 TestConverterType("x-iscii-de", UCNV_ISCII);
1462 #endif
1463
1464 TestConverterType("ascii", UCNV_US_ASCII);
1465 TestConverterType("utf-7", UCNV_UTF7);
1466 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1467 TestConverterType("bocu-1", UCNV_BOCU1);
1468 }
1469
1470 static void
TestAmbiguousConverter(UConverter *cnv)1471 TestAmbiguousConverter(UConverter *cnv) {
1472 static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1473 UChar outUnicode[20]={ 0, 0, 0, 0 };
1474
1475 const char *s;
1476 UChar *u;
1477 UErrorCode errorCode;
1478 UBool isAmbiguous;
1479
1480 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1481 errorCode=U_ZERO_ERROR;
1482 s=inBytes;
1483 u=outUnicode;
1484 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, true, &errorCode);
1485 if(U_FAILURE(errorCode)) {
1486 /* we do not care about general failures in this test; the input may just not be mappable */
1487 return;
1488 }
1489
1490 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1491 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1492 /* There are some encodings that are partially ASCII based,
1493 like the ISO-7 and GSM series of codepages, which we ignore. */
1494 return;
1495 }
1496
1497 isAmbiguous=ucnv_isAmbiguous(cnv);
1498
1499 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1500 if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1501 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1502 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1503 return;
1504 }
1505
1506 if(outUnicode[2]!=0x5c) {
1507 /* needs fixup, fix it */
1508 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1509 if(outUnicode[2]!=0x5c) {
1510 /* the fix failed */
1511 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1512 return;
1513 }
1514 }
1515 }
1516
TestAmbiguousnull1517 static void TestAmbiguous()
1518 {
1519 UErrorCode status = U_ZERO_ERROR;
1520 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1521 static const char target[] = {
1522 /* "\\usr\\local\\share\\data\\icutest.txt" */
1523 0x5c, 0x75, 0x73, 0x72,
1524 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1525 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1526 0x5c, 0x64, 0x61, 0x74, 0x61,
1527 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1528 0
1529 };
1530 UChar asciiResult[200], sjisResult[200];
1531 int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1532 const char *name;
1533
1534 /* enumerate all converters */
1535 status=U_ZERO_ERROR;
1536 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1537 cnv=ucnv_open(name, &status);
1538 if(U_SUCCESS(status)) {
1539 TestAmbiguousConverter(cnv);
1540 ucnv_close(cnv);
1541 } else {
1542 log_err("error: unable to open available converter \"%s\"\n", name);
1543 status=U_ZERO_ERROR;
1544 }
1545 }
1546
1547 #if !UCONFIG_NO_LEGACY_CONVERSION
1548 sjis_cnv = ucnv_open("ibm-943", &status);
1549 if (U_FAILURE(status))
1550 {
1551 log_data_err("Failed to create a SJIS converter\n");
1552 return;
1553 }
1554 ascii_cnv = ucnv_open("LATIN-1", &status);
1555 if (U_FAILURE(status))
1556 {
1557 log_data_err("Failed to create a LATIN-1 converter\n");
1558 ucnv_close(sjis_cnv);
1559 return;
1560 }
1561 /* convert target from SJIS to Unicode */
1562 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1563 if (U_FAILURE(status))
1564 {
1565 log_err("Failed to convert the SJIS string.\n");
1566 ucnv_close(sjis_cnv);
1567 ucnv_close(ascii_cnv);
1568 return;
1569 }
1570 /* convert target from Latin-1 to Unicode */
1571 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1572 if (U_FAILURE(status))
1573 {
1574 log_err("Failed to convert the Latin-1 string.\n");
1575 ucnv_close(sjis_cnv);
1576 ucnv_close(ascii_cnv);
1577 return;
1578 }
1579 if (!ucnv_isAmbiguous(sjis_cnv))
1580 {
1581 log_err("SJIS converter should contain ambiguous character mappings.\n");
1582 ucnv_close(sjis_cnv);
1583 ucnv_close(ascii_cnv);
1584 return;
1585 }
1586 if (u_strcmp(sjisResult, asciiResult) == 0)
1587 {
1588 log_err("File separators for SJIS don't need to be fixed.\n");
1589 }
1590 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1591 if (u_strcmp(sjisResult, asciiResult) != 0)
1592 {
1593 log_err("Fixing file separator for SJIS failed.\n");
1594 }
1595 ucnv_close(sjis_cnv);
1596 ucnv_close(ascii_cnv);
1597 #endif
1598 }
1599
1600 static void
TestSignatureDetectionnull1601 TestSignatureDetection(){
1602 /* with null terminated strings */
1603 {
1604 static const char* data[] = {
1605 "\xFE\xFF\x00\x00", /* UTF-16BE */
1606 "\xFF\xFE\x00\x00", /* UTF-16LE */
1607 "\xEF\xBB\xBF\x00", /* UTF-8 */
1608 "\x0E\xFE\xFF\x00", /* SCSU */
1609
1610 "\xFE\xFF", /* UTF-16BE */
1611 "\xFF\xFE", /* UTF-16LE */
1612 "\xEF\xBB\xBF", /* UTF-8 */
1613 "\x0E\xFE\xFF", /* SCSU */
1614
1615 "\xFE\xFF\x41\x42", /* UTF-16BE */
1616 "\xFF\xFE\x41\x41", /* UTF-16LE */
1617 "\xEF\xBB\xBF\x41", /* UTF-8 */
1618 "\x0E\xFE\xFF\x41", /* SCSU */
1619
1620 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1621 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1622 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1623 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1624 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1625
1626 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1627 };
1628 static const char* expected[] = {
1629 "UTF-16BE",
1630 "UTF-16LE",
1631 "UTF-8",
1632 "SCSU",
1633
1634 "UTF-16BE",
1635 "UTF-16LE",
1636 "UTF-8",
1637 "SCSU",
1638
1639 "UTF-16BE",
1640 "UTF-16LE",
1641 "UTF-8",
1642 "SCSU",
1643
1644 "UTF-7",
1645 "UTF-7",
1646 "UTF-7",
1647 "UTF-7",
1648 "UTF-7",
1649 "UTF-EBCDIC"
1650 };
1651 static const int32_t expectedLength[] ={
1652 2,
1653 2,
1654 3,
1655 3,
1656
1657 2,
1658 2,
1659 3,
1660 3,
1661
1662 2,
1663 2,
1664 3,
1665 3,
1666
1667 5,
1668 4,
1669 4,
1670 4,
1671 4,
1672 4
1673 };
1674 int i=0;
1675 UErrorCode err;
1676 int32_t signatureLength = -1;
1677 const char* source = NULL;
1678 const char* enc = NULL;
1679 for( ; i<UPRV_LENGTHOF(data); i++){
1680 err = U_ZERO_ERROR;
1681 source = data[i];
1682 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1683 if(U_FAILURE(err)){
1684 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1685 continue;
1686 }
1687 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1688 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1689 continue;
1690 }
1691 if(signatureLength != expectedLength[i]){
1692 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1693 }
1694 }
1695 }
1696 {
1697 static const char* data[] = {
1698 "\xFE\xFF\x00", /* UTF-16BE */
1699 "\xFF\xFE\x00", /* UTF-16LE */
1700 "\xEF\xBB\xBF\x00", /* UTF-8 */
1701 "\x0E\xFE\xFF\x00", /* SCSU */
1702 "\x00\x00\xFE\xFF", /* UTF-32BE */
1703 "\xFF\xFE\x00\x00", /* UTF-32LE */
1704 "\xFE\xFF", /* UTF-16BE */
1705 "\xFF\xFE", /* UTF-16LE */
1706 "\xEF\xBB\xBF", /* UTF-8 */
1707 "\x0E\xFE\xFF", /* SCSU */
1708 "\x00\x00\xFE\xFF", /* UTF-32BE */
1709 "\xFF\xFE\x00\x00", /* UTF-32LE */
1710 "\xFE\xFF\x41\x42", /* UTF-16BE */
1711 "\xFF\xFE\x41\x41", /* UTF-16LE */
1712 "\xEF\xBB\xBF\x41", /* UTF-8 */
1713 "\x0E\xFE\xFF\x41", /* SCSU */
1714 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1715 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1716 "\xFB\xEE\x28", /* BOCU-1 */
1717 "\xFF\x41\x42" /* NULL */
1718 };
1719 static const int len[] = {
1720 3,
1721 3,
1722 4,
1723 4,
1724 4,
1725 4,
1726 2,
1727 2,
1728 3,
1729 3,
1730 4,
1731 4,
1732 4,
1733 4,
1734 4,
1735 4,
1736 5,
1737 5,
1738 3,
1739 3
1740 };
1741
1742 static const char* expected[] = {
1743 "UTF-16BE",
1744 "UTF-16LE",
1745 "UTF-8",
1746 "SCSU",
1747 "UTF-32BE",
1748 "UTF-32LE",
1749 "UTF-16BE",
1750 "UTF-16LE",
1751 "UTF-8",
1752 "SCSU",
1753 "UTF-32BE",
1754 "UTF-32LE",
1755 "UTF-16BE",
1756 "UTF-16LE",
1757 "UTF-8",
1758 "SCSU",
1759 "UTF-32BE",
1760 "UTF-32LE",
1761 "BOCU-1",
1762 NULL
1763 };
1764 static const int32_t expectedLength[] ={
1765 2,
1766 2,
1767 3,
1768 3,
1769 4,
1770 4,
1771 2,
1772 2,
1773 3,
1774 3,
1775 4,
1776 4,
1777 2,
1778 2,
1779 3,
1780 3,
1781 4,
1782 4,
1783 3,
1784 0
1785 };
1786 int i=0;
1787 UErrorCode err;
1788 int32_t signatureLength = -1;
1789 int32_t sourceLength=-1;
1790 const char* source = NULL;
1791 const char* enc = NULL;
1792 for( ; i<UPRV_LENGTHOF(data); i++){
1793 err = U_ZERO_ERROR;
1794 source = data[i];
1795 sourceLength = len[i];
1796 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1797 if(U_FAILURE(err)){
1798 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1799 continue;
1800 }
1801 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1802 if(expected[i] !=NULL){
1803 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1804 continue;
1805 }
1806 }
1807 if(signatureLength != expectedLength[i]){
1808 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1809 }
1810 }
1811 }
1812 }
1813
TestUTF7null1814 static void TestUTF7() {
1815 /* test input */
1816 static const uint8_t in[]={
1817 /* H - +Jjo- - ! +- +2AHcAQ */
1818 0x48,
1819 0x2d,
1820 0x2b, 0x4a, 0x6a, 0x6f,
1821 0x2d, 0x2d,
1822 0x21,
1823 0x2b, 0x2d,
1824 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1825 };
1826
1827 /* expected test results */
1828 static const int32_t results[]={
1829 /* number of bytes read, code point */
1830 1, 0x48,
1831 1, 0x2d,
1832 4, 0x263a, /* <WHITE SMILING FACE> */
1833 2, 0x2d,
1834 1, 0x21,
1835 2, 0x2b,
1836 7, 0x10401
1837 };
1838
1839 const char *cnvName;
1840 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1841 UErrorCode errorCode=U_ZERO_ERROR;
1842 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1843 if(U_FAILURE(errorCode)) {
1844 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1845 return;
1846 }
1847 TestNextUChar(cnv, source, limit, results, "UTF-7");
1848 /* Test the condition when source >= sourceLimit */
1849 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1850 cnvName = ucnv_getName(cnv, &errorCode);
1851 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1852 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1853 }
1854 ucnv_close(cnv);
1855 }
1856
TestIMAPnull1857 static void TestIMAP() {
1858 /* test input */
1859 static const uint8_t in[]={
1860 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1861 0x48,
1862 0x2d,
1863 0x26, 0x4a, 0x6a, 0x6f,
1864 0x2d, 0x2d,
1865 0x21,
1866 0x26, 0x2d,
1867 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1868 };
1869
1870 /* expected test results */
1871 static const int32_t results[]={
1872 /* number of bytes read, code point */
1873 1, 0x48,
1874 1, 0x2d,
1875 4, 0x263a, /* <WHITE SMILING FACE> */
1876 2, 0x2d,
1877 1, 0x21,
1878 2, 0x26,
1879 7, 0x10401
1880 };
1881
1882 const char *cnvName;
1883 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1884 UErrorCode errorCode=U_ZERO_ERROR;
1885 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1886 if(U_FAILURE(errorCode)) {
1887 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1888 return;
1889 }
1890 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1891 /* Test the condition when source >= sourceLimit */
1892 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1893 cnvName = ucnv_getName(cnv, &errorCode);
1894 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1895 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1896 }
1897 ucnv_close(cnv);
1898 }
1899
TestUTF8null1900 static void TestUTF8() {
1901 /* test input */
1902 static const uint8_t in[]={
1903 0x61,
1904 0xc2, 0x80,
1905 0xe0, 0xa0, 0x80,
1906 0xf0, 0x90, 0x80, 0x80,
1907 0xf4, 0x84, 0x8c, 0xa1,
1908 0xf0, 0x90, 0x90, 0x81
1909 };
1910
1911 /* expected test results */
1912 static const int32_t results[]={
1913 /* number of bytes read, code point */
1914 1, 0x61,
1915 2, 0x80,
1916 3, 0x800,
1917 4, 0x10000,
1918 4, 0x104321,
1919 4, 0x10401
1920 };
1921
1922 /* error test input */
1923 static const uint8_t in2[]={
1924 0x61,
1925 0xc0, 0x80, /* illegal non-shortest form */
1926 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1927 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1928 0xc0, 0xc0, /* illegal trail byte */
1929 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1930 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1931 0xfe, /* illegal byte altogether */
1932 0x62
1933 };
1934
1935 /* expected error test results */
1936 static const int32_t results2[]={
1937 /* number of bytes read, code point */
1938 1, 0x61,
1939 22, 0x62
1940 };
1941
1942 UConverterToUCallback cb;
1943 const void *p;
1944
1945 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1946 UErrorCode errorCode=U_ZERO_ERROR;
1947 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1948 if(U_FAILURE(errorCode)) {
1949 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1950 return;
1951 }
1952 TestNextUChar(cnv, source, limit, results, "UTF-8");
1953 /* Test the condition when source >= sourceLimit */
1954 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1955
1956 /* test error behavior with a skip callback */
1957 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1958 source=(const char *)in2;
1959 limit=(const char *)(in2+sizeof(in2));
1960 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1961
1962 ucnv_close(cnv);
1963 }
1964
TestCESU8null1965 static void TestCESU8() {
1966 /* test input */
1967 static const uint8_t in[]={
1968 0x61,
1969 0xc2, 0x80,
1970 0xe0, 0xa0, 0x80,
1971 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1972 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1973 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1974 0xef, 0xbf, 0xbc
1975 };
1976
1977 /* expected test results */
1978 static const int32_t results[]={
1979 /* number of bytes read, code point */
1980 1, 0x61,
1981 2, 0x80,
1982 3, 0x800,
1983 6, 0x10000,
1984 3, 0xdc01,
1985 -1,0xd802, /* may read 3 or 6 bytes */
1986 -1,0x10ffff,/* may read 0 or 3 bytes */
1987 3, 0xfffc
1988 };
1989
1990 /* error test input */
1991 static const uint8_t in2[]={
1992 0x61,
1993 0xc0, 0x80, /* illegal non-shortest form */
1994 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1995 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1996 0xc0, 0xc0, /* illegal trail byte */
1997 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
1998 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
1999 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
2000 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
2001 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
2002 0xfe, /* illegal byte altogether */
2003 0x62
2004 };
2005
2006 /* expected error test results */
2007 static const int32_t results2[]={
2008 /* number of bytes read, code point */
2009 1, 0x61,
2010 34, 0x62
2011 };
2012
2013 UConverterToUCallback cb;
2014 const void *p;
2015
2016 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2017 UErrorCode errorCode=U_ZERO_ERROR;
2018 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2019 if(U_FAILURE(errorCode)) {
2020 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2021 return;
2022 }
2023 TestNextUChar(cnv, source, limit, results, "CESU-8");
2024 /* Test the condition when source >= sourceLimit */
2025 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2026
2027 /* test error behavior with a skip callback */
2028 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2029 source=(const char *)in2;
2030 limit=(const char *)(in2+sizeof(in2));
2031 TestNextUChar(cnv, source, limit, results2, "CESU-8");
2032
2033 ucnv_close(cnv);
2034 }
2035
TestUTF16null2036 static void TestUTF16() {
2037 /* test input */
2038 static const uint8_t in1[]={
2039 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2040 };
2041 static const uint8_t in2[]={
2042 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2043 };
2044 static const uint8_t in3[]={
2045 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2046 };
2047
2048 /* expected test results */
2049 static const int32_t results1[]={
2050 /* number of bytes read, code point */
2051 4, 0x4e00,
2052 2, 0xfeff
2053 };
2054 static const int32_t results2[]={
2055 /* number of bytes read, code point */
2056 4, 0x004e,
2057 2, 0xfffe
2058 };
2059 static const int32_t results3[]={
2060 /* number of bytes read, code point */
2061 2, 0xfefe,
2062 2, 0x4e00,
2063 2, 0xfeff,
2064 4, 0x20001
2065 };
2066
2067 const char *source, *limit;
2068
2069 UErrorCode errorCode=U_ZERO_ERROR;
2070 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2071 if(U_FAILURE(errorCode)) {
2072 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2073 return;
2074 }
2075
2076 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2077 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2078
2079 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2080 ucnv_resetToUnicode(cnv);
2081 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2082
2083 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2084 ucnv_resetToUnicode(cnv);
2085 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2086
2087 /* Test the condition when source >= sourceLimit */
2088 ucnv_resetToUnicode(cnv);
2089 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2090
2091 ucnv_close(cnv);
2092 }
2093
TestUTF16BEnull2094 static void TestUTF16BE() {
2095 /* test input */
2096 static const uint8_t in[]={
2097 0x00, 0x61,
2098 0x00, 0xc0,
2099 0x00, 0x31,
2100 0x00, 0xf4,
2101 0xce, 0xfe,
2102 0xd8, 0x01, 0xdc, 0x01
2103 };
2104
2105 /* expected test results */
2106 static const int32_t results[]={
2107 /* number of bytes read, code point */
2108 2, 0x61,
2109 2, 0xc0,
2110 2, 0x31,
2111 2, 0xf4,
2112 2, 0xcefe,
2113 4, 0x10401
2114 };
2115
2116 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2117 UErrorCode errorCode=U_ZERO_ERROR;
2118 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2119 if(U_FAILURE(errorCode)) {
2120 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2121 return;
2122 }
2123 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2124 /* Test the condition when source >= sourceLimit */
2125 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2126 /*Test for the condition where there is an invalid character*/
2127 {
2128 static const uint8_t source2[]={0x61};
2129 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2130 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2131 }
2132 #if 0
2133 /*
2134 * Test disabled because currently the UTF-16BE/LE converters are supposed
2135 * to not set errors for unpaired surrogates.
2136 * This may change with
2137 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2138 */
2139
2140 /*Test for the condition where there is a surrogate pair*/
2141 {
2142 const uint8_t source2[]={0xd8, 0x01};
2143 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2144 }
2145 #endif
2146 ucnv_close(cnv);
2147 }
2148
2149 static void
TestUTF16LEnull2150 TestUTF16LE() {
2151 /* test input */
2152 static const uint8_t in[]={
2153 0x61, 0x00,
2154 0x31, 0x00,
2155 0x4e, 0x2e,
2156 0x4e, 0x00,
2157 0x01, 0xd8, 0x01, 0xdc
2158 };
2159
2160 /* expected test results */
2161 static const int32_t results[]={
2162 /* number of bytes read, code point */
2163 2, 0x61,
2164 2, 0x31,
2165 2, 0x2e4e,
2166 2, 0x4e,
2167 4, 0x10401
2168 };
2169
2170 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2171 UErrorCode errorCode=U_ZERO_ERROR;
2172 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2173 if(U_FAILURE(errorCode)) {
2174 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2175 return;
2176 }
2177 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2178 /* Test the condition when source >= sourceLimit */
2179 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2180 /*Test for the condition where there is an invalid character*/
2181 {
2182 static const uint8_t source2[]={0x61};
2183 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2184 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2185 }
2186 #if 0
2187 /*
2188 * Test disabled because currently the UTF-16BE/LE converters are supposed
2189 * to not set errors for unpaired surrogates.
2190 * This may change with
2191 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2192 */
2193
2194 /*Test for the condition where there is a surrogate character*/
2195 {
2196 static const uint8_t source2[]={0x01, 0xd8};
2197 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2198 }
2199 #endif
2200
2201 ucnv_close(cnv);
2202 }
2203
TestUTF32null2204 static void TestUTF32() {
2205 /* test input */
2206 static const uint8_t in1[]={
2207 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2208 };
2209 static const uint8_t in2[]={
2210 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2211 };
2212 static const uint8_t in3[]={
2213 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2214 };
2215
2216 /* expected test results */
2217 static const int32_t results1[]={
2218 /* number of bytes read, code point */
2219 8, 0x100f00,
2220 4, 0xfeff
2221 };
2222 static const int32_t results2[]={
2223 /* number of bytes read, code point */
2224 8, 0x0f1000,
2225 4, 0xfffe
2226 };
2227 static const int32_t results3[]={
2228 /* number of bytes read, code point */
2229 4, 0xfefe,
2230 4, 0x100f00,
2231 4, 0xfffd, /* unmatched surrogate */
2232 4, 0xfffd /* unmatched surrogate */
2233 };
2234
2235 const char *source, *limit;
2236
2237 UErrorCode errorCode=U_ZERO_ERROR;
2238 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2239 if(U_FAILURE(errorCode)) {
2240 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2241 return;
2242 }
2243
2244 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2245 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2246
2247 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2248 ucnv_resetToUnicode(cnv);
2249 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2250
2251 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2252 ucnv_resetToUnicode(cnv);
2253 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2254
2255 /* Test the condition when source >= sourceLimit */
2256 ucnv_resetToUnicode(cnv);
2257 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2258
2259 ucnv_close(cnv);
2260 }
2261
2262 static void
TestUTF32BEnull2263 TestUTF32BE() {
2264 /* test input */
2265 static const uint8_t in[]={
2266 0x00, 0x00, 0x00, 0x61,
2267 0x00, 0x00, 0x30, 0x61,
2268 0x00, 0x00, 0xdc, 0x00,
2269 0x00, 0x00, 0xd8, 0x00,
2270 0x00, 0x00, 0xdf, 0xff,
2271 0x00, 0x00, 0xff, 0xfe,
2272 0x00, 0x10, 0xab, 0xcd,
2273 0x00, 0x10, 0xff, 0xff
2274 };
2275
2276 /* expected test results */
2277 static const int32_t results[]={
2278 /* number of bytes read, code point */
2279 4, 0x61,
2280 4, 0x3061,
2281 4, 0xfffd,
2282 4, 0xfffd,
2283 4, 0xfffd,
2284 4, 0xfffe,
2285 4, 0x10abcd,
2286 4, 0x10ffff
2287 };
2288
2289 /* error test input */
2290 static const uint8_t in2[]={
2291 0x00, 0x00, 0x00, 0x61,
2292 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2293 0x00, 0x00, 0x00, 0x62,
2294 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2295 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2296 0x00, 0x00, 0x01, 0x62,
2297 0x00, 0x00, 0x02, 0x62
2298 };
2299
2300 /* expected error test results */
2301 static const int32_t results2[]={
2302 /* number of bytes read, code point */
2303 4, 0x61,
2304 8, 0x62,
2305 12, 0x162,
2306 4, 0x262
2307 };
2308
2309 UConverterToUCallback cb;
2310 const void *p;
2311
2312 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2313 UErrorCode errorCode=U_ZERO_ERROR;
2314 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2315 if(U_FAILURE(errorCode)) {
2316 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2317 return;
2318 }
2319 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2320
2321 /* Test the condition when source >= sourceLimit */
2322 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2323
2324 /* test error behavior with a skip callback */
2325 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2326 source=(const char *)in2;
2327 limit=(const char *)(in2+sizeof(in2));
2328 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2329
2330 ucnv_close(cnv);
2331 }
2332
2333 static void
TestUTF32LEnull2334 TestUTF32LE() {
2335 /* test input */
2336 static const uint8_t in[]={
2337 0x61, 0x00, 0x00, 0x00,
2338 0x61, 0x30, 0x00, 0x00,
2339 0x00, 0xdc, 0x00, 0x00,
2340 0x00, 0xd8, 0x00, 0x00,
2341 0xff, 0xdf, 0x00, 0x00,
2342 0xfe, 0xff, 0x00, 0x00,
2343 0xcd, 0xab, 0x10, 0x00,
2344 0xff, 0xff, 0x10, 0x00
2345 };
2346
2347 /* expected test results */
2348 static const int32_t results[]={
2349 /* number of bytes read, code point */
2350 4, 0x61,
2351 4, 0x3061,
2352 4, 0xfffd,
2353 4, 0xfffd,
2354 4, 0xfffd,
2355 4, 0xfffe,
2356 4, 0x10abcd,
2357 4, 0x10ffff
2358 };
2359
2360 /* error test input */
2361 static const uint8_t in2[]={
2362 0x61, 0x00, 0x00, 0x00,
2363 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2364 0x62, 0x00, 0x00, 0x00,
2365 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2366 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2367 0x62, 0x01, 0x00, 0x00,
2368 0x62, 0x02, 0x00, 0x00,
2369 };
2370
2371 /* expected error test results */
2372 static const int32_t results2[]={
2373 /* number of bytes read, code point */
2374 4, 0x61,
2375 8, 0x62,
2376 12, 0x162,
2377 4, 0x262,
2378 };
2379
2380 UConverterToUCallback cb;
2381 const void *p;
2382
2383 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2384 UErrorCode errorCode=U_ZERO_ERROR;
2385 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2386 if(U_FAILURE(errorCode)) {
2387 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2388 return;
2389 }
2390 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2391
2392 /* Test the condition when source >= sourceLimit */
2393 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2394
2395 /* test error behavior with a skip callback */
2396 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2397 source=(const char *)in2;
2398 limit=(const char *)(in2+sizeof(in2));
2399 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2400
2401 ucnv_close(cnv);
2402 }
2403
2404 static void
TestLATIN1null2405 TestLATIN1() {
2406 /* test input */
2407 static const uint8_t in[]={
2408 0x61,
2409 0x31,
2410 0x32,
2411 0xc0,
2412 0xf0,
2413 0xf4,
2414 };
2415
2416 /* expected test results */
2417 static const int32_t results[]={
2418 /* number of bytes read, code point */
2419 1, 0x61,
2420 1, 0x31,
2421 1, 0x32,
2422 1, 0xc0,
2423 1, 0xf0,
2424 1, 0xf4,
2425 };
2426 static const uint16_t in1[] = {
2427 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2428 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2429 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2430 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2431 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2432 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2433 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2434 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2435 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2436 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2437 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2438 0xcb, 0x82
2439 };
2440 static const uint8_t out1[] = {
2441 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2442 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2443 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2444 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2445 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2446 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2447 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2448 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2449 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2450 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2451 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2452 0xcb, 0x82
2453 };
2454 static const uint16_t in2[]={
2455 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2456 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2457 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2458 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2459 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2460 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2461 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2462 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2463 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2464 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2465 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2466 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2467 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2468 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2469 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2470 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2471 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2472 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2473 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2474 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2475 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2476 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2477 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2478 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2479 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2480 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2481 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2482 0x37, 0x20, 0x2A, 0x2F,
2483 };
2484 static const unsigned char out2[]={
2485 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2486 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2487 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2488 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2489 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2490 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2491 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2492 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2493 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2494 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2495 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2496 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2497 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2498 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2499 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2500 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2501 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2502 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2503 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2504 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2505 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2506 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2507 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2508 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2509 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2510 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2511 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2512 0x37, 0x20, 0x2A, 0x2F,
2513 };
2514 const char *source=(const char *)in;
2515 const char *limit=(const char *)in+sizeof(in);
2516
2517 UErrorCode errorCode=U_ZERO_ERROR;
2518 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2519 if(U_FAILURE(errorCode)) {
2520 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2521 return;
2522 }
2523 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2524 /* Test the condition when source >= sourceLimit */
2525 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2526 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2527 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2528
2529 ucnv_close(cnv);
2530 }
2531
2532 static void
TestSBCSnull2533 TestSBCS() {
2534 /* test input */
2535 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2536 /* expected test results */
2537 static const int32_t results[]={
2538 /* number of bytes read, code point */
2539 1, 0x61,
2540 1, 0xbf,
2541 1, 0xc4,
2542 1, 0x2021,
2543 1, 0xf8ff,
2544 1, 0x00d9
2545 };
2546
2547 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2548 UErrorCode errorCode=U_ZERO_ERROR;
2549 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2550 if(U_FAILURE(errorCode)) {
2551 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2552 return;
2553 }
2554 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2555 /* Test the condition when source >= sourceLimit */
2556 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2557 /*Test for Illegal character */ /*
2558 {
2559 static const uint8_t input1[]={ 0xA1 };
2560 const char* illegalsource=(const char*)input1;
2561 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal character");
2562 }
2563 */
2564 ucnv_close(cnv);
2565 }
2566
2567 static void
TestDBCSnull2568 TestDBCS() {
2569 /* test input */
2570 static const uint8_t in[]={
2571 0x44, 0x6a,
2572 0xc4, 0x9c,
2573 0x7a, 0x74,
2574 0x46, 0xab,
2575 0x42, 0x5b,
2576
2577 };
2578
2579 /* expected test results */
2580 static const int32_t results[]={
2581 /* number of bytes read, code point */
2582 2, 0x00a7,
2583 2, 0xe1d2,
2584 2, 0x6962,
2585 2, 0xf842,
2586 2, 0xffe5,
2587 };
2588
2589 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2590 UErrorCode errorCode=U_ZERO_ERROR;
2591
2592 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2593 if(U_FAILURE(errorCode)) {
2594 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2595 return;
2596 }
2597 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2598 /* Test the condition when source >= sourceLimit */
2599 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2600 /*Test for the condition where there is an invalid character*/
2601 {
2602 static const uint8_t source2[]={0x1a, 0x1b};
2603 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2604 }
2605 /*Test for the condition where we have a truncated char*/
2606 {
2607 static const uint8_t source1[]={0xc4};
2608 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2609 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2610 }
2611 ucnv_close(cnv);
2612 }
2613
2614 static void
TestMBCSnull2615 TestMBCS() {
2616 /* test input */
2617 static const uint8_t in[]={
2618 0x01,
2619 0xa6, 0xa3,
2620 0x00,
2621 0xa6, 0xa1,
2622 0x08,
2623 0xc2, 0x76,
2624 0xc2, 0x78,
2625
2626 };
2627
2628 /* expected test results */
2629 static const int32_t results[]={
2630 /* number of bytes read, code point */
2631 1, 0x0001,
2632 2, 0x250c,
2633 1, 0x0000,
2634 2, 0x2500,
2635 1, 0x0008,
2636 2, 0xd60c,
2637 2, 0xd60e,
2638 };
2639
2640 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2641 UErrorCode errorCode=U_ZERO_ERROR;
2642
2643 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2644 if(U_FAILURE(errorCode)) {
2645 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2646 return;
2647 }
2648 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2649 /* Test the condition when source >= sourceLimit */
2650 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2651 /*Test for the condition where there is an invalid character*/
2652 {
2653 static const uint8_t source2[]={0xa1, 0x80};
2654 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2655 }
2656 /*Test for the condition where we have a truncated char*/
2657 {
2658 static const uint8_t source1[]={0xc4};
2659 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2660 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2661 }
2662 ucnv_close(cnv);
2663
2664 }
2665
2666 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2667 static void
TestICCRunoutnull2668 TestICCRunout() {
2669 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2670
2671 const char *cnvName = "ibm-1363";
2672 UErrorCode status = U_ZERO_ERROR;
2673 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2674 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2675 const char *source = sourceData;
2676 const char *sourceLim = sourceData+sizeof(sourceData);
2677 UChar c1, c2, c3;
2678 UConverter *cnv=ucnv_open(cnvName, &status);
2679 if(U_FAILURE(status)) {
2680 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2681 return;
2682 }
2683
2684 #if 0
2685 {
2686 UChar targetBuf[256];
2687 UChar *target = targetBuf;
2688 UChar *targetLim = target+256;
2689 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, true, &status);
2690
2691 log_info("After convert: target@%d, source@%d, status%s\n",
2692 target-targetBuf, source-sourceData, u_errorName(status));
2693
2694 if(U_FAILURE(status)) {
2695 log_err("Failed to convert: %s\n", u_errorName(status));
2696 } else {
2697
2698 }
2699 }
2700 #endif
2701
2702 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2703 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2704
2705 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2706 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2707
2708 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2709 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2710
2711 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2712 log_verbose("OK\n");
2713 } else {
2714 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2715 }
2716
2717 ucnv_close(cnv);
2718
2719 }
2720 #endif
2721
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Runs the "ISO_2022" converter over an escape sequence (1b 25 42) followed
 * by UTF-8 encoded characters, then checks four error cases: sourceLimit
 * before source, an empty source range, a truncated character and a
 * sequence labelled "an invalid character".
 */
static void
TestISO_2022(void) {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /*
     * Test the condition when sourceLimit < source.
     * The range is built as (source+1, source) so that both pointers stay
     * inside in[]; computing source-1 would form a pointer before the start
     * of the array, which is undefined pointer arithmetic.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    /* Test the condition when source == sourceLimit */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /*Test for the condition where we have a truncated char*/
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
2779
2780 static void
TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv)2781 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2782 const UChar* uSource;
2783 const UChar* uSourceLimit;
2784 const char* cSource;
2785 const char* cSourceLimit;
2786 UChar *uTargetLimit =NULL;
2787 UChar *uTarget;
2788 char *cTarget;
2789 const char *cTargetLimit;
2790 char *cBuf;
2791 UChar *uBuf; /*,*test;*/
2792 int32_t uBufSize = 120;
2793 int len=0;
2794 int i=2;
2795 UErrorCode errorCode=U_ZERO_ERROR;
2796 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2797 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2798 ucnv_reset(cnv);
2799 for(;--i>0; ){
2800 uSource = (UChar*) source;
2801 uSourceLimit=(const UChar*)sourceLimit;
2802 cTarget = cBuf;
2803 uTarget = uBuf;
2804 cSource = cBuf;
2805 cTargetLimit = cBuf;
2806 uTargetLimit = uBuf;
2807
2808 do{
2809
2810 cTargetLimit = cTargetLimit+ i;
2811 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,false, &errorCode);
2812 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2813 errorCode=U_ZERO_ERROR;
2814 continue;
2815 }
2816
2817 if(U_FAILURE(errorCode)){
2818 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2819 return;
2820 }
2821
2822 }while (uSource<uSourceLimit);
2823
2824 cSourceLimit =cTarget;
2825 do{
2826 uTargetLimit=uTargetLimit+i;
2827 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,false,&errorCode);
2828 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2829 errorCode=U_ZERO_ERROR;
2830 continue;
2831 }
2832 if(U_FAILURE(errorCode)){
2833 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2834 return;
2835 }
2836 }while(cSource<cSourceLimit);
2837
2838 uSource = source;
2839 /*test =uBuf;*/
2840 for(len=0;len<(int)(source - sourceLimit);len++){
2841 if(uBuf[len]!=uSource[len]){
2842 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2843 }
2844 }
2845 }
2846 free(uBuf);
2847 free(cBuf);
2848 }
2849 /* Test for Jitterbug 778 */
TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv)2850 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2851 const UChar* uSource;
2852 const UChar* uSourceLimit;
2853 const char* cSource;
2854 UChar *uTargetLimit =NULL;
2855 UChar *uTarget;
2856 char *cTarget;
2857 const char *cTargetLimit;
2858 char *cBuf;
2859 UChar *uBuf,*test;
2860 int32_t uBufSize = 120;
2861 int numCharsInTarget=0;
2862 UErrorCode errorCode=U_ZERO_ERROR;
2863 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2864 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2865 uSource = source;
2866 uSourceLimit=sourceLimit;
2867 cTarget = cBuf;
2868 cTargetLimit = cBuf +uBufSize*5;
2869 uTarget = uBuf;
2870 uTargetLimit = uBuf+ uBufSize*5;
2871 ucnv_reset(cnv);
2872 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2873 if(U_FAILURE(errorCode)){
2874 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2875 return;
2876 }
2877 cSource = cBuf;
2878 test =uBuf;
2879 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2880 if(U_FAILURE(errorCode)){
2881 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2882 return;
2883 }
2884 uSource = source;
2885 while(uSource<uSourceLimit){
2886 if(*test!=*uSource){
2887
2888 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2889 }
2890 uSource++;
2891 test++;
2892 }
2893 free(uBuf);
2894 free(cBuf);
2895 }
2896
/*
 * Converts [source, sourceLimit) from Unicode to the converter's charset and
 * back again while the *source* limit is advanced by only one unit per call;
 * the target buffers are large. U_BUFFER_OVERFLOW_ERROR is cleared and the
 * call repeated; any other failure is logged and ends the test.
 *
 * source       first UTF-16 code unit of the input
 * sourceLimit  one past the last UTF-16 code unit of the input
 * cnv          open converter under test; it is reset before use
 */
static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL;
    int32_t uBufSize = 120;
    int len=0;
    int i=2;
    UErrorCode errorCode=U_ZERO_ERROR;

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf = (char*)malloc(uBufSize * sizeof(char) * 10);

    ucnv_reset(cnv);
    /* i starts at 2 and is pre-decremented, so the body runs exactly once. */
    for(;--i>0;){
        uSource = (const UChar*)source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        uTargetLimit = uBuf+uBufSize*5;
        cTargetLimit = cBuf+uBufSize*10;
        uSourceLimit=uSource;

        /* Unicode -> bytes: reveal one more input code unit per call. */
        do{

            if (uSourceLimit < sourceLimit) {
                uSourceLimit = uSourceLimit+1;
            }
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,false, &errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;   /* re-evaluates the loop condition below */
            }

            if(U_FAILURE(errorCode)){
                log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }

        }while (uSource<sourceLimit);

        /* bytes -> Unicode: reveal one more input byte per call, up to cTarget. */
        cSourceLimit =cBuf;
        do{
            if (cSourceLimit < cTarget) {
                cSourceLimit = cSourceLimit+1;
            }
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,false,&errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;
            }
            if(U_FAILURE(errorCode)){
                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while(cSource<cTarget);

        uSource = (const UChar*)source;
        /*
         * NOTE(review): the bound is (source - sourceLimit), which is never
         * positive for a valid range, so this verification loop does not run.
         * It is left unchanged on purpose: switching it on presumably also
         * requires flushing the converter after the last input unit - confirm
         * before flipping it.
         */
        for(;len<(int)(source - sourceLimit);len++){
            if(uBuf[len]!=uSource[len]){
                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
            }
        }
    }

cleanup:
    /* Single exit so the scratch buffers are released on error paths as well. */
    free(uBuf);
    free(cBuf);
}
2971 static void
TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, const uint16_t results[], const char* message)2972 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2973 const uint16_t results[], const char* message){
2974 /* const char* s0; */
2975 const char* s=(char*)source;
2976 const uint16_t *r=results;
2977 UErrorCode errorCode=U_ZERO_ERROR;
2978 uint32_t c,exC;
2979 ucnv_reset(cnv);
2980 while(s<limit) {
2981 /* s0=s; */
2982 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2983 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2984 break; /* no more significant input */
2985 } else if(U_FAILURE(errorCode)) {
2986 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2987 break;
2988 } else {
2989 if(U16_IS_LEAD(*r)){
2990 int i =0, len = 2;
2991 U16_NEXT(r, i, len, exC);
2992 r++;
2993 }else{
2994 exC = *r;
2995 }
2996 if(c!=(uint32_t)(exC))
2997 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
2998 }
2999 r++;
3000 }
3001 }
3002
TestJitterbug930(const char* enc)3003 static int TestJitterbug930(const char* enc){
3004 UErrorCode err = U_ZERO_ERROR;
3005 UConverter*converter;
3006 char out[80];
3007 char*target = out;
3008 UChar in[4];
3009 const UChar*source = in;
3010 int32_t off[80];
3011 int32_t* offsets = off;
3012 int numOffWritten=0;
3013 UBool flush = 0;
3014 converter = my_ucnv_open(enc, &err);
3015
3016 in[0] = 0x41; /* 0x4E00;*/
3017 in[1] = 0x4E01;
3018 in[2] = 0x4E02;
3019 in[3] = 0x4E03;
3020
3021 memset(off, '*', sizeof(off));
3022
3023 ucnv_fromUnicode (converter,
3024 &target,
3025 target+2,
3026 &source,
3027 source+3,
3028 offsets,
3029 flush,
3030 &err);
3031
3032 /* writes three bytes into the output buffer: 41 1B 24
3033 * but offsets contains 0 1 1
3034 */
3035 while(*offsets< off[10]){
3036 numOffWritten++;
3037 offsets++;
3038 }
3039 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3040 if(numOffWritten!= (int)(target-out)){
3041 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3042 }
3043
3044 err = U_ZERO_ERROR;
3045
3046 memset(off,'*' , sizeof(off));
3047
3048 flush = 1;
3049 offsets=off;
3050 ucnv_fromUnicode (converter,
3051 &target,
3052 target+4,
3053 &source,
3054 source,
3055 offsets,
3056 flush,
3057 &err);
3058 numOffWritten=0;
3059 while(*offsets< off[10]){
3060 numOffWritten++;
3061 if(*offsets!= -1){
3062 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3063 }
3064 offsets++;
3065 }
3066
3067 /* writes 42 43 7A into output buffer,
3068 * offsets contains -1 -1 -1
3069 */
3070 ucnv_close(converter);
3071 return 0;
3072 }
3073
3074 static void
TestHZnull3075 TestHZ() {
3076 /* test input */
3077 static const uint16_t in[]={
3078 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3079 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3080 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3081 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3082 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3083 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3084 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3085 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3086 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3087 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3088 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3089 0x005A, 0x005B, 0x005C, 0x000A
3090 };
3091 const UChar* uSource;
3092 const UChar* uSourceLimit;
3093 const char* cSource;
3094 const char* cSourceLimit;
3095 UChar *uTargetLimit =NULL;
3096 UChar *uTarget;
3097 char *cTarget;
3098 const char *cTargetLimit;
3099 char *cBuf = NULL;
3100 UChar *uBuf = NULL;
3101 UChar *test;
3102 int32_t uBufSize = 120;
3103 UErrorCode errorCode=U_ZERO_ERROR;
3104 UConverter *cnv = NULL;
3105 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3106 int32_t* myOff= offsets;
3107 cnv=ucnv_open("HZ", &errorCode);
3108 if(U_FAILURE(errorCode)) {
3109 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3110 goto cleanup;
3111 }
3112
3113 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3114 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3115 uSource = (const UChar*)in;
3116 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3117 cTarget = cBuf;
3118 cTargetLimit = cBuf +uBufSize*5;
3119 uTarget = uBuf;
3120 uTargetLimit = uBuf+ uBufSize*5;
3121 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3122 if(U_FAILURE(errorCode)){
3123 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3124 goto cleanup;
3125 }
3126 cSource = cBuf;
3127 cSourceLimit =cTarget;
3128 test =uBuf;
3129 myOff=offsets;
3130 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3131 if(U_FAILURE(errorCode)){
3132 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3133 goto cleanup;
3134 }
3135 uSource = (const UChar*)in;
3136 while(uSource<uSourceLimit){
3137 if(*test!=*uSource){
3138
3139 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3140 }
3141 uSource++;
3142 test++;
3143 }
3144 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3145 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3146 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3147 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3148 TestJitterbug930("csISO2022JP");
3149
3150 cleanup:
3151 ucnv_close(cnv);
3152 free(offsets);
3153 free(uBuf);
3154 free(cBuf);
3155 }
3156
3157 static void
TestISCIInull3158 TestISCII(){
3159 /* test input */
3160 static const uint16_t in[]={
3161 /* test full range of Devanagari */
3162 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3163 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3164 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3165 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3166 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3167 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3168 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3169 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3170 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3171 0x096D,0x096E,0x096F,
3172 /* test Soft halant*/
3173 0x0915,0x094d, 0x200D,
3174 /* test explicit halant */
3175 0x0915,0x094d, 0x200c,
3176 /* test double danda */
3177 0x965,
3178 /* test ASCII */
3179 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3180 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3181 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3182 /* tests from Lotus */
3183 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3184 0x0930,0x094D,0x200D,
3185 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3186 0x0915,0x0921,0x002B,0x095F,
3187 /* tamil range */
3188 0x0B86, 0xB87, 0xB88,
3189 /* telugu range */
3190 0x0C05, 0x0C02, 0x0C03,0x0c31,
3191 /* kannada range */
3192 0x0C85, 0xC82, 0x0C83,
3193 /* test Abbr sign and Anudatta */
3194 0x0970, 0x952,
3195 /* 0x0958,
3196 0x0959,
3197 0x095A,
3198 0x095B,
3199 0x095C,
3200 0x095D,
3201 0x095E,
3202 0x095F,*/
3203 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3204 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3205 0x090C ,
3206 0x0962,
3207 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3208 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3209 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3210 0x093D /* Avagraha 0xEA, 0xE9*/,
3211 0x0958,
3212 0x0959,
3213 0x095A,
3214 0x095B,
3215 0x095C,
3216 0x095D,
3217 0x095E,
3218 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3219 };
3220 static const unsigned char byteArr[]={
3221
3222 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3223 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3224 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3225 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3226 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3227 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3228 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3229 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3230 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3231 0xf8,0xf9,0xfa,
3232 /* test soft halant */
3233 0xb3, 0xE8, 0xE9,
3234 /* test explicit halant */
3235 0xb3, 0xE8, 0xE8,
3236 /* test double danda */
3237 0xea, 0xea,
3238 /* test ASCII */
3239 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3240 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3241 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3242 /* test ATR code */
3243
3244 /* tests from Lotus */
3245 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3246 0xEF,0x42,0xCF,0xE8,0xD9,
3247 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3248 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3249 /* tamil range */
3250 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3251 /* telugu range */
3252 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3253 /* kannada range */
3254 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3255 /* anudatta and abbreviation sign */
3256 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3257
3258
3259 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3260
3261 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3262
3263 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3264
3265 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3266
3267 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3268
3269 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3270
3271 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3272
3273 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3274
3275 0xB3, 0xE9, /* Ka + NUKTA */
3276
3277 0xB4, 0xE9, /* Kha + NUKTA */
3278
3279 0xB5, 0xE9, /* Ga + NUKTA */
3280
3281 0xBA, 0xE9,
3282
3283 0xBF, 0xE9,
3284
3285 0xC0, 0xE9,
3286
3287 0xC9, 0xE9,
3288 /* INV halant RA */
3289 0xD9, 0xE8, 0xCF,
3290 0x00, 0x00A0,
3291 /* just consume unhandled codepoints */
3292 0xEF, 0x30,
3293
3294 };
3295 testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,true);
3296 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3297
3298 }
3299
3300 static void
TestISO_2022_JPnull3301 TestISO_2022_JP() {
3302 /* test input */
3303 static const uint16_t in[]={
3304 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3305 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3306 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3307 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3308 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3309 0x201D, 0x3014, 0x000D, 0x000A,
3310 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3311 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3312 };
3313 const UChar* uSource;
3314 const UChar* uSourceLimit;
3315 const char* cSource;
3316 const char* cSourceLimit;
3317 UChar *uTargetLimit =NULL;
3318 UChar *uTarget;
3319 char *cTarget;
3320 const char *cTargetLimit;
3321 char *cBuf = NULL;
3322 UChar *uBuf = NULL;
3323 UChar *test;
3324 int32_t uBufSize = 120;
3325 UErrorCode errorCode=U_ZERO_ERROR;
3326 UConverter *cnv = NULL;
3327 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3328 int32_t* myOff= offsets;
3329 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3330 if(U_FAILURE(errorCode)) {
3331 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3332 goto cleanup;
3333 }
3334
3335 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3336 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3337 uSource = (const UChar*)in;
3338 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3339 cTarget = cBuf;
3340 cTargetLimit = cBuf +uBufSize*5;
3341 uTarget = uBuf;
3342 uTargetLimit = uBuf+ uBufSize*5;
3343 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3344 if(U_FAILURE(errorCode)){
3345 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3346 goto cleanup;
3347 }
3348 cSource = cBuf;
3349 cSourceLimit =cTarget;
3350 test =uBuf;
3351 myOff=offsets;
3352 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3353 if(U_FAILURE(errorCode)){
3354 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3355 goto cleanup;
3356 }
3357
3358 uSource = (const UChar*)in;
3359 while(uSource<uSourceLimit){
3360 if(*test!=*uSource){
3361
3362 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3363 }
3364 uSource++;
3365 test++;
3366 }
3367
3368 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3369 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3370 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3371 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3372 TestJitterbug930("csISO2022JP");
3373
3374 cleanup:
3375 ucnv_close(cnv);
3376 free(uBuf);
3377 free(cBuf);
3378 free(offsets);
3379 }
3380
TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen)3381 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3382 const UChar* uSource;
3383 const UChar* uSourceLimit;
3384 const char* cSource;
3385 const char* cSourceLimit;
3386 UChar *uTargetLimit =NULL;
3387 UChar *uTarget;
3388 char *cTarget;
3389 const char *cTargetLimit;
3390 char *cBuf;
3391 UChar *uBuf,*test;
3392 int32_t uBufSize = 120*10;
3393 UErrorCode errorCode=U_ZERO_ERROR;
3394 UConverter *cnv;
3395 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3396 int32_t* myOff= offsets;
3397 cnv=my_ucnv_open(conv, &errorCode);
3398 if(U_FAILURE(errorCode)) {
3399 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3400 return;
3401 }
3402
3403 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3404 cBuf =(char*)malloc(uBufSize * sizeof(char));
3405 uSource = (const UChar*)in;
3406 uSourceLimit=uSource+len;
3407 cTarget = cBuf;
3408 cTargetLimit = cBuf +uBufSize;
3409 uTarget = uBuf;
3410 uTargetLimit = uBuf+ uBufSize;
3411 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3412 if(U_FAILURE(errorCode)){
3413 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3414 return;
3415 }
3416 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3417 cSource = cBuf;
3418 cSourceLimit =cTarget;
3419 test =uBuf;
3420 myOff=offsets;
3421 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3422 if(U_FAILURE(errorCode)){
3423 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3424 return;
3425 }
3426
3427 uSource = (const UChar*)in;
3428 while(uSource<uSourceLimit){
3429 if(*test!=*uSource){
3430 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3431 }
3432 uSource++;
3433 test++;
3434 }
3435 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3436 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3437 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3438 if(byteArr && byteArrLen!=0){
3439 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3440 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3441 {
3442 cSource = byteArr;
3443 cSourceLimit = cSource+byteArrLen;
3444 test=uBuf;
3445 myOff = offsets;
3446 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3447 if(U_FAILURE(errorCode)){
3448 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3449 return;
3450 }
3451
3452 uSource = (const UChar*)in;
3453 while(uSource<uSourceLimit){
3454 if(*test!=*uSource){
3455 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3456 }
3457 uSource++;
3458 test++;
3459 }
3460 }
3461 }
3462
3463 ucnv_close(cnv);
3464 free(uBuf);
3465 free(cBuf);
3466 free(offsets);
3467 }
3468 static UChar U_CALLCONV
_charAt(int32_t offset, void *context)3469 _charAt(int32_t offset, void *context) {
3470 return ((char*)context)[offset];
3471 }
3472
3473 static int32_t
unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status)3474 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3475 int32_t srcIndex=0;
3476 int32_t dstIndex=0;
3477 if(U_FAILURE(*status)){
3478 return 0;
3479 }
3480 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3481 *status = U_ILLEGAL_ARGUMENT_ERROR;
3482 return 0;
3483 }
3484 if(srcLen==-1){
3485 srcLen = (int32_t)uprv_strlen(src);
3486 }
3487
3488 for (; srcIndex<srcLen; ) {
3489 UChar32 c = src[srcIndex++];
3490 if (c == 0x005C /*'\\'*/) {
3491 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3492 if (c == (UChar32)0xFFFFFFFF) {
3493 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3494 break; /* invalid escape sequence */
3495 }
3496 }
3497 if(dstIndex < dstLen){
3498 if(c>0xFFFF){
3499 dst[dstIndex++] = U16_LEAD(c);
3500 if(dstIndex<dstLen){
3501 dst[dstIndex]=U16_TRAIL(c);
3502 }else{
3503 *status=U_BUFFER_OVERFLOW_ERROR;
3504 }
3505 }else{
3506 dst[dstIndex]=(UChar)c;
3507 }
3508
3509 }else{
3510 *status = U_BUFFER_OVERFLOW_ERROR;
3511 }
3512 dstIndex++; /* for preflighting */
3513 }
3514 return dstIndex;
3515 }
3516
3517 static void
TestFullRoundtrip(const char* cp)3518 TestFullRoundtrip(const char* cp){
3519 UChar usource[10] ={0};
3520 UChar nsrc[10] = {0};
3521 uint32_t i=1;
3522 int len=0, ulen;
3523 nsrc[0]=0x0061;
3524 /* Test codepoint 0 */
3525 TestConv(usource,1,cp,"",NULL,0);
3526 TestConv(usource,2,cp,"",NULL,0);
3527 nsrc[2]=0x5555;
3528 TestConv(nsrc,3,cp,"",NULL,0);
3529
3530 for(;i<=0x10FFFF;i++){
3531 if(i==0xD800){
3532 i=0xDFFF;
3533 continue;
3534 }
3535 if(i<=0xFFFF){
3536 usource[0] =(UChar) i;
3537 len=1;
3538 }else{
3539 usource[0]=U16_LEAD(i);
3540 usource[1]=U16_TRAIL(i);
3541 len=2;
3542 }
3543 ulen=len;
3544 if(i==0x80) {
3545 usource[2]=0;
3546 }
3547 /* Test only single code points */
3548 TestConv(usource,ulen,cp,"",NULL,0);
3549 /* Test codepoint repeated twice */
3550 usource[ulen]=usource[0];
3551 usource[ulen+1]=usource[1];
3552 ulen+=len;
3553 TestConv(usource,ulen,cp,"",NULL,0);
3554 /* Test codepoint repeated 3 times */
3555 usource[ulen]=usource[0];
3556 usource[ulen+1]=usource[1];
3557 ulen+=len;
3558 TestConv(usource,ulen,cp,"",NULL,0);
3559 /* Test codepoint in between 2 codepoints */
3560 nsrc[1]=usource[0];
3561 nsrc[2]=usource[1];
3562 nsrc[len+1]=0x5555;
3563 TestConv(nsrc,len+2,cp,"",NULL,0);
3564 uprv_memset(usource,0,sizeof(UChar)*10);
3565 }
3566 }
3567
3568 static void
TestRoundTrippingAllUTF(void)3569 TestRoundTrippingAllUTF(void){
3570 if(!getTestOption(QUICK_OPTION)){
3571 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3572 TestFullRoundtrip("BOCU-1");
3573 log_verbose("Running exhaustive round trip test for SCSU\n");
3574 TestFullRoundtrip("SCSU");
3575 log_verbose("Running exhaustive round trip test for UTF-8\n");
3576 TestFullRoundtrip("UTF-8");
3577 log_verbose("Running exhaustive round trip test for CESU-8\n");
3578 TestFullRoundtrip("CESU-8");
3579 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3580 TestFullRoundtrip("UTF-16BE");
3581 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3582 TestFullRoundtrip("UTF-16LE");
3583 log_verbose("Running exhaustive round trip test for UTF-16\n");
3584 TestFullRoundtrip("UTF-16");
3585 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3586 TestFullRoundtrip("UTF-32BE");
3587 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3588 TestFullRoundtrip("UTF-32LE");
3589 log_verbose("Running exhaustive round trip test for UTF-32\n");
3590 TestFullRoundtrip("UTF-32");
3591 log_verbose("Running exhaustive round trip test for UTF-7\n");
3592 TestFullRoundtrip("UTF-7");
3593 log_verbose("Running exhaustive round trip test for UTF-7\n");
3594 TestFullRoundtrip("UTF-7,version=1");
3595 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3596 TestFullRoundtrip("IMAP-mailbox-name");
3597 /*
3598 *
3599 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3600 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3601 * The old mappings remain as fallbacks.
3602 * This test may be reintroduced at a later time.
3603 *
3604 * 110118 - mow
3605 */
3606 /*
3607 log_verbose("Running exhaustive round trip test for GB18030\n");
3608 TestFullRoundtrip("GB18030");
3609 */
3610 }
3611 }
3612
3613 static void
TestSCSUnull3614 TestSCSU() {
3615
3616 static const uint16_t germanUTF16[]={
3617 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3618 };
3619
3620 static const uint8_t germanSCSU[]={
3621 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3622 };
3623
3624 static const uint16_t russianUTF16[]={
3625 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3626 };
3627
3628 static const uint8_t russianSCSU[]={
3629 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3630 };
3631
3632 static const uint16_t japaneseUTF16[]={
3633 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3634 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3635 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3636 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3637 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3638 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3639 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3640 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3641 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3642 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3643 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3644 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3645 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3646 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3647 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3648 };
3649
3650 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3651 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3652 static const uint8_t japaneseSCSU[]={
3653 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3654 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3655 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3656 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3657 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3658 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3659 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3660 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3661 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3662 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3663 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3664 0xcb, 0x82
3665 };
3666
3667 static const uint16_t allFeaturesUTF16[]={
3668 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3669 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3670 0x01df, 0xf000, 0xdbff, 0xdfff
3671 };
3672
3673 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3674 * result here (34B vs. 35B)
3675 */
3676 static const uint8_t allFeaturesSCSU[]={
3677 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3678 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3679 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3680 0xdf, 0x14, 0x80, 0x15, 0xff
3681 };
3682 static const uint16_t monkeyIn[]={
3683 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3684 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3685 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3686 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3687 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3688 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3689 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3690 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3691 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3692 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3693 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3694 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3695 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3696 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3697 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3698 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3699 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3700 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3701 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3702 /* test non-BMP code points */
3703 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3704 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3705 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3706 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3707 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3708 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3709 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3710 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3711 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3712 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3713 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3714
3715
3716 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3717 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3718 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3719 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3720 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3721 };
3722 static const char *fTestCases [] = {
3723 "\\ud800\\udc00", /* smallest surrogate*/
3724 "\\ud8ff\\udcff",
3725 "\\udBff\\udFff", /* largest surrogate pair*/
3726 "\\ud834\\udc00",
3727 "\\U0010FFFF",
3728 "Hello \\u9292 \\u9192 World!",
3729 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3730 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3731
3732 "\\u0648\\u06c8", /* catch missing reset*/
3733 "\\u0648\\u06c8",
3734
3735 "\\u4444\\uE001", /* lowest quotable*/
3736 "\\u4444\\uf2FF", /* highest quotable*/
3737 "\\u4444\\uf188\\u4444",
3738 "\\u4444\\uf188\\uf288",
3739 "\\u4444\\uf188abc\\u0429\\uf288",
3740 "\\u9292\\u2222",
3741 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3742 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3743 "Hello World!123456",
3744 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3745
3746 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3747 "abc\\u4411d", /* uses SQU*/
3748 "abc\\u4411\\u4412d",/* uses SCU*/
3749 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3750 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3751 "\\u9292\\u2222",
3752 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3753 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3754 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3755
3756 "", /* empty input*/
3757 "\\u0000", /* smallest BMP character*/
3758 "\\uFFFF", /* largest BMP character*/
3759
3760 /* regression tests*/
3761 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3762 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3763 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3764 "\\u0041\\u00df\\u0401\\u015f",
3765 "\\u9066\\u2123abc",
3766 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3767 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3768 };
3769 int i=0;
3770 for(;i<UPRV_LENGTHOF(fTestCases);i++){
3771 const char* cSrc = fTestCases[i];
3772 UErrorCode status = U_ZERO_ERROR;
3773 int32_t cSrcLen,srcLen;
3774 UChar* src;
3775 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3776 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3777 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3778 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3779 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3780 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3781 free(src);
3782 }
3783 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3784 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3785 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3786 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3787 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3788 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3789 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3790 }
3791
3792 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug2346null3793 static void TestJitterbug2346(){
3794 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3795 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3796 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3797
3798 UChar uTarget[500]={'\0'};
3799 UChar* utarget=uTarget;
3800 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3801
3802 char cTarget[500]={'\0'};
3803 char* ctarget=cTarget;
3804 char* ctargetLimit=cTarget+sizeof(cTarget);
3805 const char* csource=source;
3806 UChar* temp = expected;
3807 UErrorCode err=U_ZERO_ERROR;
3808
3809 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3810 if(U_FAILURE(err)) {
3811 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3812 return;
3813 }
3814 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,true,&err);
3815 if(U_FAILURE(err)) {
3816 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3817 return;
3818 }
3819 utargetLimit=utarget;
3820 utarget = uTarget;
3821 while(utarget<utargetLimit){
3822 if(*temp!=*utarget){
3823
3824 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3825 }
3826 utarget++;
3827 temp++;
3828 }
3829 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,true,&err);
3830 if(U_FAILURE(err)) {
3831 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3832 return;
3833 }
3834 ctargetLimit=ctarget;
3835 ctarget =cTarget;
3836 ucnv_close(conv);
3837
3838
3839 }
3840
3841 static void
TestISO_2022_JP_1null3842 TestISO_2022_JP_1() {
3843 /* test input */
3844 static const uint16_t in[]={
3845 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3846 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3847 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3848 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3849 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3850 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3851 0x201D, 0x000D, 0x000A,
3852 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3853 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3854 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3855 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3856 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3857 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3858 };
3859 const UChar* uSource;
3860 const UChar* uSourceLimit;
3861 const char* cSource;
3862 const char* cSourceLimit;
3863 UChar *uTargetLimit =NULL;
3864 UChar *uTarget;
3865 char *cTarget;
3866 const char *cTargetLimit;
3867 char *cBuf;
3868 UChar *uBuf,*test;
3869 int32_t uBufSize = 120;
3870 UErrorCode errorCode=U_ZERO_ERROR;
3871 UConverter *cnv;
3872
3873 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3874 if(U_FAILURE(errorCode)) {
3875 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3876 return;
3877 }
3878
3879 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3880 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3881 uSource = (const UChar*)in;
3882 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3883 cTarget = cBuf;
3884 cTargetLimit = cBuf +uBufSize*5;
3885 uTarget = uBuf;
3886 uTargetLimit = uBuf+ uBufSize*5;
3887 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,true, &errorCode);
3888 if(U_FAILURE(errorCode)){
3889 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3890 return;
3891 }
3892 cSource = cBuf;
3893 cSourceLimit =cTarget;
3894 test =uBuf;
3895 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,true,&errorCode);
3896 if(U_FAILURE(errorCode)){
3897 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3898 return;
3899 }
3900 uSource = (const UChar*)in;
3901 while(uSource<uSourceLimit){
3902 if(*test!=*uSource){
3903
3904 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3905 }
3906 uSource++;
3907 test++;
3908 }
3909 /*ucnv_close(cnv);
3910 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3911 /*Test for the condition where there is an invalid character*/
3912 ucnv_reset(cnv);
3913 {
3914 static const uint8_t source2[]={0x0e,0x24,0x053};
3915 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3916 }
3917 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3918 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3919 ucnv_close(cnv);
3920 free(uBuf);
3921 free(cBuf);
3922 }
3923
3924 static void
TestISO_2022_JP_2null3925 TestISO_2022_JP_2() {
3926 /* test input */
3927 static const uint16_t in[]={
3928 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3929 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3930 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3931 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3932 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3933 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3934 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3935 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3936 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3937 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3938 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3939 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3940 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3941 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3942 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3943 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3944 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3945 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3946 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3947 };
3948 const UChar* uSource;
3949 const UChar* uSourceLimit;
3950 const char* cSource;
3951 const char* cSourceLimit;
3952 UChar *uTargetLimit =NULL;
3953 UChar *uTarget;
3954 char *cTarget;
3955 const char *cTargetLimit;
3956 char *cBuf = NULL;
3957 UChar *uBuf = NULL;
3958 UChar *test;
3959 int32_t uBufSize = 120;
3960 UErrorCode errorCode=U_ZERO_ERROR;
3961 UConverter *cnv = NULL;
3962 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3963 int32_t* myOff= offsets;
3964 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3965 if(U_FAILURE(errorCode)) {
3966 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3967 goto cleanup;
3968 }
3969
3970 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3971 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3972 uSource = (const UChar*)in;
3973 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3974 cTarget = cBuf;
3975 cTargetLimit = cBuf +uBufSize*5;
3976 uTarget = uBuf;
3977 uTargetLimit = uBuf+ uBufSize*5;
3978 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3979 if(U_FAILURE(errorCode)){
3980 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3981 goto cleanup;
3982 }
3983 cSource = cBuf;
3984 cSourceLimit =cTarget;
3985 test =uBuf;
3986 myOff=offsets;
3987 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3988 if(U_FAILURE(errorCode)){
3989 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3990 goto cleanup;
3991 }
3992 uSource = (const UChar*)in;
3993 while(uSource<uSourceLimit){
3994 if(*test!=*uSource){
3995
3996 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3997 }
3998 uSource++;
3999 test++;
4000 }
4001 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4002 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4003 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4004 /*Test for the condition where there is an invalid character*/
4005 ucnv_reset(cnv);
4006 {
4007 static const uint8_t source2[]={0x0e,0x24,0x053};
4008 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4009 }
4010
4011 cleanup:
4012 ucnv_close(cnv);
4013 free(uBuf);
4014 free(cBuf);
4015 free(offsets);
4016 }
4017
4018 static void
TestISO_2022_KRnull4019 TestISO_2022_KR() {
4020 /* test input */
4021 static const uint16_t in[]={
4022 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4023 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4024 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4025 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4026 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4027 ,0x53E3,0x53E4,0x000A,0x000D};
4028 const UChar* uSource;
4029 const UChar* uSourceLimit;
4030 const char* cSource;
4031 const char* cSourceLimit;
4032 UChar *uTargetLimit =NULL;
4033 UChar *uTarget;
4034 char *cTarget;
4035 const char *cTargetLimit;
4036 char *cBuf = NULL;
4037 UChar *uBuf = NULL;
4038 UChar *test;
4039 int32_t uBufSize = 120;
4040 UErrorCode errorCode=U_ZERO_ERROR;
4041 UConverter *cnv = NULL;
4042 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4043 int32_t* myOff= offsets;
4044 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4045 if(U_FAILURE(errorCode)) {
4046 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4047 goto cleanup;
4048 }
4049
4050 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4051 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4052 uSource = (const UChar*)in;
4053 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4054 cTarget = cBuf;
4055 cTargetLimit = cBuf +uBufSize*5;
4056 uTarget = uBuf;
4057 uTargetLimit = uBuf+ uBufSize*5;
4058 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4059 if(U_FAILURE(errorCode)){
4060 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4061 goto cleanup;
4062 }
4063 cSource = cBuf;
4064 cSourceLimit =cTarget;
4065 test =uBuf;
4066 myOff=offsets;
4067 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4068 if(U_FAILURE(errorCode)){
4069 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4070 goto cleanup;
4071 }
4072 uSource = (const UChar*)in;
4073 while(uSource<uSourceLimit){
4074 if(*test!=*uSource){
4075 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4076 }
4077 uSource++;
4078 test++;
4079 }
4080 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4081 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4082 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4083 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4084 TestJitterbug930("csISO2022KR");
4085 /*Test for the condition where there is an invalid character*/
4086 ucnv_reset(cnv);
4087 {
4088 static const uint8_t source2[]={0x1b,0x24,0x053};
4089 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4090 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4091 }
4092
4093 cleanup:
4094 ucnv_close(cnv);
4095 free(uBuf);
4096 free(cBuf);
4097 free(offsets);
4098 }
4099
4100 static void
TestISO_2022_KR_1null4101 TestISO_2022_KR_1() {
4102 /* test input */
4103 static const uint16_t in[]={
4104 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4105 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4106 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4107 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4108 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4109 ,0x53E3,0x53E4,0x000A,0x000D};
4110 const UChar* uSource;
4111 const UChar* uSourceLimit;
4112 const char* cSource;
4113 const char* cSourceLimit;
4114 UChar *uTargetLimit =NULL;
4115 UChar *uTarget;
4116 char *cTarget;
4117 const char *cTargetLimit;
4118 char *cBuf = NULL;
4119 UChar *uBuf = NULL;
4120 UChar *test;
4121 int32_t uBufSize = 120;
4122 UErrorCode errorCode=U_ZERO_ERROR;
4123 UConverter *cnv = NULL;
4124 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4125 int32_t* myOff= offsets;
4126 cnv=ucnv_open("ibm-25546", &errorCode);
4127 if(U_FAILURE(errorCode)) {
4128 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4129 goto cleanup;
4130 }
4131
4132 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4133 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4134 uSource = (const UChar*)in;
4135 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4136 cTarget = cBuf;
4137 cTargetLimit = cBuf +uBufSize*5;
4138 uTarget = uBuf;
4139 uTargetLimit = uBuf+ uBufSize*5;
4140 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4141 if(U_FAILURE(errorCode)){
4142 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4143 goto cleanup;
4144 }
4145 cSource = cBuf;
4146 cSourceLimit =cTarget;
4147 test =uBuf;
4148 myOff=offsets;
4149 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4150 if(U_FAILURE(errorCode)){
4151 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4152 goto cleanup;
4153 }
4154 uSource = (const UChar*)in;
4155 while(uSource<uSourceLimit){
4156 if(*test!=*uSource){
4157 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4158 }
4159 uSource++;
4160 test++;
4161 }
4162 ucnv_reset(cnv);
4163 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4164 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4165 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4166 ucnv_reset(cnv);
4167 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4168 /*Test for the condition where there is an invalid character*/
4169 ucnv_reset(cnv);
4170 {
4171 static const uint8_t source2[]={0x1b,0x24,0x053};
4172 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4173 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4174 }
4175
4176 cleanup:
4177 ucnv_close(cnv);
4178 free(uBuf);
4179 free(cBuf);
4180 free(offsets);
4181 }
4182
TestJitterbug2411null4183 static void TestJitterbug2411(){
4184 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4185 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4186 UConverter* kr=NULL, *kr1=NULL;
4187 UErrorCode errorCode = U_ZERO_ERROR;
4188 UChar tgt[100]={'\0'};
4189 UChar* target = tgt;
4190 UChar* targetLimit = target+100;
4191 kr=ucnv_open("iso-2022-kr", &errorCode);
4192 if(U_FAILURE(errorCode)) {
4193 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4194 return;
4195 }
4196 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,true,&errorCode);
4197 if(U_FAILURE(errorCode)) {
4198 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4199 return;
4200 }
4201 kr1 = ucnv_open("ibm-25546", &errorCode);
4202 if(U_FAILURE(errorCode)) {
4203 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4204 return;
4205 }
4206 target = tgt;
4207 targetLimit = target+100;
4208 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,true,&errorCode);
4209
4210 if(U_FAILURE(errorCode)) {
4211 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4212 return;
4213 }
4214
4215 ucnv_close(kr);
4216 ucnv_close(kr1);
4217
4218 }
4219
/*
 * To-Unicode checks for the "JIS", "JIS7" and "JIS8" converter names.
 *
 * For each name the sample bytes, the expected UTF-16 code units and the
 * expected per-unit source offsets are passed to testConvertToU().
 * (The from-Unicode direction was moved to testdata/conversion.txt.)
 */
static void
TestJIS(){
    /* ---- "JIS" ---- */
    static const uint8_t jisBytes[] = {
        0x1b,0x28,0x48,0x41,0x42,               /* jis-Roman */
        0x1b,0x28,0x49,0x41,0x42,               /* Katakana Set */
        0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /* recognize and ignore <esc>&@ */
    };
    static const uint16_t jisUnicode[] = {
        0x0041, 0x0042,
        0xFF81, 0xFF82,
        0x3000
    };
    static const int32_t jisOffsets[] = {
        3, 4,
        8, 9,
        16
    };

    /* ---- "JIS7" ---- */
    static const uint8_t jis7Bytes[] = {
        0x1b,0x28,0x48,0x41,0x42,               /* JIS7-Roman */
        0x1b,0x28,0x49,0x41,0x42,               /* Katakana Set */
        0x1b,0x24,0x42,0x21,0x21,
        0x0e,0x41,0x42,0x0f,                    /* Test Katakana set with SI and SO */
        0x21,0x22,
        0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /* recognize and ignore <esc>&@ */
    };
    static const uint16_t jis7Unicode[] = {
        0x0041, 0x0042,
        0xFF81, 0xFF82,
        0x3000,
        0xFF81, 0xFF82,
        0x3001,
        0x3000
    };
    static const int32_t jis7Offsets[] = {
        3, 4,
        8, 9,
        13, 16,
        17,
        19, 27
    };

    /* ---- "JIS8" ---- */
    static const uint8_t jis8Bytes[] = {
        0x1b,0x28,0x48,0x41,0x42,               /* JIS8-Roman */
        0xa1,0xc8,0xd9,                         /* Katakana Set */
        0x1b,0x28,0x42,
        0x41,0x42,
        0xb1,0xc3,                              /* Katakana Set */
        0x1b,0x24,0x42,0x21,0x21
    };
    static const uint16_t jis8Unicode[] = {
        0x0041, 0x0042,
        0xff61, 0xff88, 0xff99,
        0x0041, 0x0042,
        0xff71, 0xff83,
        0x3000
    };
    static const int32_t jis8Offsets[] = {
        3, 4, 5, 6,
        7, 11, 12, 13,
        14, 18
    };

    testConvertToU(jisBytes, sizeof(jisBytes),
                   jisUnicode, UPRV_LENGTHOF(jisUnicode),
                   "JIS", jisOffsets, true);
    testConvertToU(jis7Bytes, sizeof(jis7Bytes),
                   jis7Unicode, UPRV_LENGTHOF(jis7Unicode),
                   "JIS7", jis7Offsets, true);
    testConvertToU(jis8Bytes, sizeof(jis8Bytes),
                   jis8Unicode, UPRV_LENGTHOF(jis8Unicode),
                   "JIS8", jis8Offsets, true);
}
4294
4295
4296 #if 0
4297 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4298
/*
 * Jitterbug 915 round-trip test (this function sits inside an "#if 0" region
 * and is therefore not compiled).
 *
 * Decodes cSource with the "ISO_2022_CN_EXT" converter, re-encodes the
 * resulting UTF-16 text with the same converter, and compares the re-encoded
 * bytes with cSource one byte at a time.
 *
 * NOTE(review): the early returns after a failed conversion do not close
 * `conv`.
 * NOTE(review): in the mismatch message the value printed as "Expected" is the
 * re-encoded byte (*ctarget) and the value printed as "Got" is the original
 * byte (*tempSrc); the labels look swapped.
 * NOTE(review): the comparison loop is bounded by the re-encoded length only;
 * nothing limits tempSrc to the size of cSource.
 */
static void TestJitterbug915(){
    /* tests for roundtripping of the below sequence
    \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
    \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
    \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
    \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
    \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
    \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
    \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
    */
    static const char cSource[]={
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
        0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
        0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
        0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
        0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
        0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
        0x37, 0x20, 0x2A, 0x2F
    };
    /* UTF-16 scratch buffer for the decoded text */
    UChar uTarget[500]={'\0'};
    UChar* utarget=uTarget;
    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;

    /* byte scratch buffer for the re-encoded text */
    char cTarget[500]={'\0'};
    char* ctarget=cTarget;
    char* ctargetLimit=cTarget+sizeof(cTarget);
    const char* csource=cSource;
    const char* tempSrc = cSource;
    UErrorCode err=U_ZERO_ERROR;

    UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
    if(U_FAILURE(err)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
        return;
    }
    /* bytes -> Unicode */
    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,true,&err);
    if(U_FAILURE(err)) {
        log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
        return;
    }
    /* Re-use the pointer pair as the [start, end) range of the decoded text. */
    utargetLimit=utarget;
    utarget = uTarget;
    /* Unicode -> bytes */
    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,true,&err);
    if(U_FAILURE(err)) {
        log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
        return;
    }
    /* Walk the re-encoded bytes and compare them with the original input. */
    ctargetLimit=ctarget;
    ctarget =cTarget;
    while(ctarget<ctargetLimit){
        if(*ctarget != *tempSrc){
            log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
        }
        ++ctarget;
        ++tempSrc;
    }

    ucnv_close(conv);
}
4379
/*
 * Round-trip test for the converter opened as "ISO_2022,locale=cn,version=1"
 * (this function sits inside an "#if 0" region and is therefore not compiled).
 *
 * Converts the Unicode sample `in` to bytes and back, passing an offsets array
 * to both calls, and logs every code unit that differs from the input; matching
 * units are reported through log_verbose(). The shared small-buffer helpers
 * are then run, and TestNextUCharError() is called on a short byte sequence
 * with U_ZERO_ERROR as the expected status. All exits go through `cleanup`.
 */
static void
TestISO_2022_CN_EXT() {
    /* test input */
    static const uint16_t in[]={
        /* test Non-BMP code points */
        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
        0xD869, 0xDED5,

        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
        0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
        0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
        0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
        0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
        0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
        0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
        0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
        0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
        0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
        0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
        0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
        0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
        0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,

        0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A

    };

    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    /* NULL-initialised so that `cleanup` can free them unconditionally */
    char *cBuf = NULL;
    UChar *uBuf = NULL;
    UChar *test;
    int32_t uBufSize = 180;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv = NULL;
    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    int32_t* myOff= offsets;
    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    /* cBuf is allocated with uBufSize*10 bytes, but only the first uBufSize*5
     * bytes are offered to the converter (see cTargetLimit below). */
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    uSource = (const UChar*)in;
    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    /* Unicode -> bytes */
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }
    /* bytes -> Unicode; cTarget marks the end of the bytes written above */
    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }
    /* Compare the round-tripped text with the input, one code unit at a time. */
    uSource = (const UChar*)in;
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        else{
            log_verbose(" Got: \\u%04X\n",(int)*test) ;
        }
        uSource++;
        test++;
    }
    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
    /*Test for the condition where there is an invalid character*/
    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x0e,0x24,0x053};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
    }

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
4489 #endif
4490
4491 static void
TestISO_2022_CNnull4492 TestISO_2022_CN() {
4493 /* test input */
4494 static const uint16_t in[]={
4495 /* jitterbug 951 */
4496 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4497 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4498 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4499 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4500 0x0020, 0x0045, 0x004e, 0x0044,
4501 /**/
4502 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4503 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4504 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4505 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4506 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4507 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4508 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4509 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4510 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4511 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4512 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4513 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4514 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4515 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4516 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4517 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4518 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4519
4520 };
4521 const UChar* uSource;
4522 const UChar* uSourceLimit;
4523 const char* cSource;
4524 const char* cSourceLimit;
4525 UChar *uTargetLimit =NULL;
4526 UChar *uTarget;
4527 char *cTarget;
4528 const char *cTargetLimit;
4529 char *cBuf = NULL;
4530 UChar *uBuf = NULL;
4531 UChar *test;
4532 int32_t uBufSize = 180;
4533 UErrorCode errorCode=U_ZERO_ERROR;
4534 UConverter *cnv = NULL;
4535 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4536 int32_t* myOff= offsets;
4537 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4538 if(U_FAILURE(errorCode)) {
4539 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4540 goto cleanup;
4541 }
4542
4543 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4544 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4545 uSource = (const UChar*)in;
4546 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4547 cTarget = cBuf;
4548 cTargetLimit = cBuf +uBufSize*5;
4549 uTarget = uBuf;
4550 uTargetLimit = uBuf+ uBufSize*5;
4551 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4552 if(U_FAILURE(errorCode)){
4553 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4554 goto cleanup;
4555 }
4556 cSource = cBuf;
4557 cSourceLimit =cTarget;
4558 test =uBuf;
4559 myOff=offsets;
4560 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4561 if(U_FAILURE(errorCode)){
4562 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4563 goto cleanup;
4564 }
4565 uSource = (const UChar*)in;
4566 while(uSource<uSourceLimit){
4567 if(*test!=*uSource){
4568 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4569 }
4570 else{
4571 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4572 }
4573 uSource++;
4574 test++;
4575 }
4576 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4577 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4578 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4579 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4580 TestJitterbug930("csISO2022CN");
4581 /*Test for the condition where there is an invalid character*/
4582 ucnv_reset(cnv);
4583 {
4584 static const uint8_t source2[]={0x0e,0x24,0x053};
4585 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4586 }
4587
4588 cleanup:
4589 ucnv_close(cnv);
4590 free(uBuf);
4591 free(cBuf);
4592 free(offsets);
4593 }
4594
/*
 * Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR.
 *
 * One entry of the test table: a converter to open and the bytes to feed it.
 */
typedef struct {
    const char * converterName;   /* name passed to ucnv_open(); NULL terminates the table */
    const char * inputText;       /* bytes handed to ucnv_toUnicode() */
    int          inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4601
4602 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, int32_t length, UConverterCallbackReason reason, UErrorCode * err )4603 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4604 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4605 // suppress compiler warnings about unused variables
4606 (void)context;
4607 (void)codeUnits;
4608 (void)length;
4609 if (reason > UCNV_IRREGULAR) {
4610 return;
4611 }
4612 if (reason != UCNV_IRREGULAR) {
4613 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4614 }
4615 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4616 *err = U_ZERO_ERROR;
4617 ucnv_cbToUWriteSub(toArgs,0,err);
4618 }
4619
4620 enum { kEmptySegmentToUCharsMax = 64 };
TestJitterbug6175(void)4621 static void TestJitterbug6175(void) {
4622 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4623 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4624 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4625 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4626 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4627 static const EmptySegmentTest emptySegmentTests[] = {
4628 /* converterName inputText inputTextLength */
4629 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4630 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4631 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4632 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4633 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
4634 /* terminator: */
4635 { NULL, NULL, 0, }
4636 };
4637 const EmptySegmentTest * testPtr;
4638 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4639 UErrorCode err = U_ZERO_ERROR;
4640 UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4641 if (U_FAILURE(err)) {
4642 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4643 return;
4644 }
4645 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4646 if (U_FAILURE(err)) {
4647 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4648 ucnv_close(cnv);
4649 return;
4650 }
4651 {
4652 UChar toUChars[kEmptySegmentToUCharsMax];
4653 UChar * toUCharsPtr = toUChars;
4654 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4655 const char * inCharsPtr = testPtr->inputText;
4656 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4657 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, true, &err);
4658 }
4659 ucnv_close(cnv);
4660 }
4661 }
4662
4663 static void
TestEBCDIC_STATEFULnull4664 TestEBCDIC_STATEFUL() {
4665 /* test input */
4666 static const uint8_t in[]={
4667 0x61,
4668 0x1a,
4669 0x0f, 0x4b,
4670 0x42,
4671 0x40,
4672 0x36,
4673 };
4674
4675 /* expected test results */
4676 static const int32_t results[]={
4677 /* number of bytes read, code point */
4678 1, 0x002f,
4679 1, 0x0092,
4680 2, 0x002e,
4681 1, 0xff62,
4682 1, 0x0020,
4683 1, 0x0096,
4684
4685 };
4686 static const uint8_t in2[]={
4687 0x0f,
4688 0xa1,
4689 0x01
4690 };
4691
4692 /* expected test results */
4693 static const int32_t results2[]={
4694 /* number of bytes read, code point */
4695 2, 0x203E,
4696 1, 0x0001,
4697 };
4698
4699 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4700 UErrorCode errorCode=U_ZERO_ERROR;
4701 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4702 if(U_FAILURE(errorCode)) {
4703 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4704 return;
4705 }
4706 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4707 ucnv_reset(cnv);
4708 /* Test the condition when source >= sourceLimit */
4709 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4710 ucnv_reset(cnv);
4711 /*Test for the condition where source > sourcelimit after consuming the shift character */
4712 {
4713 static const uint8_t source1[]={0x0f};
4714 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4715 }
4716 /*Test for the condition where there is an invalid character*/
4717 ucnv_reset(cnv);
4718 {
4719 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4720 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4721 }
4722 ucnv_reset(cnv);
4723 source=(const char*)in2;
4724 limit=(const char*)in2+sizeof(in2);
4725 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4726 ucnv_close(cnv);
4727
4728 }
4729
4730 static void
TestGB18030null4731 TestGB18030() {
4732 /* test input */
4733 static const uint8_t in[]={
4734 0x24,
4735 0x7f,
4736 0x81, 0x30, 0x81, 0x30,
4737 0xa8, 0xbf,
4738 0xa2, 0xe3,
4739 0xd2, 0xbb,
4740 0x82, 0x35, 0x8f, 0x33,
4741 0x84, 0x31, 0xa4, 0x39,
4742 0x90, 0x30, 0x81, 0x30,
4743 0xe3, 0x32, 0x9a, 0x35
4744 #if 0
4745 /*
4746 * Feature removed markus 2000-oct-26
4747 * Only some codepages must match surrogate pairs into supplementary code points -
4748 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4749 * GB 18030 provides direct encodings for supplementary code points, therefore
4750 * it must not combine two single-encoded surrogates into one code point.
4751 */
4752 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4753 #endif
4754 };
4755
4756 /* expected test results */
4757 static const int32_t results[]={
4758 /* number of bytes read, code point */
4759 1, 0x24,
4760 1, 0x7f,
4761 4, 0x80,
4762 2, 0x1f9,
4763 2, 0x20ac,
4764 2, 0x4e00,
4765 4, 0x9fa6,
4766 4, 0xffff,
4767 4, 0x10000,
4768 4, 0x10ffff
4769 #if 0
4770 /* Feature removed. See comment above. */
4771 8, 0x10000
4772 #endif
4773 };
4774
4775 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4776 UErrorCode errorCode=U_ZERO_ERROR;
4777 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4778 if(U_FAILURE(errorCode)) {
4779 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4780 return;
4781 }
4782 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4783 ucnv_close(cnv);
4784 }
4785
4786 static void
TestLMBCSnull4787 TestLMBCS() {
4788 /* LMBCS-1 string */
4789 static const uint8_t pszLMBCS[]={
4790 0x61,
4791 0x01, 0x29,
4792 0x81,
4793 0xA0,
4794 0x0F, 0x27,
4795 0x0F, 0x91,
4796 0x14, 0x0a, 0x74,
4797 0x14, 0xF6, 0x02,
4798 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4799 0x10, 0x88, 0xA0,
4800 };
4801
4802 /* Unicode UChar32 equivalents */
4803 static const UChar32 pszUnicode32[]={
4804 /* code point */
4805 0x00000061,
4806 0x00002013,
4807 0x000000FC,
4808 0x000000E1,
4809 0x00000007,
4810 0x00000091,
4811 0x00000a74,
4812 0x00000200,
4813 0x00023456, /* code point for surrogate pair */
4814 0x00005516
4815 };
4816
4817 /* Unicode UChar equivalents */
4818 static const UChar pszUnicode[]={
4819 /* code point */
4820 0x0061,
4821 0x2013,
4822 0x00FC,
4823 0x00E1,
4824 0x0007,
4825 0x0091,
4826 0x0a74,
4827 0x0200,
4828 0xD84D, /* low surrogate */
4829 0xDC56, /* high surrogate */
4830 0x5516
4831 };
4832
4833 /* expected test results */
4834 static const int offsets32[]={
4835 /* number of bytes read, code point */
4836 0,
4837 1,
4838 3,
4839 4,
4840 5,
4841 7,
4842 9,
4843 12,
4844 15,
4845 21,
4846 24
4847 };
4848
4849 /* expected test results */
4850 static const int offsets[]={
4851 /* number of bytes read, code point */
4852 0,
4853 1,
4854 3,
4855 4,
4856 5,
4857 7,
4858 9,
4859 12,
4860 15,
4861 18,
4862 21,
4863 24
4864 };
4865
4866
4867 UConverter *cnv;
4868
4869 #define NAME_LMBCS_1 "LMBCS-1"
4870 #define NAME_LMBCS_2 "LMBCS-2"
4871
4872
4873 /* Some basic open/close/property tests on some LMBCS converters */
4874 {
4875
4876 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4877 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4878 char get_subchars [1];
4879 const char * get_name;
4880 UConverter *cnv1;
4881 UConverter *cnv2;
4882
4883 int8_t len = sizeof(get_subchars);
4884
4885 UErrorCode errorCode=U_ZERO_ERROR;
4886
4887 /* Open */
4888 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4889 if(U_FAILURE(errorCode)) {
4890 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4891 return;
4892 }
4893 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4894 if(U_FAILURE(errorCode)) {
4895 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4896 return;
4897 }
4898
4899 /* Name */
4900 get_name = ucnv_getName (cnv1, &errorCode);
4901 if (strcmp(NAME_LMBCS_1,get_name)){
4902 log_err("Unexpected converter name: %s\n", get_name);
4903 }
4904 get_name = ucnv_getName (cnv2, &errorCode);
4905 if (strcmp(NAME_LMBCS_2,get_name)){
4906 log_err("Unexpected converter name: %s\n", get_name);
4907 }
4908
4909 /* substitution chars */
4910 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4911 if(U_FAILURE(errorCode)) {
4912 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4913 }
4914 if (len!=1){
4915 log_err("Unexpected length of sub chars\n");
4916 }
4917 if (get_subchars[0] != expected_subchars[0]){
4918 log_err("Unexpected value of sub chars\n");
4919 }
4920 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4921 if(U_FAILURE(errorCode)) {
4922 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4923 }
4924 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4925 if(U_FAILURE(errorCode)) {
4926 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4927 }
4928 if (len!=1){
4929 log_err("Unexpected length of sub chars\n");
4930 }
4931 if (get_subchars[0] != new_subchars[0]){
4932 log_err("Unexpected value of sub chars\n");
4933 }
4934 ucnv_close(cnv1);
4935 ucnv_close(cnv2);
4936
4937 }
4938
4939 /* LMBCS to Unicode - offsets */
4940 {
4941 UErrorCode errorCode=U_ZERO_ERROR;
4942
4943 const char * pSource = (const char *)pszLMBCS;
4944 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4945
4946 UChar Out [sizeof(pszUnicode) + 1];
4947 UChar * pOut = Out;
4948 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4949
4950 int32_t off [sizeof(offsets)];
4951
4952 /* last 'offset' in expected results is just the final size.
4953 (Makes other tests easier). Compensate here: */
4954
4955 off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4956
4957
4958
4959 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4960 if(U_FAILURE(errorCode)) {
4961 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4962 return;
4963 }
4964
4965
4966
4967 ucnv_toUnicode (cnv,
4968 &pOut,
4969 OutLimit,
4970 &pSource,
4971 sourceLimit,
4972 off,
4973 true,
4974 &errorCode);
4975
4976
4977 if (memcmp(off,offsets,sizeof(offsets)))
4978 {
4979 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4980 }
4981 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4982 {
4983 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4984 }
4985 ucnv_close(cnv);
4986 }
4987 {
4988 /* LMBCS to Unicode - getNextUChar */
4989 const char * sourceStart;
4990 const char *source=(const char *)pszLMBCS;
4991 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4992 const UChar32 *results= pszUnicode32;
4993 const int *off = offsets32;
4994
4995 UErrorCode errorCode=U_ZERO_ERROR;
4996 UChar32 uniChar;
4997
4998 cnv=ucnv_open("LMBCS-1", &errorCode);
4999 if(U_FAILURE(errorCode)) {
5000 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5001 return;
5002 }
5003 else
5004 {
5005
5006 while(source<limit) {
5007 sourceStart=source;
5008 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
5009 if(U_FAILURE(errorCode)) {
5010 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
5011 break;
5012 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
5013 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
5014 uniChar, (source-sourceStart), *results, *off);
5015 break;
5016 }
5017 results++;
5018 off++;
5019 }
5020 }
5021 ucnv_close(cnv);
5022 }
5023 { /* test locale & optimization group operations: Unicode to LMBCS */
5024
5025 UErrorCode errorCode=U_ZERO_ERROR;
5026 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5027 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5028 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5029 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5030 const UChar * pUniOut = uniString;
5031 UChar * pUniIn = uniString;
5032 uint8_t lmbcsString [4];
5033 const char * pLMBCSOut = (const char *)lmbcsString;
5034 char * pLMBCSIn = (char *)lmbcsString;
5035
5036 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5037 ucnv_fromUnicode (cnv16he,
5038 &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5039 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5040 NULL, 1, &errorCode);
5041
5042 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5043 {
5044 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5045 }
5046
5047 pLMBCSIn= (char *)lmbcsString;
5048 pUniOut = uniString;
5049 ucnv_fromUnicode (cnv01us,
5050 &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5051 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5052 NULL, 1, &errorCode);
5053
5054 if (lmbcsString[0] != 0x9F)
5055 {
5056 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5057 }
5058
5059 /* single byte char from mbcs char set */
5060 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5061 pLMBCSOut = (const char *)lmbcsString;
5062 pUniIn = uniString;
5063 ucnv_toUnicode (cnv16jp,
5064 &pUniIn, pUniIn + 1,
5065 &pLMBCSOut, (pLMBCSOut + 1),
5066 NULL, 1, &errorCode);
5067 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5068 {
5069 log_err("Unexpected results from LMBCS-16 single byte char\n");
5070 }
5071 /* convert to group 1: should be 3 bytes */
5072 pLMBCSIn = (char *)lmbcsString;
5073 pUniOut = uniString;
5074 ucnv_fromUnicode (cnv01us,
5075 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5076 &pUniOut, pUniOut + 1,
5077 NULL, 1, &errorCode);
5078 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5079 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5080 {
5081 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5082 }
5083 pLMBCSOut = (const char *)lmbcsString;
5084 pUniIn = uniString;
5085 ucnv_toUnicode (cnv01us,
5086 &pUniIn, pUniIn + 1,
5087 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5088 NULL, 1, &errorCode);
5089 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5090 {
5091 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5092 }
5093 pLMBCSIn = (char *)lmbcsString;
5094 pUniOut = uniString;
5095 ucnv_fromUnicode (cnv16jp,
5096 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5097 &pUniOut, pUniOut + 1,
5098 NULL, 1, &errorCode);
5099 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5100 {
5101 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5102 }
5103 ucnv_close(cnv16he);
5104 ucnv_close(cnv16jp);
5105 ucnv_close(cnv01us);
5106 }
5107 {
5108 /* Small source buffer testing, LMBCS -> Unicode */
5109
5110 UErrorCode errorCode=U_ZERO_ERROR;
5111
5112 const char * pSource = (const char *)pszLMBCS;
5113 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5114 int codepointCount = 0;
5115
5116 UChar Out [sizeof(pszUnicode) + 1];
5117 UChar * pOut = Out;
5118 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5119
5120
5121 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5122 if(U_FAILURE(errorCode)) {
5123 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5124 return;
5125 }
5126
5127
5128 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5129 {
5130 ucnv_toUnicode (cnv,
5131 &pOut,
5132 OutLimit,
5133 &pSource,
5134 (pSource+1), /* claim that this is a 1- byte buffer */
5135 NULL,
5136 false, /* false means there might be more chars in the next buffer */
5137 &errorCode);
5138
5139 if (U_SUCCESS (errorCode))
5140 {
5141 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5142 {
5143 /* we are on to the next code point: check value */
5144
5145 if (Out[0] != pszUnicode[codepointCount]){
5146 log_err("LMBCS->Uni result %lx should have been %lx \n",
5147 Out[0], pszUnicode[codepointCount]);
5148 }
5149
5150 pOut = Out; /* reset for accumulating next code point */
5151 codepointCount++;
5152 }
5153 }
5154 else
5155 {
5156 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5157 }
5158 }
5159 {
5160 /* limits & surrogate error testing */
5161 char LIn [sizeof(pszLMBCS)];
5162 const char * pLIn = LIn;
5163
5164 char LOut [sizeof(pszLMBCS)];
5165 char * pLOut = LOut;
5166
5167 UChar UOut [sizeof(pszUnicode)];
5168 UChar * pUOut = UOut;
5169
5170 UChar UIn [sizeof(pszUnicode)];
5171 const UChar * pUIn = UIn;
5172
5173 int32_t off [sizeof(offsets)];
5174 UChar32 uniChar;
5175
5176 errorCode=U_ZERO_ERROR;
5177
5178 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5179 pUIn++;
5180 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, false, &errorCode);
5181 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5182 {
5183 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5184 }
5185 pUIn--;
5186
5187 errorCode=U_ZERO_ERROR;
5188 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,false, &errorCode);
5189 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5190 {
5191 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5192 }
5193 errorCode=U_ZERO_ERROR;
5194
5195 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5196 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5197 {
5198 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5199 }
5200 errorCode=U_ZERO_ERROR;
5201
5202 /* 0 byte source request - no error, no pointer movement */
5203 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,false, &errorCode);
5204 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,false, &errorCode);
5205 if(U_FAILURE(errorCode)) {
5206 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5207 }
5208 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5209 {
5210 log_err("Unexpected pointer move in 0 byte source request \n");
5211 }
5212 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5213 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5214 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5215 {
5216 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5217 }
5218 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5219 {
5220 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5221 }
5222 errorCode = U_ZERO_ERROR;
5223
5224 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5225
5226 pUIn = pszUnicode;
5227 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,false, &errorCode);
5228 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5229 {
5230 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5231 }
5232
5233 errorCode = U_ZERO_ERROR;
5234
5235 pLIn = (const char *)pszLMBCS;
5236 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,false, &errorCode);
5237 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5238 {
5239 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5240 }
5241
5242 /* unpaired or chopped LMBCS surrogates */
5243
5244 /* OK high surrogate, Low surrogate is chopped */
5245 LIn [0] = (char)0x14;
5246 LIn [1] = (char)0xD8;
5247 LIn [2] = (char)0x01;
5248 LIn [3] = (char)0x14;
5249 LIn [4] = (char)0xDC;
5250 pLIn = LIn;
5251 errorCode = U_ZERO_ERROR;
5252 pUOut = UOut;
5253
5254 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5255 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5256 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5257 {
5258 log_err("Unexpected results on chopped low surrogate\n");
5259 }
5260
5261 /* chopped at surrogate boundary */
5262 LIn [0] = (char)0x14;
5263 LIn [1] = (char)0xD8;
5264 LIn [2] = (char)0x01;
5265 pLIn = LIn;
5266 errorCode = U_ZERO_ERROR;
5267 pUOut = UOut;
5268
5269 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,true, &errorCode);
5270 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5271 {
5272 log_err("Unexpected results on chopped at surrogate boundary \n");
5273 }
5274
5275 /* unpaired surrogate plus valid Unichar */
5276 LIn [0] = (char)0x14;
5277 LIn [1] = (char)0xD8;
5278 LIn [2] = (char)0x01;
5279 LIn [3] = (char)0x14;
5280 LIn [4] = (char)0xC9;
5281 LIn [5] = (char)0xD0;
5282 pLIn = LIn;
5283 errorCode = U_ZERO_ERROR;
5284 pUOut = UOut;
5285
5286 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,true, &errorCode);
5287 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5288 {
5289 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5290 }
5291
5292 /* unpaired surrogate plus chopped Unichar */
5293 LIn [0] = (char)0x14;
5294 LIn [1] = (char)0xD8;
5295 LIn [2] = (char)0x01;
5296 LIn [3] = (char)0x14;
5297 LIn [4] = (char)0xC9;
5298
5299 pLIn = LIn;
5300 errorCode = U_ZERO_ERROR;
5301 pUOut = UOut;
5302
5303 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5304 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5305 {
5306 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5307 }
5308
5309 /* unpaired surrogate plus valid non-Unichar */
5310 LIn [0] = (char)0x14;
5311 LIn [1] = (char)0xD8;
5312 LIn [2] = (char)0x01;
5313 LIn [3] = (char)0x0F;
5314 LIn [4] = (char)0x3B;
5315
5316 pLIn = LIn;
5317 errorCode = U_ZERO_ERROR;
5318 pUOut = UOut;
5319
5320 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5321 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5322 {
5323 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5324 }
5325
5326 /* unpaired surrogate plus chopped non-Unichar */
5327 LIn [0] = (char)0x14;
5328 LIn [1] = (char)0xD8;
5329 LIn [2] = (char)0x01;
5330 LIn [3] = (char)0x0F;
5331
5332 pLIn = LIn;
5333 errorCode = U_ZERO_ERROR;
5334 pUOut = UOut;
5335
5336 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,true, &errorCode);
5337
5338 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5339 {
5340 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5341 }
5342 }
5343 }
5344 ucnv_close(cnv); /* final cleanup */
5345 }
5346
5347
TestJitterbug255null5348 static void TestJitterbug255()
5349 {
5350 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5351 const char *testBuffer = (const char *)testBytes;
5352 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5353 UErrorCode status = U_ZERO_ERROR;
5354 /*UChar32 result;*/
5355 UConverter *cnv = 0;
5356
5357 cnv = ucnv_open("shift-jis", &status);
5358 if (U_FAILURE(status) || cnv == 0) {
5359 log_data_err("Failed to open the converter for SJIS.\n");
5360 return;
5361 }
5362 while (testBuffer != testEnd)
5363 {
5364 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5365 if (U_FAILURE(status))
5366 {
5367 log_err("Failed to convert the next UChar for SJIS.\n");
5368 break;
5369 }
5370 }
5371 ucnv_close(cnv);
5372 }
5373
TestEBCDICUS4XMLnull5374 static void TestEBCDICUS4XML()
5375 {
5376 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5377 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5378 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5379 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5380 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5381 UChar *unicodes = unicodes_x;
5382 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5383 char *target = target_x;
5384 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5385 UErrorCode status = U_ZERO_ERROR;
5386 UConverter *cnv = 0;
5387
5388 cnv = ucnv_open("ebcdic-xml-us", &status);
5389 if (U_FAILURE(status) || cnv == 0) {
5390 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5391 return;
5392 }
5393 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, true, &status);
5394 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5395 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5396 u_errorName(status));
5397 printUSeqErr(unicodes_x, 3);
5398 printUSeqErr(toUnicodeMaps, 3);
5399 }
5400 status = U_ZERO_ERROR;
5401 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, true, &status);
5402 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5403 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5404 u_errorName(status));
5405 printSeqErr((const unsigned char*)target_x, 3);
5406 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5407 }
5408 ucnv_close(cnv);
5409 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5411
5412 #if !UCONFIG_NO_COLLATION
5413
TestJitterbug981null5414 static void TestJitterbug981(){
5415 const UChar* rules;
5416 int32_t rules_length, target_cap, bytes_needed, buff_size;
5417 UErrorCode status = U_ZERO_ERROR;
5418 UConverter *utf8cnv;
5419 UCollator* myCollator;
5420 char *buff;
5421 int numNeeded=0;
5422 utf8cnv = ucnv_open ("utf8", &status);
5423 if(U_FAILURE(status)){
5424 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5425 return;
5426 }
5427 myCollator = ucol_open("zh", &status);
5428 if(U_FAILURE(status)){
5429 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5430 ucnv_close(utf8cnv);
5431 return;
5432 }
5433
5434 rules = ucol_getRules(myCollator, &rules_length);
5435 if(rules_length == 0) {
5436 log_data_err("missing zh tailoring rule string\n");
5437 ucol_close(myCollator);
5438 ucnv_close(utf8cnv);
5439 return;
5440 }
5441 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5442 buff = malloc(buff_size);
5443
5444 target_cap = 0;
5445 do {
5446 ucnv_reset(utf8cnv);
5447 status = U_ZERO_ERROR;
5448 if(target_cap >= buff_size) {
5449 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5450 break;
5451 }
5452 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5453 rules, rules_length, &status);
5454 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5455 if(numNeeded!=0 && numNeeded!= bytes_needed){
5456 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5457 break;
5458 }
5459 numNeeded = bytes_needed;
5460 } while (status == U_BUFFER_OVERFLOW_ERROR);
5461 ucol_close(myCollator);
5462 ucnv_close(utf8cnv);
5463 free(buff);
5464 }
5465
5466 #endif
5467
5468 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug1293null5469 static void TestJitterbug1293(){
5470 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5471 char target[256];
5472 UErrorCode status = U_ZERO_ERROR;
5473 UConverter* conv=NULL;
5474 int32_t target_cap, bytes_needed, numNeeded = 0;
5475 conv = ucnv_open("shift-jis",&status);
5476 if(U_FAILURE(status)){
5477 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5478 return;
5479 }
5480
5481 do{
5482 target_cap =0;
5483 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5484 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5485 if(numNeeded!=0 && numNeeded!= bytes_needed){
5486 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5487 }
5488 numNeeded = bytes_needed;
5489 } while (status == U_BUFFER_OVERFLOW_ERROR);
5490 if(U_FAILURE(status)){
5491 log_err("An error occurred in ucnv_fromUChars. Error: %s", u_errorName(status));
5492 return;
5493 }
5494 ucnv_close(conv);
5495 }
5496 #endif
5497
TestJB5275_1null5498 static void TestJB5275_1(){
5499
5500 static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5501 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5502 /* Switch script: */
5503 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5504 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5505 "\xEF\x40\x3B\xB3\x0A";
5506 static const UChar expected[] ={
5507 0x003b, 0x0a15, 0x000a, /* Easy characters */
5508 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5509 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5510 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5511 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5512 };
5513
5514 UErrorCode status = U_ZERO_ERROR;
5515 UConverter* conv = ucnv_open("iscii-gur", &status);
5516 UChar dest[100] = {'\0'};
5517 UChar* target = dest;
5518 UChar* targetLimit = dest+100;
5519 const char* source = data;
5520 const char* sourceLimit = data+strlen(data);
5521 const UChar* exp = expected;
5522
5523 if (U_FAILURE(status)) {
5524 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5525 return;
5526 }
5527
5528 log_verbose("Testing switching back to default script when new line is encountered.\n");
5529 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, true, &status);
5530 if(U_FAILURE(status)){
5531 log_err("conversion failed: %s \n", u_errorName(status));
5532 }
5533 targetLimit = target;
5534 target = dest;
5535 printUSeq(target, (int)(targetLimit-target));
5536 while(target<targetLimit){
5537 if(*exp!=*target){
5538 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5539 }
5540 target++;
5541 exp++;
5542 }
5543 ucnv_close(conv);
5544 }
5545
TestJB5275null5546 static void TestJB5275(){
5547 static const char* data =
5548 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5549 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5550 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5551 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5552 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5553 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5554 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5555 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5556 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5557 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5558 static const UChar expected[] ={
5559 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5560 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5561 0x0038, 0x0C95, 0x000A, /* Kannada test */
5562 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5563 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5564 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5565 };
5566
5567 UErrorCode status = U_ZERO_ERROR;
5568 UConverter* conv = ucnv_open("iscii", &status);
5569 UChar dest[100] = {'\0'};
5570 UChar* target = dest;
5571 UChar* targetLimit = dest+100;
5572 const char* source = data;
5573 const char* sourceLimit = data+strlen(data);
5574 const UChar* exp = expected;
5575 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, true, &status);
5576 if(U_FAILURE(status)){
5577 log_data_err("conversion failed: %s \n", u_errorName(status));
5578 }
5579 targetLimit = target;
5580 target = dest;
5581
5582 printUSeq(target, (int)(targetLimit-target));
5583
5584 while(target<targetLimit){
5585 if(*exp!=*target){
5586 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5587 }
5588 target++;
5589 exp++;
5590 }
5591 ucnv_close(conv);
5592 }
5593
5594 static void
TestIsFixedWidthnull5595 TestIsFixedWidth() {
5596 UErrorCode status = U_ZERO_ERROR;
5597 UConverter *cnv = NULL;
5598 int32_t i;
5599
5600 const char *fixedWidth[] = {
5601 "US-ASCII",
5602 "UTF32",
5603 "ibm-5478_P100-1995"
5604 };
5605
5606 const char *notFixedWidth[] = {
5607 "GB18030",
5608 "UTF8",
5609 "windows-949-2000",
5610 "UTF16"
5611 };
5612
5613 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5614 cnv = ucnv_open(fixedWidth[i], &status);
5615 if (cnv == NULL || U_FAILURE(status)) {
5616 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5617 continue;
5618 }
5619
5620 if (!ucnv_isFixedWidth(cnv, &status)) {
5621 log_err("%s is a fixedWidth converter but returned false.\n", fixedWidth[i]);
5622 }
5623 ucnv_close(cnv);
5624 }
5625
5626 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5627 cnv = ucnv_open(notFixedWidth[i], &status);
5628 if (cnv == NULL || U_FAILURE(status)) {
5629 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5630 continue;
5631 }
5632
5633 if (ucnv_isFixedWidth(cnv, &status)) {
5634 log_err("%s is NOT a fixedWidth converter but returned true.\n", notFixedWidth[i]);
5635 }
5636 ucnv_close(cnv);
5637 }
5638 }
5639