1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*******************************************************************************
9 *
10 * File CUCDTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API, added tests for string functions
15 ********************************************************************************
16 */
17 
18 #include <math.h>
19 #include <stdbool.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "unicode/utypes.h"
24 #include "unicode/uchar.h"
25 #include "unicode/putil.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uloc.h"
28 #include "unicode/unorm2.h"
29 #include "unicode/utf16.h"
30 #include "unicode/utf_old.h"
31 #include "cintltst.h"
32 #include "putilimp.h"
33 #include "uparse.h"
34 #include "ucase.h"
35 #include "ubidi_props.h"
36 #include "uprops.h"
37 #include "uset_imp.h"
38 #include "usc_impl.h"
39 #include "udatamem.h"
40 #include "cucdapi.h"
41 #include "cmemory.h"
42 
43 /* prototypes --------------------------------------------------------------- */
44 
45 static void TestUpperLower(void);
46 static void TestLetterNumber(void);
47 static void TestMisc(void);
48 static void TestPOSIX(void);
49 static void TestControlPrint(void);
50 static void TestIdentifier(void);
51 static void TestUnicodeData(void);
52 static void TestCodeUnit(void);
53 static void TestCodePoint(void);
54 static void TestCharLength(void);
55 static void TestCharNames(void);
56 static void TestUCharFromNameUnderflow(void);
57 static void TestMirroring(void);
58 static void TestUScriptRunAPI(void);
59 static void TestAdditionalProperties(void);
60 static void TestNumericProperties(void);
61 static void TestPropertyNames(void);
62 static void TestPropertyValues(void);
63 static void TestConsistency(void);
64 static void TestCaseFolding(void);
65 static void TestBinaryCharacterPropertiesAPI(void);
66 static void TestIntCharacterPropertiesAPI(void);
67 
68 /* internal methods used */
69 static int32_t MakeProp(char* str);
70 static int32_t MakeDir(char* str);
71 
72 /* helpers ------------------------------------------------------------------ */
73 
74 static void
parseUCDFile(const char *filename, char *fields[][2], int32_t fieldCount, UParseLineFn *lineFn, void *context, UErrorCode *pErrorCode)75 parseUCDFile(const char *filename,
76              char *fields[][2], int32_t fieldCount,
77              UParseLineFn *lineFn, void *context,
78              UErrorCode *pErrorCode) {
79     char path[512];
80     char backupPath[512];
81 
82     if(U_FAILURE(*pErrorCode)) {
83         return;
84     }
85 
86     /* Look inside ICU_DATA first */
87     strcpy(path, u_getDataDirectory());
88     strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
89     strcat(path, filename);
90 
91     /* As a fallback, try to guess where the source data was located
92      *    at the time ICU was built, and look there.
93      */
94     strcpy(backupPath, ctest_dataSrcDir());
95     strcat(backupPath, U_FILE_SEP_STRING);
96     strcat(backupPath, "unidata" U_FILE_SEP_STRING);
97     strcat(backupPath, filename);
98 
99     u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
100     if(*pErrorCode==U_FILE_ACCESS_ERROR) {
101         *pErrorCode=U_ZERO_ERROR;
102         u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
103     }
104     if(U_FAILURE(*pErrorCode)) {
105         log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
106     }
107 }
108 
109 /* test data ---------------------------------------------------------------- */
110 
/*
 * Two-letter general category abbreviations, concatenated without separators.
 * The order is the same as the per-entry comments of tagValues[].
 */
static const char tagStrings[] =
    "MnMcMe"        /* marks */
    "NdNlNo"        /* numbers */
    "ZsZlZp"        /* separators */
    "CcCfCsCoCn"    /* others */
    "LuLlLtLmLo"    /* letters */
    "PcPdPsPePo"    /* punctuation */
    "SmScSkSo"      /* symbols */
    "PiPf";         /* initial/final punctuation */
112 static const int32_t tagValues[] =
113     {
114     /* Mn */ U_NON_SPACING_MARK,
115     /* Mc */ U_COMBINING_SPACING_MARK,
116     /* Me */ U_ENCLOSING_MARK,
117     /* Nd */ U_DECIMAL_DIGIT_NUMBER,
118     /* Nl */ U_LETTER_NUMBER,
119     /* No */ U_OTHER_NUMBER,
120     /* Zs */ U_SPACE_SEPARATOR,
121     /* Zl */ U_LINE_SEPARATOR,
122     /* Zp */ U_PARAGRAPH_SEPARATOR,
123     /* Cc */ U_CONTROL_CHAR,
124     /* Cf */ U_FORMAT_CHAR,
125     /* Cs */ U_SURROGATE,
126     /* Co */ U_PRIVATE_USE_CHAR,
127     /* Cn */ U_UNASSIGNED,
128     /* Lu */ U_UPPERCASE_LETTER,
129     /* Ll */ U_LOWERCASE_LETTER,
130     /* Lt */ U_TITLECASE_LETTER,
131     /* Lm */ U_MODIFIER_LETTER,
132     /* Lo */ U_OTHER_LETTER,
133     /* Pc */ U_CONNECTOR_PUNCTUATION,
134     /* Pd */ U_DASH_PUNCTUATION,
135     /* Ps */ U_START_PUNCTUATION,
136     /* Pe */ U_END_PUNCTUATION,
137     /* Po */ U_OTHER_PUNCTUATION,
138     /* Sm */ U_MATH_SYMBOL,
139     /* Sc */ U_CURRENCY_SYMBOL,
140     /* Sk */ U_MODIFIER_SYMBOL,
141     /* So */ U_OTHER_SYMBOL,
142     /* Pi */ U_INITIAL_PUNCTUATION,
143     /* Pf */ U_FINAL_PUNCTUATION
144     };
145 
/*
 * Short names of the bidirectional classes, each stored in a fixed 5-byte slot.
 * NOTE(review): presumably indexed by the UCharDirection value; confirm
 * against the code that reads this table.
 */
static const char dirStrings[][5] = {
    "L",   "R",   "EN",  "ES",  "ET",  "AN",  "CS",
    "B",   "S",   "WS",  "ON",
    "LRE", "LRO", "AL",  "RLE", "RLO", "PDF", "NSM", "BN",
    /* new in Unicode 6.3/ICU 52 */
    "FSI", "LRI", "RLI", "PDI"
};
172 
173 void addUnicodeTest(TestNode** root);
174 
/*
 * Registers all tests of this file in the test tree below "tsutil/cucdtst/".
 * The tests are added in the order in which they appear in the table.
 */
void addUnicodeTest(TestNode** root)
{
    static const struct {
        void (*test)(void);
        const char *path;
    } registrations[] = {
        { TestCodeUnit,                     "tsutil/cucdtst/TestCodeUnit" },
        { TestCodePoint,                    "tsutil/cucdtst/TestCodePoint" },
        { TestCharLength,                   "tsutil/cucdtst/TestCharLength" },
        { TestBinaryValues,                 "tsutil/cucdtst/TestBinaryValues" },
        { TestUnicodeData,                  "tsutil/cucdtst/TestUnicodeData" },
        { TestAdditionalProperties,         "tsutil/cucdtst/TestAdditionalProperties" },
        { TestNumericProperties,            "tsutil/cucdtst/TestNumericProperties" },
        { TestUpperLower,                   "tsutil/cucdtst/TestUpperLower" },
        { TestLetterNumber,                 "tsutil/cucdtst/TestLetterNumber" },
        { TestMisc,                         "tsutil/cucdtst/TestMisc" },
        { TestPOSIX,                        "tsutil/cucdtst/TestPOSIX" },
        { TestControlPrint,                 "tsutil/cucdtst/TestControlPrint" },
        { TestIdentifier,                   "tsutil/cucdtst/TestIdentifier" },
        { TestCharNames,                    "tsutil/cucdtst/TestCharNames" },
        { TestUCharFromNameUnderflow,       "tsutil/cucdtst/TestUCharFromNameUnderflow" },
        { TestMirroring,                    "tsutil/cucdtst/TestMirroring" },
        { TestUScriptCodeAPI,               "tsutil/cucdtst/TestUScriptCodeAPI" },
        { TestHasScript,                    "tsutil/cucdtst/TestHasScript" },
        { TestGetScriptExtensions,          "tsutil/cucdtst/TestGetScriptExtensions" },
        { TestScriptMetadataAPI,            "tsutil/cucdtst/TestScriptMetadataAPI" },
        { TestUScriptRunAPI,                "tsutil/cucdtst/TestUScriptRunAPI" },
        { TestPropertyNames,                "tsutil/cucdtst/TestPropertyNames" },
        { TestPropertyValues,               "tsutil/cucdtst/TestPropertyValues" },
        { TestConsistency,                  "tsutil/cucdtst/TestConsistency" },
        { TestCaseFolding,                  "tsutil/cucdtst/TestCaseFolding" },
        { TestBinaryCharacterPropertiesAPI, "tsutil/cucdtst/TestBinaryCharacterPropertiesAPI" },
        { TestIntCharacterPropertiesAPI,    "tsutil/cucdtst/TestIntCharacterPropertiesAPI" }
    };
    size_t entry;

    for(entry=0; entry<sizeof(registrations)/sizeof(registrations[0]); ++entry) {
        addTest(root, registrations[entry].test, registrations[entry].path);
    }
}
207 
208 /*==================================================== */
209 /* test u_toupper() and u_tolower()                    */
210 /*==================================================== */
TestUpperLowernull211 static void TestUpperLower()
212 {
213     const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
214     const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
215     U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
216     U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
217     int32_t i;
218 
219     U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
220     U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
221 
222 /*
223 Checks LetterLike Symbols which were previously a source of confusion
224 [Bertrand A. D. 02/04/98]
225 */
226     for (i=0x2100;i<0x2138;i++)
227     {
228         /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
229         if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
230         {
231             if (i != (int)u_tolower(i)) /* itself */
232                 log_err("Failed case conversion with itself: U+%04x\n", i);
233             if (i != (int)u_toupper(i))
234                 log_err("Failed case conversion with itself: U+%04x\n", i);
235         }
236     }
237 
238     for(i=0; i < u_strlen(upper); i++){
239         if(u_tolower(upper[i]) != lower[i]){
240             log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
241         }
242     }
243 
244     log_verbose("testing upper lower\n");
245     for (i = 0; i < 21; i++) {
246 
247         if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
248         {
249             log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
250         }
251         else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
252          {
253             log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
254         }
255         else if (upperTest[i] != u_tolower(lowerTest[i]))
256         {
257             log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
258         }
259         else if (lowerTest[i] != u_toupper(upperTest[i]))
260          {
261             log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
262         }
263         else if (upperTest[i] != u_tolower(upperTest[i]))
264         {
265             log_err("Failed case conversion with itself: %c\n", upperTest[i]);
266         }
267         else if (lowerTest[i] != u_toupper(lowerTest[i]))
268         {
269             log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
270         }
271     }
272     log_verbose("done testing upper lower\n");
273 
274     log_verbose("testing u_istitle\n");
275     {
276         static const UChar expected[] = {
277             0x1F88,
278             0x1F89,
279             0x1F8A,
280             0x1F8B,
281             0x1F8C,
282             0x1F8D,
283             0x1F8E,
284             0x1F8F,
285             0x1F88,
286             0x1F89,
287             0x1F8A,
288             0x1F8B,
289             0x1F8C,
290             0x1F8D,
291             0x1F8E,
292             0x1F8F,
293             0x1F98,
294             0x1F99,
295             0x1F9A,
296             0x1F9B,
297             0x1F9C,
298             0x1F9D,
299             0x1F9E,
300             0x1F9F,
301             0x1F98,
302             0x1F99,
303             0x1F9A,
304             0x1F9B,
305             0x1F9C,
306             0x1F9D,
307             0x1F9E,
308             0x1F9F,
309             0x1FA8,
310             0x1FA9,
311             0x1FAA,
312             0x1FAB,
313             0x1FAC,
314             0x1FAD,
315             0x1FAE,
316             0x1FAF,
317             0x1FA8,
318             0x1FA9,
319             0x1FAA,
320             0x1FAB,
321             0x1FAC,
322             0x1FAD,
323             0x1FAE,
324             0x1FAF,
325             0x1FBC,
326             0x1FBC,
327             0x1FCC,
328             0x1FCC,
329             0x1FFC,
330             0x1FFC,
331         };
332         int32_t num = UPRV_LENGTHOF(expected);
333         for(i=0; i<num; i++){
334             if(!u_istitle(expected[i])){
335                 log_err("u_istitle failed for 0x%4X. Expected true, got false\n",expected[i]);
336             }
337         }
338 
339     }
340 }
341 
342 /* compare two sets and verify that their difference or intersection is empty */
343 static UBool
showADiffB(const USet *a, const USet *b, const char *a_name, const char *b_name, UBool expect, UBool diffIsError)344 showADiffB(const USet *a, const USet *b,
345            const char *a_name, const char *b_name,
346            UBool expect, UBool diffIsError) {
347     USet *aa;
348     int32_t i, start, end, length;
349     UErrorCode errorCode;
350 
351     /*
352      * expect:
353      * true  -> a-b should be empty, that is, b should contain all of a
354      * false -> a&b should be empty, that is, a should contain none of b (and vice versa)
355      */
356     if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
357         return true;
358     }
359 
360     /* clone a to aa because a is const */
361     aa=uset_open(1, 0);
362     if(aa==NULL) {
363         /* unusual problem - out of memory? */
364         return false;
365     }
366     uset_addAll(aa, a);
367 
368     /* compute the set in question */
369     if(expect) {
370         /* a-b */
371         uset_removeAll(aa, b);
372     } else {
373         /* a&b */
374         uset_retainAll(aa, b);
375     }
376 
377     /* aa is not empty because of the initial tests above; show its contents */
378     errorCode=U_ZERO_ERROR;
379     i=0;
380     for(;;) {
381         length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
382         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
383             break; /* done */
384         }
385         if(U_FAILURE(errorCode)) {
386             log_err("error comparing %s with %s at difference item %d: %s\n",
387                 a_name, b_name, i, u_errorName(errorCode));
388             break;
389         }
390         if(length!=0) {
391             break; /* done with code points, got a string or -1 */
392         }
393 
394         if(diffIsError) {
395             if(expect) {
396                 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
397             } else {
398                 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
399             }
400         } else {
401             if(expect) {
402                 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
403             } else {
404                 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
405             }
406         }
407 
408         ++i;
409     }
410 
411     uset_close(aa);
412     return false;
413 }
414 
415 static UBool
showAMinusB(const USet *a, const USet *b, const char *a_name, const char *b_name, UBool diffIsError)416 showAMinusB(const USet *a, const USet *b,
417             const char *a_name, const char *b_name,
418             UBool diffIsError) {
419     return showADiffB(a, b, a_name, b_name, true, diffIsError);
420 }
421 
422 static UBool
showAIntersectB(const USet *a, const USet *b, const char *a_name, const char *b_name, UBool diffIsError)423 showAIntersectB(const USet *a, const USet *b,
424                 const char *a_name, const char *b_name,
425                 UBool diffIsError) {
426     return showADiffB(a, b, a_name, b_name, false, diffIsError);
427 }
428 
429 static UBool
compareUSets(const USet *a, const USet *b, const char *a_name, const char *b_name, UBool diffIsError)430 compareUSets(const USet *a, const USet *b,
431              const char *a_name, const char *b_name,
432              UBool diffIsError) {
433     /*
434      * Use an arithmetic & not a logical && so that both branches
435      * are always taken and all differences are shown.
436      */
437     return
438         showAMinusB(a, b, a_name, b_name, diffIsError) &
439         showAMinusB(b, a, b_name, a_name, diffIsError);
440 }
441 
442 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
TestLetterNumbernull443 static void TestLetterNumber()
444 {
445     UChar i = 0x0000;
446 
447     log_verbose("Testing for isalpha\n");
448     for (i = 0x0041; i < 0x005B; i++) {
449         if (!u_isalpha(i))
450         {
451             log_err("Failed isLetter test at  %.4X\n", i);
452         }
453     }
454     for (i = 0x0660; i < 0x066A; i++) {
455         if (u_isalpha(i))
456         {
457             log_err("Failed isLetter test with numbers at %.4X\n", i);
458         }
459     }
460 
461     log_verbose("Testing for isdigit\n");
462     for (i = 0x0660; i < 0x066A; i++) {
463         if (!u_isdigit(i))
464         {
465             log_verbose("Failed isNumber test at %.4X\n", i);
466         }
467     }
468 
469     log_verbose("Testing for isalnum\n");
470     for (i = 0x0041; i < 0x005B; i++) {
471         if (!u_isalnum(i))
472         {
473             log_err("Failed isAlNum test at  %.4X\n", i);
474         }
475     }
476     for (i = 0x0660; i < 0x066A; i++) {
477         if (!u_isalnum(i))
478         {
479             log_err("Failed isAlNum test at  %.4X\n", i);
480         }
481     }
482 
483     {
484         /*
485          * The following checks work only starting from Unicode 4.0.
486          * Check the version number here.
487          */
488         static UVersionInfo u401={ 4, 0, 1, 0 };
489         UVersionInfo version;
490         u_getUnicodeVersion(version);
491         if(version[0]<4 || 0==memcmp(version, u401, 4)) {
492             return;
493         }
494     }
495 
496     {
497         /*
498          * Sanity check:
499          * Verify that exactly the digit characters have decimal digit values.
500          * This assumption is used in the implementation of u_digit()
501          * (which checks nt=de)
502          * compared with the parallel java.lang.Character.digit()
503          * (which checks Nd).
504          *
505          * This was not true in Unicode 3.2 and earlier.
506          * Unicode 4.0 fixed discrepancies.
507          * Unicode 4.0.1 re-introduced problems in this area due to an
508          * unintentionally incomplete last-minute change.
509          */
510         U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
511         U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
512 
513         USet *digits, *decimalValues;
514         UErrorCode errorCode;
515 
516         U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
517         U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
518         errorCode=U_ZERO_ERROR;
519         digits=uset_openPattern(digitsPattern, 6, &errorCode);
520         decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
521 
522         if(U_SUCCESS(errorCode)) {
523             compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", true);
524         }
525 
526         uset_close(digits);
527         uset_close(decimalValues);
528     }
529 }
530 
531 static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
532                                 const UChar32 *sampleChars, int32_t sampleCharsLength,
533                                 UBool expected) {
534     int32_t i;
535     for (i = 0; i < sampleCharsLength; ++i) {
536         UBool result = propFn(sampleChars[i]);
537         if (result != expected) {
538             log_err("error: character property function %s(U+%04x)=%d is wrong\n",
539                     propName, sampleChars[i], result);
540         }
541     }
542 }
543 
544 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
TestMiscnull545 static void TestMisc()
546 {
547     static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
548     static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
549     static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
550     static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
551     static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
552     static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
553 /*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
554     static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
555     static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
556     static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
557     static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
558 
559     static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
560 
561     uint32_t mask;
562 
563     int32_t i;
564     char icuVersion[U_MAX_VERSION_STRING_LENGTH];
565     UVersionInfo realVersion;
566 
567     memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
568 
569     testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(sampleSpaces), true);
570     testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), false);
571 
572     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
573                         sampleSpaces, UPRV_LENGTHOF(sampleSpaces), true);
574     testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
575                         sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), false);
576 
577     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
578                         sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), true);
579     testSampleCharProps(u_isWhitespace, "u_isWhitespace",
580                         sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces), false);
581 
582     testSampleCharProps(u_isdefined, "u_isdefined",
583                         sampleDefined, UPRV_LENGTHOF(sampleDefined), true);
584     testSampleCharProps(u_isdefined, "u_isdefined",
585                         sampleUndefined, UPRV_LENGTHOF(sampleUndefined), false);
586 
587     testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBase), true);
588     testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampleNonBase), false);
589 
590     testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(sampleDigits), true);
591     testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(sampleNonDigits), false);
592 
593     for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) {
594         if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
595             log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
596                     sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
597         }
598     }
599 
600     /* Tests the ICU version #*/
601     u_getVersion(realVersion);
602     u_versionToString(realVersion, icuVersion);
603     if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
604     {
605         log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
606     }
607 #if defined(ICU_VERSION)
608     /* test only happens where we have configure.in with VERSION - sanity check. */
609     if(strcmp(U_ICU_VERSION, ICU_VERSION))
610     {
611         log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
612     }
613 #endif
614 
615     /* test U_GC_... */
616     if(
617         U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
618         U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
619         U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
620         U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
621         U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
622         U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
623     ) {
624         log_err("error: U_GET_GC_MASK does not work properly\n");
625     }
626 
627     mask=0;
628     mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
629 
630     mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
631     mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
632     mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
633     mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
634     mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
635 
636     mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
637     mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
638     mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
639 
640     mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
641     mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
642     mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
643 
644     mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
645     mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
646     mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
647 
648     mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
649     mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
650     mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
651     mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
652 
653     mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
654     mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
655     mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
656     mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
657     mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
658 
659     mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
660     mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
661     mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
662     mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
663 
664     mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
665     mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
666 
667     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
668         log_err("error: problems with U_GC_XX_MASK constants\n");
669     }
670 
671     mask=0;
672     mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
673     mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
674     mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
675     mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
676     mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
677     mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
678     mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
679 
680     if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
681         log_err("error: problems with U_GC_Y_MASK constants\n");
682     }
683     {
684         static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
685         for(i=0; i<10; i++){
686             if(digit[i]!=u_forDigit(i,10)){
687                 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
688             }
689         }
690     }
691 
692     /* test u_digit() */
693     {
694         static const struct {
695             UChar32 c;
696             int8_t radix, value;
697         } data[]={
698             /* base 16 */
699             { 0x0031, 16, 1 },
700             { 0x0038, 16, 8 },
701             { 0x0043, 16, 12 },
702             { 0x0066, 16, 15 },
703             { 0x00e4, 16, -1 },
704             { 0x0662, 16, 2 },
705             { 0x06f5, 16, 5 },
706             { 0xff13, 16, 3 },
707             { 0xff41, 16, 10 },
708 
709             /* base 8 */
710             { 0x0031, 8, 1 },
711             { 0x0038, 8, -1 },
712             { 0x0043, 8, -1 },
713             { 0x0066, 8, -1 },
714             { 0x00e4, 8, -1 },
715             { 0x0662, 8, 2 },
716             { 0x06f5, 8, 5 },
717             { 0xff13, 8, 3 },
718             { 0xff41, 8, -1 },
719 
720             /* base 36 */
721             { 0x5a, 36, 35 },
722             { 0x7a, 36, 35 },
723             { 0xff3a, 36, 35 },
724             { 0xff5a, 36, 35 },
725 
726             /* wrong radix values */
727             { 0x0031, 1, -1 },
728             { 0xff3a, 37, -1 }
729         };
730 
731         for(i=0; i<UPRV_LENGTHOF(data); ++i) {
732             if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
733                 log_err("u_digit(U+%04x, %d)=%d expected %d\n",
734                         data[i].c,
735                         data[i].radix,
736                         u_digit(data[i].c, data[i].radix),
737                         data[i].value);
738             }
739         }
740     }
741 }
742 
743 /* test C/POSIX-style functions --------------------------------------------- */
744 
/* bit flags, one bit per C/POSIX character class */
#define ISAL 0x001
#define ISLO 0x002
#define ISUP 0x004

#define ISDI 0x008
#define ISXD 0x010

#define ISAN 0x020

#define ISPU 0x040
#define ISGR 0x080
#define ISPR 0x100

#define ISSP 0x200
#define ISBL 0x400
#define ISCN 0x800
762 
763 /* C/POSIX-style functions, in the same order as the bit flags */
764 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);
765 
766 static const struct {
767     IsPOSIXClass *fn;
768     const char *name;
769 } posixClasses[]={
770     { u_isalpha, "isalpha" },
771     { u_islower, "islower" },
772     { u_isupper, "isupper" },
773     { u_isdigit, "isdigit" },
774     { u_isxdigit, "isxdigit" },
775     { u_isalnum, "isalnum" },
776     { u_ispunct, "ispunct" },
777     { u_isgraph, "isgraph" },
778     { u_isprint, "isprint" },
779     { u_isspace, "isspace" },
780     { u_isblank, "isblank" },
781     { u_iscntrl, "iscntrl" }
782 };
783 
784 static const struct {
785     UChar32 c;
786     uint32_t posixResults;
787 } posixData[]={
788     { 0x0008,                                                        ISCN },    /* backspace */
789     { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
790     { 0x000a,                                              ISSP|     ISCN },    /* LF */
791     { 0x000c,                                              ISSP|     ISCN },    /* FF */
792     { 0x000d,                                              ISSP|     ISCN },    /* CR */
793     { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
794     { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
795     { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
796     { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
797     { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
798     { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
799     { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
800     { 0x0085,                                              ISSP|     ISCN },    /* NEL */
801     { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
802     { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
803     { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
804     { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
805     { 0x0600,                                                        ISCN },    /* arabic number sign */
806     { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
807     { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
808     { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
809     { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
810     { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
811     { 0x200b,                                                        ISCN },    /* ZWSP */
812   /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
813     { 0x200e,                                                        ISCN },    /* LRM */
814     { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
815     { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
816     { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
817     { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
818     { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
819     { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
820     { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
821     { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
822 };
823 
824 static void
TestPOSIXnull825 TestPOSIX() {
826     uint32_t mask;
827     int32_t cl, i;
828     UBool expect;
829 
830     mask=1;
831     for(cl=0; cl<12; ++cl) {
832         for(i=0; i<UPRV_LENGTHOF(posixData); ++i) {
833             expect=(UBool)((posixData[i].posixResults&mask)!=0);
834             if(posixClasses[cl].fn(posixData[i].c)!=expect) {
835                 log_err("u_%s(U+%04x)=%s is wrong\n",
836                     posixClasses[cl].name, posixData[i].c, expect ? "false" : "true");
837             }
838         }
839         mask<<=1;
840     }
841 }
842 
843 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
TestControlPrintnull844 static void TestControlPrint()
845 {
846     const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
847     const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
848     const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
849     const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
850     UChar32 c;
851 
852     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sampleControl), true);
853     testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(sampleNonControl), false);
854 
855     testSampleCharProps(u_isprint, "u_isprint",
856                         samplePrintable, UPRV_LENGTHOF(samplePrintable), true);
857     testSampleCharProps(u_isprint, "u_isprint",
858                         sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), false);
859 
860     /* test all ISO 8 controls */
861     for(c=0; c<=0x9f; ++c) {
862         if(c==0x20) {
863             /* skip ASCII graphic characters and continue with DEL */
864             c=0x7f;
865         }
866         if(!u_iscntrl(c)) {
867             log_err("error: u_iscntrl(ISO 8 control U+%04x)=false\n", c);
868         }
869         if(!u_isISOControl(c)) {
870             log_err("error: u_isISOControl(ISO 8 control U+%04x)=false\n", c);
871         }
872         if(u_isprint(c)) {
873             log_err("error: u_isprint(ISO 8 control U+%04x)=true\n", c);
874         }
875     }
876 
877     /* test all Latin-1 graphic characters */
878     for(c=0x20; c<=0xff; ++c) {
879         if(c==0x7f) {
880             c=0xa0;
881         } else if(c==0xad) {
882             /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
883             ++c;
884         }
885         if(!u_isprint(c)) {
886             log_err("error: u_isprint(Latin-1 graphic character U+%04x)=false\n", c);
887         }
888     }
889 }
890 
891 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
TestIdentifiernull892 static void TestIdentifier()
893 {
894     const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
895     const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
896     const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
897     const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
898     const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
899     const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
900     const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
901     const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
902     const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
903     const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
904 
905     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
906                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), true);
907     testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
908                         sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart), false);
909 
910     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
911                         sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), true);
912     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
913                         sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), false);
914 
915     /* IDPart should imply IDStart */
916     testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
917                         sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), true);
918 
919     testSampleCharProps(u_isIDStart, "u_isIDStart",
920                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), true);
921     testSampleCharProps(u_isIDStart, "u_isIDStart",
922                         sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeIDStart), false);
923 
924     testSampleCharProps(u_isIDPart, "u_isIDPart",
925                         sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), true);
926     testSampleCharProps(u_isIDPart, "u_isIDPart",
927                         sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeIDPart), false);
928 
929     /* IDPart should imply IDStart */
930     testSampleCharProps(u_isIDPart, "u_isIDPart",
931                         sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), true);
932 
933     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
934                         sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), true);
935     testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
936                         sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), false);
937 }
938 
939 /* for each line of UnicodeData.txt, check some of the properties */
/*
 * Context object handed to unicodeDataLineFn() through parseUCDFile().
 * TestUnicodeData() fills in the normalizer instances once, and the line
 * function reads them for every line of UnicodeData.txt.
 */
typedef struct UnicodeDataContext {
#if UCONFIG_NO_NORMALIZATION
    /* presumably a placeholder so that the struct has a member when normalization is compiled out */
    const void *dummy;
#else
    /* NFC instance: used for canonical raw decompositions and for recomposing pairs */
    const UNormalizer2 *nfc;
    /* NFKC instance: used for combining classes and for raw decompositions of any type */
    const UNormalizer2 *nfkc;
#endif
} UnicodeDataContext;
948 
949 /*
950  * ### TODO
951  * This test fails incorrectly if the First or Last code point of a repetitive area
952  * is overridden, which is allowed and is encouraged for the PUAs.
953  * Currently, this means that both area First/Last and override lines are
954  * tested against the properties from the API,
955  * and the area boundary will not match and cause an error.
956  *
957  * This function should detect area boundaries and skip them for the test of individual
958  * code points' properties.
959  * Then it should check that the areas contain all the same properties except where overridden.
 * For this, it would need to record which code points were listed explicitly.
961  */
962 static void U_CALLCONV
unicodeDataLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode)963 unicodeDataLineFn(void *context,
964                   char *fields[][2], int32_t fieldCount,
965                   UErrorCode *pErrorCode)
966 {
967     (void)fieldCount; // suppress compiler warnings about unused variable
968     char buffer[100];
969     const char *d;
970     char *end;
971     uint32_t value;
972     UChar32 c;
973     int32_t i;
974     int8_t type;
975     int32_t dt;
976     UChar dm[32], s[32];
977     int32_t dmLength, length;
978 
979 #if !UCONFIG_NO_NORMALIZATION
980     const UNormalizer2 *nfc, *nfkc;
981 #endif
982 
983     /* get the character code, field 0 */
984     c=strtoul(fields[0][0], &end, 16);
985     if(end<=fields[0][0] || end!=fields[0][1]) {
986         log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
987         return;
988     }
989     if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
990         log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
991         return;
992     }
993 
994     /* get general category, field 2 */
995     *fields[2][1]=0;
996     type = (int8_t)tagValues[MakeProp(fields[2][0])];
997     if(u_charType(c)!=type) {
998         log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
999     }
1000     if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1001         log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1002     }
1003 
1004     /* get canonical combining class, field 3 */
1005     value=strtoul(fields[3][0], &end, 10);
1006     if(end<=fields[3][0] || end!=fields[3][1]) {
1007         log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1008         return;
1009     }
1010     if(value>255) {
1011         log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1012         return;
1013     }
1014 #if !UCONFIG_NO_NORMALIZATION
1015     if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1016         log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1017     }
1018     nfkc=((UnicodeDataContext *)context)->nfkc;
1019     if(value!=unorm2_getCombiningClass(nfkc, c)) {
1020         log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1021     }
1022 #endif
1023 
1024     /* get BiDi category, field 4 */
1025     *fields[4][1]=0;
1026     i=MakeDir(fields[4][0]);
1027     if(i!=(int32_t)u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1028         log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1029     }
1030 
1031     /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1032     d=NULL;
1033     if(fields[5][0]==fields[5][1]) {
1034         /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1035         if(c==0xac00 || c==0xd7a3) {
1036             dt=U_DT_CANONICAL;
1037         } else {
1038             dt=U_DT_NONE;
1039         }
1040     } else {
1041         d=fields[5][0];
1042         *fields[5][1]=0;
1043         dt=UCHAR_INVALID_CODE;
1044         if(*d=='<') {
1045             end=strchr(++d, '>');
1046             if(end!=NULL) {
1047                 *end=0;
1048                 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1049                 d=u_skipWhitespace(end+1);
1050             }
1051         } else {
1052             dt=U_DT_CANONICAL;
1053         }
1054     }
1055     if(dt>U_DT_NONE) {
1056         if(c==0xac00) {
1057             dm[0]=0x1100;
1058             dm[1]=0x1161;
1059             dm[2]=0;
1060             dmLength=2;
1061         } else if(c==0xd7a3) {
1062             dm[0]=0xd788;
1063             dm[1]=0x11c2;
1064             dm[2]=0;
1065             dmLength=2;
1066         } else {
1067             dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1068         }
1069     } else {
1070         dmLength=-1;
1071     }
1072     if(dt<0 || U_FAILURE(*pErrorCode)) {
1073         log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1074         return;
1075     }
1076 #if !UCONFIG_NO_NORMALIZATION
1077     i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1078     if(i!=dt) {
1079         log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1080     }
1081     /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1082     length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1083     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1084         log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1085                 "or the Decomposition_Mapping is different (%s)\n",
1086                 c, length, dmLength, u_errorName(*pErrorCode));
1087         return;
1088     }
1089     /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1090     if(dt!=U_DT_CANONICAL) {
1091         dmLength=-1;
1092     }
1093     nfc=((UnicodeDataContext *)context)->nfc;
1094     length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1095     if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1096         log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1097                 "or the Decomposition_Mapping is different (%s)\n",
1098                 c, length, dmLength, u_errorName(*pErrorCode));
1099         return;
1100     }
1101     /* recompose */
1102     if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1103         UChar32 a, b, composite;
1104         i=0;
1105         U16_NEXT(dm, i, dmLength, a);
1106         U16_NEXT(dm, i, dmLength, b);
1107         /* i==dmLength */
1108         composite=unorm2_composePair(nfc, a, b);
1109         if(composite!=c) {
1110             log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1111                     (long)c, (long)a, (long)b, (long)composite);
1112         }
1113         /*
1114          * Note: NFKC has fewer round-trip mappings than NFC,
1115          * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1116          */
1117     }
1118 #endif
1119 
1120     /* get ISO Comment, field 11 */
1121     *fields[11][1]=0;
1122     i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1123     if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1124         log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1125             c, u_errorName(*pErrorCode),
1126             U_FAILURE(*pErrorCode) ? buffer : "[error]",
1127             fields[11][0]);
1128     }
1129 
1130     /* get uppercase mapping, field 12 */
1131     if(fields[12][0]!=fields[12][1]) {
1132         value=strtoul(fields[12][0], &end, 16);
1133         if(end!=fields[12][1]) {
1134             log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1135             return;
1136         }
1137         if((UChar32)value!=u_toupper(c)) {
1138             log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1139         }
1140     } else {
1141         /* no case mapping: the API must map the code point to itself */
1142         if(c!=u_toupper(c)) {
1143             log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1144         }
1145     }
1146 
1147     /* get lowercase mapping, field 13 */
1148     if(fields[13][0]!=fields[13][1]) {
1149         value=strtoul(fields[13][0], &end, 16);
1150         if(end!=fields[13][1]) {
1151             log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1152             return;
1153         }
1154         if((UChar32)value!=u_tolower(c)) {
1155             log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1156         }
1157     } else {
1158         /* no case mapping: the API must map the code point to itself */
1159         if(c!=u_tolower(c)) {
1160             log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1161         }
1162     }
1163 
1164     /* get titlecase mapping, field 14 */
1165     if(fields[14][0]!=fields[14][1]) {
1166         value=strtoul(fields[14][0], &end, 16);
1167         if(end!=fields[14][1]) {
1168             log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1169             return;
1170         }
1171         if((UChar32)value!=u_totitle(c)) {
1172             log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1173         }
1174     } else {
1175         /* no case mapping: the API must map the code point to itself */
1176         if(c!=u_totitle(c)) {
1177             log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1178         }
1179     }
1180 }
1181 
1182 static UBool U_CALLCONV
enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)1183 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1184     static const UChar32 test[][2]={
1185         {0x41, U_UPPERCASE_LETTER},
1186         {0x308, U_NON_SPACING_MARK},
1187         {0xfffe, U_GENERAL_OTHER_TYPES},
1188         {0xe0041, U_FORMAT_CHAR},
1189         {0xeffff, U_UNASSIGNED}
1190     };
1191 
1192     int32_t i, count;
1193 
1194     if(0!=strcmp((const char *)context, "a1")) {
1195         log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1196         return false;
1197     }
1198 
1199     count=UPRV_LENGTHOF(test);
1200     for(i=0; i<count; ++i) {
1201         if(start<=test[i][0] && test[i][0]<limit) {
1202             if(type!=(UCharCategory)test[i][1]) {
1203                 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1204                         start, limit, (long)type, test[i][0], test[i][1]);
1205             }
1206             /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1207             return i==(count-1) ? false : true;
1208         }
1209     }
1210 
1211     if(start>test[count-1][0]) {
1212         log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1213                 start, limit, (long)type);
1214         return false;
1215     }
1216 
1217     return true;
1218 }
1219 
1220 static UBool U_CALLCONV
enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)1221 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1222     (void)context; // suppress compiler warnings about unused variable
1223 
1224     /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1225     static const int32_t defaultBidi[][2]={ /* { limit, class } */
1226         { 0x0590, U_LEFT_TO_RIGHT },
1227         { 0x0600, U_RIGHT_TO_LEFT },
1228         { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1229         { 0x0860, U_RIGHT_TO_LEFT },
1230         { 0x0870, U_RIGHT_TO_LEFT_ARABIC },  // Unicode 10 changes U+0860..U+086F from R to AL.
1231         { 0x08A0, U_RIGHT_TO_LEFT_ARABIC },  // Unicode 14 changes U+0870..U+089F from R to AL.
1232         { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1233         { 0x20A0, U_LEFT_TO_RIGHT },
1234         { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR },  /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */
1235         { 0xFB1D, U_LEFT_TO_RIGHT },
1236         { 0xFB50, U_RIGHT_TO_LEFT },
1237         { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1238         { 0xFE70, U_LEFT_TO_RIGHT },
1239         { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1240 
1241         { 0x10800, U_LEFT_TO_RIGHT },
1242         { 0x10D00, U_RIGHT_TO_LEFT },  // Unicode 11 changes U+10D00..U+10D3F from R to AL.
1243         { 0x10D40, U_RIGHT_TO_LEFT_ARABIC },
1244         { 0x10EC0, U_RIGHT_TO_LEFT },  // Unicode 15 changes U+10EC0..U+10EFF from R to AL.
1245         { 0x10F00, U_RIGHT_TO_LEFT_ARABIC },
1246         { 0x10F30, U_RIGHT_TO_LEFT },  // Unicode 11 changes U+10F30..U+10F6F from R to AL.
1247         { 0x10F70, U_RIGHT_TO_LEFT_ARABIC },
1248         { 0x11000, U_RIGHT_TO_LEFT },
1249 
1250         { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1251         { 0x1EC70, U_RIGHT_TO_LEFT },  // Unicode 11 changes U+1EC70..U+1ECBF from R to AL.
1252         { 0x1ECC0, U_RIGHT_TO_LEFT_ARABIC },
1253         { 0x1ED00, U_RIGHT_TO_LEFT },  // Unicode 12 changes U+1ED00..U+1ED4F from R to AL.
1254         { 0x1ED50, U_RIGHT_TO_LEFT_ARABIC },
1255         { 0x1EE00, U_RIGHT_TO_LEFT },
1256         { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1257         { 0x1F000, U_RIGHT_TO_LEFT },
1258         { 0x110000, U_LEFT_TO_RIGHT }
1259     };
1260 
1261     UChar32 c;
1262     int32_t i;
1263     UCharDirection shouldBeDir;
1264 
1265     /*
1266      * LineBreak.txt specifies:
1267      *   #  - Assigned characters that are not listed explicitly are given the value
1268      *   #    "AL".
1269      *   #  - Unassigned characters are given the value "XX".
1270      *
1271      * PUA characters are listed explicitly with "XX".
1272      * Verify that no assigned character has "XX".
1273      */
1274     if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1275         c=start;
1276         while(c<limit) {
1277             if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1278                 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1279             }
1280             ++c;
1281         }
1282     }
1283 
1284     /*
1285      * Verify default Bidi classes.
1286      * See DerivedBidiClass.txt, especially for unassigned code points.
1287      */
1288     if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1289         /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1290         c=start;
1291         for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) {
1292             if((int32_t)c<defaultBidi[i][0]) {
1293                 while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1294                     if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1295                         shouldBeDir=U_BOUNDARY_NEUTRAL;
1296                     } else {
1297                         shouldBeDir=(UCharDirection)defaultBidi[i][1];
1298                     }
1299 
1300                     if( u_charDirection(c)!=shouldBeDir ||
1301                         (UCharDirection)u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1302                     ) {
1303                         log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1304                             c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1305                     }
1306                     ++c;
1307                 }
1308             }
1309         }
1310     }
1311 
1312     return true;
1313 }
1314 
1315 /* tests for several properties */
TestUnicodeDatanull1316 static void TestUnicodeData()
1317 {
1318     UVersionInfo expectVersionArray;
1319     UVersionInfo versionArray;
1320     char *fields[15][2];
1321     UErrorCode errorCode;
1322     UChar32 c;
1323     int8_t type;
1324 
1325     UnicodeDataContext context;
1326 
1327     u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1328     u_getUnicodeVersion(versionArray);
1329     if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1330     {
1331         log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1332         versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1333     }
1334 
1335 #if defined(ICU_UNICODE_VERSION)
1336     /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1337     if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1338     {
1339          log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1340     }
1341 #endif
1342 
1343     if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1344         log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1345     }
1346 
1347     errorCode=U_ZERO_ERROR;
1348 #if !UCONFIG_NO_NORMALIZATION
1349     context.nfc=unorm2_getNFCInstance(&errorCode);
1350     context.nfkc=unorm2_getNFKCInstance(&errorCode);
1351     if(U_FAILURE(errorCode)) {
1352         log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1353         return;
1354     }
1355 #endif
1356     parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1357     if(U_FAILURE(errorCode)) {
1358         return; /* if we couldn't parse UnicodeData.txt, we should return */
1359     }
1360 
1361     /* sanity check on repeated properties */
1362     for(c=0xfffe; c<=0x10ffff;) {
1363         type=u_charType(c);
1364         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1365             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1366         }
1367         if(type!=U_UNASSIGNED) {
1368             log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1369         }
1370         if((c&0xffff)==0xfffe) {
1371             ++c;
1372         } else {
1373             c+=0xffff;
1374         }
1375     }
1376 
1377     /* test that PUA is not "unassigned" */
1378     for(c=0xe000; c<=0x10fffd;) {
1379         type=u_charType(c);
1380         if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1381             log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1382         }
1383         if(type==U_UNASSIGNED) {
1384             log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1385         } else if(type!=U_PRIVATE_USE_CHAR) {
1386             log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1387         }
1388         if(c==0xf8ff) {
1389             c=0xf0000;
1390         } else if(c==0xffffd) {
1391             c=0x100000;
1392         } else {
1393             ++c;
1394         }
1395     }
1396 
1397     /* test u_enumCharTypes() */
1398     u_enumCharTypes(enumTypeRange, "a1");
1399 
1400     /* check default properties */
1401     u_enumCharTypes(enumDefaultsRange, NULL);
1402 }
1403 
TestCodeUnitnull1404 static void TestCodeUnit(){
1405     const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1406 
1407     int32_t i;
1408 
1409     for(i=0; i<UPRV_LENGTHOF(codeunit); i++){
1410         UChar c=codeunit[i];
1411         if(i<4){
1412             if(!(U16_IS_SINGLE(c)) || (U16_IS_LEAD(c)) || (U16_IS_TRAIL(c)) ||
1413                     U16_IS_SURROGATE(c) || U_IS_SURROGATE(c)) {
1414                 log_err("ERROR: U+%04x is a single", c);
1415             }
1416 
1417         }
1418         if(i >= 4 && i< 8){
1419             if(!(U16_IS_LEAD(c)) || U16_IS_SINGLE(c) || U16_IS_TRAIL(c) ||
1420                     !U16_IS_SURROGATE(c) || !U_IS_SURROGATE(c)){
1421                 log_err("ERROR: U+%04x is a first surrogate", c);
1422             }
1423         }
1424         if(i >= 8 && i< 12){
1425             if(!(U16_IS_TRAIL(c)) || U16_IS_SINGLE(c) || U16_IS_LEAD(c) ||
1426                     !U16_IS_SURROGATE(c) || !U_IS_SURROGATE(c)){
1427                 log_err("ERROR: U+%04x is a second surrogate", c);
1428             }
1429         }
1430 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1431         if(i<4){
1432             if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1433                 log_err("ERROR: U+%04x is a single", c);
1434             }
1435 
1436         }
1437         if(i >= 4 && i< 8){
1438             if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1439                 log_err("ERROR: U+%04x is a first surrogate", c);
1440             }
1441         }
1442         if(i >= 8 && i< 12){
1443             if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1444                 log_err("ERROR: U+%04x is a second surrogate", c);
1445             }
1446         }
1447 #endif
1448     }
1449 }
1450 
TestCodePointnull1451 static void TestCodePoint(){
1452     const UChar32 codePoint[]={
1453         /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1454         0xd800,
1455         0xdbff,
1456         0xdc00,
1457         0xdfff,
1458         0xdc04,
1459         0xd821,
1460         /*not a surrogate, valid, isUnicodeChar , not Error*/
1461         0x20ac,
1462         0xd7ff,
1463         0xe000,
1464         0xe123,
1465         0x0061,
1466         0xe065,
1467         0x20402,
1468         0x24506,
1469         0x23456,
1470         0x20402,
1471         0x10402,
1472         0x23456,
1473         /*not a surrogate, not valid, isUnicodeChar, isError */
1474         0x0015,
1475         0x009f,
1476         /*not a surrogate, not valid, not isUnicodeChar, isError */
1477         0xffff,
1478         0xfffe,
1479     };
1480     int32_t i;
1481     for(i=0; i<UPRV_LENGTHOF(codePoint); i++) {
1482         UChar32 c=codePoint[i];
1483         if(i<6) {
1484             if(!U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)) {
1485                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1486             }
1487             if(U_IS_UNICODE_CHAR(c)) {
1488                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1489             }
1490         } else if(i >=6 && i<18) {
1491             if(U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)) {
1492                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1493             }
1494             if(!U_IS_UNICODE_CHAR(c)) {
1495                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1496             }
1497         } else if(i >=18 && i<20) {
1498             if(U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)) {
1499                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1500             }
1501             if(!U_IS_UNICODE_CHAR(c)) {
1502                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1503             }
1504         } else if(i >=18 && i<UPRV_LENGTHOF(codePoint)) {
1505             if(U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)) {
1506                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1507             }
1508             if(U_IS_UNICODE_CHAR(c)) {
1509                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1510             }
1511         }
1512 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1513         if(i<6){
1514             if(!UTF_IS_SURROGATE(c)){
1515                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1516             }
1517             if(UTF_IS_VALID(c)){
1518                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1519             }
1520             if(UTF_IS_UNICODE_CHAR(c)){
1521                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1522             }
1523             if(UTF_IS_ERROR(c)){
1524                 log_err("ERROR: isError() failed for U+%04x\n", c);
1525             }
1526         }else if(i >=6 && i<18){
1527             if(UTF_IS_SURROGATE(c)){
1528                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1529             }
1530             if(!UTF_IS_VALID(c)){
1531                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1532             }
1533             if(!UTF_IS_UNICODE_CHAR(c)){
1534                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1535             }
1536             if(UTF_IS_ERROR(c)){
1537                 log_err("ERROR: isError() failed for U+%04x\n", c);
1538             }
1539         }else if(i >=18 && i<20){
1540             if(UTF_IS_SURROGATE(c)){
1541                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1542             }
1543             if(UTF_IS_VALID(c)){
1544                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1545             }
1546             if(!UTF_IS_UNICODE_CHAR(c)){
1547                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1548             }
1549             if(!UTF_IS_ERROR(c)){
1550                 log_err("ERROR: isError() failed for U+%04x\n", c);
1551             }
1552         }
1553         else if(i >=18 && i<UPRV_LENGTHOF(codePoint)){
1554             if(UTF_IS_SURROGATE(c)){
1555                 log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1556             }
1557             if(UTF_IS_VALID(c)){
1558                 log_err("ERROR: isValid() failed for U+%04x\n", c);
1559             }
1560             if(UTF_IS_UNICODE_CHAR(c)){
1561                 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1562             }
1563             if(!UTF_IS_ERROR(c)){
1564                 log_err("ERROR: isError() failed for U+%04x\n", c);
1565             }
1566         }
1567 #endif
1568     }
1569 
1570     if(
1571         !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1572         !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1573         U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1574         U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1575     ) {
1576         log_err("error with U_IS_BMP()\n");
1577     }
1578 
1579     if(
1580         U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1581         U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1582         U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1583         !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1584     ) {
1585         log_err("error with U_IS_SUPPLEMENTARY()\n");
1586     }
1587 }
1588 
TestCharLengthnull1589 static void TestCharLength()
1590 {
1591     const int32_t codepoint[]={
1592         1, 0x0061,
1593         1, 0xe065,
1594         1, 0x20ac,
1595         2, 0x20402,
1596         2, 0x23456,
1597         2, 0x24506,
1598         2, 0x20402,
1599         2, 0x10402,
1600         1, 0xd7ff,
1601         1, 0xe000
1602     };
1603 
1604     int32_t i;
1605 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1606     UBool multiple;
1607 #endif
1608     for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){
1609         UChar32 c=codepoint[i+1];
1610         if(
1611 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1612                 UTF_CHAR_LENGTH(c) != codepoint[i] ||
1613 #endif
1614                 U16_LENGTH(c) != codepoint[i]) {
1615             log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1616         }
1617 #if !U_HIDE_OBSOLETE_UTF_OLD_H
1618         multiple=(UBool)(codepoint[i] == 1 ? false : true);
1619         if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1620             log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1621         }
1622 #endif
1623     }
1624 }
1625 
1626 /*internal functions ----*/
MakeProp(char* str)1627 static int32_t MakeProp(char* str)
1628 {
1629     int32_t result = 0;
1630     char* matchPosition =0;
1631 
1632     matchPosition = strstr(tagStrings, str);
1633     if (matchPosition == 0)
1634     {
1635         log_err("unrecognized type letter ");
1636         log_err(str);
1637     }
1638     else
1639         result = (int32_t)((matchPosition - tagStrings) / 2);
1640     return result;
1641 }
1642 
MakeDir(char* str)1643 static int32_t MakeDir(char* str)
1644 {
1645     int32_t pos = 0;
1646     for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) {
1647         if (strcmp(str, dirStrings[pos]) == 0) {
1648             return pos;
1649         }
1650     }
1651     return -1;
1652 }
1653 
1654 /* test u_charName() -------------------------------------------------------- */
1655 
/*
 * Expected results for the character name tests in this file.
 *   code    - code point under test
 *   name    - expected result for U_UNICODE_CHAR_NAME ("" if none)
 *   oldName - expected result for U_UNICODE_10_CHAR_NAME (empty in every entry)
 *   extName - expected result for U_EXTENDED_CHAR_NAME
 *   alias   - expected result for U_CHAR_NAME_ALIAS, or NULL if there is none
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName, *alias;
} names[]={
    {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A", NULL},
    {0x01a2, "LATIN CAPITAL LETTER OI", "",
             "LATIN CAPITAL LETTER OI",
             "LATIN CAPITAL LETTER GHA"},
    {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
             "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", NULL},
    {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
             "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
             "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
    {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401", NULL},
    {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED", NULL},
    {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA", NULL},
    {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH", NULL},
    /* surrogates and noncharacters only have an extended name */
    {0xd800, "", "", "<lead surrogate-D800>", NULL},
    {0xdc00, "", "", "<trail surrogate-DC00>", NULL},
    {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS", NULL},
    {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN", NULL},
    {0xffff, "", "", "<noncharacter-FFFF>", NULL},
    {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
              "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
              "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
    {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456", NULL}
};
1683 
1684 static UBool
enumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)1685 enumCharNamesFn(void *context,
1686                 UChar32 code, UCharNameChoice nameChoice,
1687                 const char *name, int32_t length) {
1688     int32_t *pCount=(int32_t *)context;
1689     const char *expected;
1690     int i;
1691 
1692     if(length<=0 || length!=(int32_t)strlen(name)) {
1693         /* should not be called with an empty string or invalid length */
1694         log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1695         return true;
1696     }
1697 
1698     ++*pCount;
1699     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1700         if(code==(UChar32)names[i].code) {
1701             switch (nameChoice) {
1702                 case U_EXTENDED_CHAR_NAME:
1703                     if(0!=strcmp(name, names[i].extName)) {
1704                         log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1705                     }
1706                     break;
1707                 case U_UNICODE_CHAR_NAME:
1708                     if(0!=strcmp(name, names[i].name)) {
1709                         log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1710                     }
1711                     break;
1712                 case U_UNICODE_10_CHAR_NAME:
1713                     expected=names[i].oldName;
1714                     if(expected[0]==0 || 0!=strcmp(name, expected)) {
1715                         log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1716                     }
1717                     break;
1718                 case U_CHAR_NAME_ALIAS:
1719                     expected=names[i].alias;
1720                     if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1721                         log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1722                     }
1723                     break;
1724                 case U_CHAR_NAME_CHOICE_COUNT:
1725                     break;
1726             }
1727             break;
1728         }
1729     }
1730     return true;
1731 }
1732 
/* State carried through the extended-name enumeration in TestCharNames(). */
struct enumExtCharNamesContext {
    /* number of names seen; enumExtCharNamesFn passes its address to enumCharNamesFn, which increments it through an int32_t pointer */
    uint32_t length;
    /* code point of the previous callback invocation; TestCharNames() starts it at -1 */
    int32_t last;
};
1737 
1738 static UBool
enumExtCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)1739 enumExtCharNamesFn(void *context,
1740                 UChar32 code, UCharNameChoice nameChoice,
1741                 const char *name, int32_t length) {
1742     struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1743 
1744     if (ecncp->last != (int32_t) code - 1) {
1745         if (ecncp->last < 0) {
1746             log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1747         } else {
1748             log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1749         }
1750     }
1751     ecncp->last = (int32_t) code;
1752 
1753     if (!*name) {
1754         log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1755     }
1756 
1757     return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1758 }
1759 
1760 /**
1761  * This can be made more efficient by moving it into putil.c and having
1762  * it directly access the ebcdic translation tables.
1763  * TODO: If we get this method in putil.c, then delete it from here.
1764  */
1765 static UChar
u_charToUChar(char c)1766 u_charToUChar(char c) {
1767     UChar uc;
1768     u_charsToUChars(&c, &uc, 1);
1769     return uc;
1770 }
1771 
1772 static void
TestCharNamesnull1773 TestCharNames() {
1774     static char name[80];
1775     UErrorCode errorCode=U_ZERO_ERROR;
1776     struct enumExtCharNamesContext extContext;
1777     const char *expected;
1778     int32_t length;
1779     UChar32 c;
1780     int32_t i;
1781 
1782     log_verbose("Testing uprv_getMaxCharNameLength()\n");
1783     length=uprv_getMaxCharNameLength();
1784     if(length==0) {
1785         /* no names data available */
1786         return;
1787     }
1788     if(length<83) { /* Unicode 3.2 max char name length */
1789         log_err("uprv_getMaxCharNameLength()=%d is too short");
1790     }
1791     /* ### TODO same tests for max ISO comment length as for max name length */
1792 
1793     log_verbose("Testing u_charName()\n");
1794     for(i=0; i<UPRV_LENGTHOF(names); ++i) {
1795         /* modern Unicode character name */
1796         length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1797         if(U_FAILURE(errorCode)) {
1798             log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1799             return;
1800         }
1801         if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1802             log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1803         }
1804 
1805         /* find the modern name */
1806         if (*names[i].name) {
1807             c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1808             if(U_FAILURE(errorCode)) {
1809                 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1810                 return;
1811             }
1812             if(c!=(UChar32)names[i].code) {
1813                 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1814             }
1815         }
1816 
1817         /* Unicode 1.0 character name */
1818         length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1819         if(U_FAILURE(errorCode)) {
1820             log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1821             return;
1822         }
1823         if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1824             log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1825         }
1826 
1827         /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1828         if(names[i].oldName[0]!=0 /* && length>0 */) {
1829             c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1830             if(U_FAILURE(errorCode)) {
1831                 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1832                 return;
1833             }
1834             if(c!=(UChar32)names[i].code) {
1835                 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1836             }
1837         }
1838 
1839         /* Unicode character name alias */
1840         length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1841         if(U_FAILURE(errorCode)) {
1842             log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1843             return;
1844         }
1845         expected=names[i].alias;
1846         if(expected==NULL) {
1847             expected="";
1848         }
1849         if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1850             log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1851                     names[i].code, name, length, expected);
1852         }
1853 
1854         /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1855         if(expected[0]!=0 /* && length>0 */) {
1856             c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1857             if(U_FAILURE(errorCode)) {
1858                 log_err("u_charFromName(%s - alias) error %s\n",
1859                         expected, u_errorName(errorCode));
1860                 return;
1861             }
1862             if(c!=(UChar32)names[i].code) {
1863                 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1864                         expected, c, names[i].code);
1865             }
1866         }
1867     }
1868 
1869     /* test u_enumCharNames() */
1870     length=0;
1871     errorCode=U_ZERO_ERROR;
1872     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1873     if(U_FAILURE(errorCode) || length<94140) {
1874         log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1875     }
1876 
1877     extContext.length = 0;
1878     extContext.last = -1;
1879     errorCode=U_ZERO_ERROR;
1880     u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1881     if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1882         log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1883     }
1884 
1885     /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1886     if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1887         log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1888     }
1889 
1890     /* Test getCharNameCharacters */
1891     if(!getTestOption(QUICK_OPTION)) {
1892         enum { BUFSIZE = 256 };
1893         UErrorCode ec = U_ZERO_ERROR;
1894         char buf[BUFSIZE];
1895         int32_t maxLength;
1896         UChar32 cp;
1897         UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1898         int32_t l1, l2;
1899         UBool map[256];
1900         UBool ok;
1901 
1902         USet* set = uset_open(1, 0); /* empty set */
1903         USet* dumb = uset_open(1, 0); /* empty set */
1904 
1905         /*
1906          * uprv_getCharNameCharacters() will likely return more lowercase
1907          * letters than actual character names contain because
1908          * it includes all the characters in lowercased names of
1909          * general categories, for the full possible set of extended names.
1910          */
1911         {
1912             USetAdder sa={
1913                 NULL,
1914                 uset_add,
1915                 uset_addRange,
1916                 uset_addString,
1917                 NULL, /* don't need remove() */
1918                 NULL  /* don't need removeRange() */
1919             };
1920             sa.set=set;
1921             uprv_getCharNameCharacters(&sa);
1922         }
1923 
1924         /* build set the dumb (but sure-fire) way */
1925         for (i=0; i<256; ++i) {
1926             map[i] = false;
1927         }
1928 
1929         maxLength=0;
1930         for (cp=0; cp<0x110000; ++cp) {
1931             int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1932                                      buf, BUFSIZE, &ec);
1933             if (U_FAILURE(ec)) {
1934                 log_err("FAIL: u_charName failed when it shouldn't\n");
1935                 uset_close(set);
1936                 uset_close(dumb);
1937                 return;
1938             }
1939             if(len>maxLength) {
1940                 maxLength=len;
1941             }
1942 
1943             for (i=0; i<len; ++i) {
1944                 if (!map[(uint8_t) buf[i]]) {
1945                     uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1946                     map[(uint8_t) buf[i]] = true;
1947                 }
1948             }
1949 
1950             /* test for leading/trailing whitespace */
1951             if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1952                 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1953             }
1954         }
1955 
1956         if(map[(uint8_t)'\t']) {
1957             log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1958         }
1959 
1960         length=uprv_getMaxCharNameLength();
1961         if(length!=maxLength) {
1962             log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1963                     length, maxLength);
1964         }
1965 
1966         /* compare the sets.  Where is my uset_equals?!! */
1967         ok=true;
1968         for(i=0; i<256; ++i) {
1969             if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1970                 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1971                     /* ignore lowercase a-z that are in set but not in dumb */
1972                     ok=true;
1973                 } else {
1974                     ok=false;
1975                     break;
1976                 }
1977             }
1978         }
1979 
1980         l1 = uset_toPattern(set, pat, BUFSIZE, true, &ec);
1981         l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, true, &ec);
1982         if (U_FAILURE(ec)) {
1983             log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1984             uset_close(set);
1985             uset_close(dumb);
1986             return;
1987         }
1988 
1989         if (l1 >= BUFSIZE) {
1990             l1 = BUFSIZE-1;
1991             pat[l1] = 0;
1992         }
1993         if (l2 >= BUFSIZE) {
1994             l2 = BUFSIZE-1;
1995             dumbPat[l2] = 0;
1996         }
1997 
1998         if (!ok) {
1999             log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
2000                     aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
2001         } else if(getTestOption(VERBOSITY_OPTION)) {
2002             log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
2003         }
2004 
2005         uset_close(set);
2006         uset_close(dumb);
2007     }
2008 
2009     /* ### TODO: test error cases and other interesting things */
2010 }
2011 
2012 static void
TestUCharFromNameUnderflownull2013 TestUCharFromNameUnderflow() {
2014     // Ticket #10889: Underflow crash when there is no dash.
2015     const char *name="<NO BREAK SPACE>";
2016     UErrorCode errorCode=U_ZERO_ERROR;
2017     UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2018     if(U_SUCCESS(errorCode)) {
2019         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2020                 name, c, u_errorName(errorCode));
2021     }
2022 
2023     // Test related edge cases.
2024     name="<-00a0>";
2025     errorCode=U_ZERO_ERROR;
2026     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2027     if(U_SUCCESS(errorCode)) {
2028         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2029                 name, c, u_errorName(errorCode));
2030     }
2031 
2032     errorCode=U_ZERO_ERROR;
2033     name="<control->";
2034     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2035     if(U_SUCCESS(errorCode)) {
2036         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2037                 name, c, u_errorName(errorCode));
2038     }
2039 
2040     errorCode=U_ZERO_ERROR;
2041     name="<control-111111>";
2042     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2043     if(U_SUCCESS(errorCode)) {
2044         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2045                 name, c, u_errorName(errorCode));
2046     }
2047 
2048     // ICU-20292: integer overflow
2049     errorCode=U_ZERO_ERROR;
2050     name="<noncharacter-10010FFFF>";
2051     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2052     if(U_SUCCESS(errorCode)) {
2053         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2054                 name, c, u_errorName(errorCode));
2055     }
2056 
2057     errorCode=U_ZERO_ERROR;
2058     name="<noncharacter-00010FFFF>";  // too many digits even if only leading 0s
2059     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2060     if(U_SUCCESS(errorCode)) {
2061         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2062                 name, c, u_errorName(errorCode));
2063     }
2064 
2065     errorCode=U_ZERO_ERROR;
2066     name="<noncharacter-fFFf>>";
2067     c=u_charFromName(U_EXTENDED_CHAR_NAME, name, &errorCode);
2068     if(U_SUCCESS(errorCode)) {
2069         log_err("u_charFromName(%s) = U+%04x but should fail - %s\n",
2070                 name, c, u_errorName(errorCode));
2071     }
2072 }
2073 
2074 /* test u_isMirrored() and u_charMirror() ----------------------------------- */
2075 
2076 static void
TestMirroringnull2077 TestMirroring() {
2078     USet *set;
2079     UErrorCode errorCode;
2080 
2081     UChar32 start, end, c2, c3;
2082     int32_t i;
2083 
2084     U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
2085 
2086     U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
2087 
2088     log_verbose("Testing u_isMirrored()\n");
2089     if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
2090          !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
2091         )
2092     ) {
2093         log_err("u_isMirrored() does not work correctly\n");
2094     }
2095 
2096     log_verbose("Testing u_charMirror()\n");
2097     if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
2098          u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
2099          u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
2100          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2101          u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
2102          )
2103     ) {
2104         log_err("u_charMirror() does not work correctly\n");
2105     }
2106 
2107     /* verify that Bidi_Mirroring_Glyph roundtrips */
2108     errorCode=U_ZERO_ERROR;
2109     set=uset_openPattern(mirroredPattern, 17, &errorCode);
2110 
2111     if (U_FAILURE(errorCode)) {
2112         log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
2113     } else {
2114         for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
2115             do {
2116                 c2=u_charMirror(start);
2117                 c3=u_charMirror(c2);
2118                 if(c3!=start) {
2119                     log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
2120                 }
2121                 c3=u_getBidiPairedBracket(start);
2122                 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) {
2123                     if(c3!=start) {
2124                         log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n",
2125                                 (long)start);
2126                     }
2127                 } else {
2128                     if(c3!=c2) {
2129                         log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n",
2130                                 (long)start, (long)c2);
2131                     }
2132                 }
2133             } while(++start<=end);
2134         }
2135     }
2136 
2137     uset_close(set);
2138 }
2139 
2140 
2141 struct RunTestData
2142 {
2143     const char *runText;
2144     UScriptCode runCode;
2145 };
2146 
2147 typedef struct RunTestData RunTestData;
2148 
2149 static void
CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns, const char *prefix)2150 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2151                 const char *prefix)
2152 {
2153     int32_t run, runStart, runLimit;
2154     UScriptCode runCode;
2155 
2156     /* iterate over all the runs */
2157     run = 0;
2158     while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2159         if (runStart != runStarts[run]) {
2160             log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2161                 prefix, run, runStarts[run], runStart);
2162         }
2163 
2164         if (runLimit != runStarts[run + 1]) {
2165             log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2166                 prefix, run, runStarts[run + 1], runLimit);
2167         }
2168 
2169         if (runCode != testData[run].runCode) {
2170             log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2171                 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2172         }
2173 
2174         run += 1;
2175 
2176         /* stop when we've seen all the runs we expect to see */
2177         if (run >= nRuns) {
2178             break;
2179         }
2180     }
2181 
2182     /* Complain if we didn't see then number of runs we expected */
2183     if (run != nRuns) {
2184         log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2185     }
2186 }
2187 
2188 static void
TestUScriptRunAPInull2189 TestUScriptRunAPI()
2190 {
2191     static const RunTestData testData1[] = {
2192         {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2193         {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2194         {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2195         {"English (", USCRIPT_LATIN},
2196         {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2197         {") ", USCRIPT_LATIN},
2198         {"\\u6F22\\u5B75", USCRIPT_HAN},
2199         {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2200         {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2201         {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2202     };
2203 
2204     static const RunTestData testData2[] = {
2205        {"((((((((((abc))))))))))", USCRIPT_LATIN}
2206     };
2207 
2208     static const struct {
2209       const RunTestData *testData;
2210       int32_t nRuns;
2211     } testDataEntries[] = {
2212         {testData1, UPRV_LENGTHOF(testData1)},
2213         {testData2, UPRV_LENGTHOF(testData2)}
2214     };
2215 
2216     static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries);
2217     int32_t testEntry;
2218 
2219     for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2220         UChar testString[1024];
2221         int32_t runStarts[256];
2222         int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2223         const RunTestData *testData = testDataEntries[testEntry].testData;
2224 
2225         int32_t run, stringLimit;
2226         UScriptRun *scriptRun = NULL;
2227         UErrorCode err;
2228 
2229         /*
2230          * Fill in the test string and the runStarts array.
2231          */
2232         stringLimit = 0;
2233         for (run = 0; run < nTestRuns; run += 1) {
2234             runStarts[run] = stringLimit;
2235             stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2236             /*stringLimit -= 1;*/
2237         }
2238 
2239         /* The limit of the last run */
2240         runStarts[nTestRuns] = stringLimit;
2241 
2242         /*
2243          * Make sure that calling uscript_OpenRun with a NULL text pointer
2244          * and a non-zero text length returns the correct error.
2245          */
2246         err = U_ZERO_ERROR;
2247         scriptRun = uscript_openRun(NULL, stringLimit, &err);
2248 
2249         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2250             log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2251         }
2252 
2253         if (scriptRun != NULL) {
2254             log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2255             uscript_closeRun(scriptRun);
2256         }
2257 
2258         /*
2259          * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2260          * and a zero text length returns the correct error.
2261          */
2262         err = U_ZERO_ERROR;
2263         scriptRun = uscript_openRun(testString, 0, &err);
2264 
2265         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2266             log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2267         }
2268 
2269         if (scriptRun != NULL) {
2270             log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2271             uscript_closeRun(scriptRun);
2272         }
2273 
2274         /*
2275          * Make sure that calling uscript_openRun with a NULL text pointer
2276          * and a zero text length doesn't return an error.
2277          */
2278         err = U_ZERO_ERROR;
2279         scriptRun = uscript_openRun(NULL, 0, &err);
2280 
2281         if (U_FAILURE(err)) {
2282             log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2283         }
2284 
2285         /* Make sure that the empty iterator doesn't find any runs */
2286         if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2287             log_err("uscript_nextRun(...) returned true for an empty iterator.\n");
2288         }
2289 
2290         /*
2291          * Make sure that calling uscript_setRunText with a NULL text pointer
2292          * and a non-zero text length returns the correct error.
2293          */
2294         err = U_ZERO_ERROR;
2295         uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2296 
2297         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2298             log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2299         }
2300 
2301         /*
2302          * Make sure that calling uscript_setRunText with a non-NULL text pointer
2303          * and a zero text length returns the correct error.
2304          */
2305         err = U_ZERO_ERROR;
2306         uscript_setRunText(scriptRun, testString, 0, &err);
2307 
2308         if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2309             log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2310         }
2311 
2312         /*
2313          * Now call uscript_setRunText on the empty iterator
2314          * and make sure that it works.
2315          */
2316         err = U_ZERO_ERROR;
2317         uscript_setRunText(scriptRun, testString, stringLimit, &err);
2318 
2319         if (U_FAILURE(err)) {
2320             log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2321         } else {
2322             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2323         }
2324 
2325         uscript_closeRun(scriptRun);
2326 
2327         /*
2328          * Now open an iterator over the testString
2329          * using uscript_openRun and make sure that it works
2330          */
2331         scriptRun = uscript_openRun(testString, stringLimit, &err);
2332 
2333         if (U_FAILURE(err)) {
2334             log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2335         } else {
2336             CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2337         }
2338 
2339         /* Now reset the iterator, and make sure
2340          * that it still works.
2341          */
2342         uscript_resetRun(scriptRun);
2343 
2344         CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2345 
2346         /* Close the iterator */
2347         uscript_closeRun(scriptRun);
2348     }
2349 }
2350 
2351 /* test additional, non-core properties */
2352 static void
TestAdditionalPropertiesnull2353 TestAdditionalProperties() {
2354     /* test data for u_charAge() */
2355     static const struct {
2356         UChar32 c;
2357         UVersionInfo version;
2358     } charAges[]={
2359         {0x41,    { 1, 1, 0, 0 }},
2360         {0xffff,  { 1, 1, 0, 0 }},
2361         {0x20ab,  { 2, 0, 0, 0 }},
2362         {0x2fffe, { 2, 0, 0, 0 }},
2363         {0x20ac,  { 2, 1, 0, 0 }},
2364         {0xfb1d,  { 3, 0, 0, 0 }},
2365         {0x3f4,   { 3, 1, 0, 0 }},
2366         {0x10300, { 3, 1, 0, 0 }},
2367         {0x220,   { 3, 2, 0, 0 }},
2368         {0xff60,  { 3, 2, 0, 0 }}
2369     };
2370 
2371     /* test data for u_hasBinaryProperty() */
2372     static const int32_t
2373     props[][3]={ /* code point, property, value */
2374         { 0x0627, UCHAR_ALPHABETIC, true },
2375         { 0x1034a, UCHAR_ALPHABETIC, true },
2376         { 0x2028, UCHAR_ALPHABETIC, false },
2377 
2378         { 0x0066, UCHAR_ASCII_HEX_DIGIT, true },
2379         { 0x0067, UCHAR_ASCII_HEX_DIGIT, false },
2380 
2381         { 0x202c, UCHAR_BIDI_CONTROL, true },
2382         { 0x202f, UCHAR_BIDI_CONTROL, false },
2383 
2384         { 0x003c, UCHAR_BIDI_MIRRORED, true },
2385         { 0x003d, UCHAR_BIDI_MIRRORED, false },
2386 
2387         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2388         { 0x2018, UCHAR_BIDI_MIRRORED, false },
2389         { 0x201d, UCHAR_BIDI_MIRRORED, false },
2390         { 0x201f, UCHAR_BIDI_MIRRORED, false },
2391         { 0x301e, UCHAR_BIDI_MIRRORED, false },
2392 
2393         { 0x058a, UCHAR_DASH, true },
2394         { 0x007e, UCHAR_DASH, false },
2395 
2396         { 0x0c4d, UCHAR_DIACRITIC, true },
2397         { 0x3000, UCHAR_DIACRITIC, false },
2398 
2399         { 0x0e46, UCHAR_EXTENDER, true },
2400         { 0x0020, UCHAR_EXTENDER, false },
2401 
2402 #if !UCONFIG_NO_NORMALIZATION
2403         { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, true },
2404         { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, true },
2405         { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, false },
2406 
2407         { 0x110a, UCHAR_NFD_INERT, true },      /* Jamo L */
2408         { 0x0308, UCHAR_NFD_INERT, false },
2409 
2410         { 0x1164, UCHAR_NFKD_INERT, true },     /* Jamo V */
2411         { 0x1d79d, UCHAR_NFKD_INERT, false },   /* math compat version of xi */
2412 
2413         { 0x0021, UCHAR_NFC_INERT, true },      /* ! */
2414         { 0x0061, UCHAR_NFC_INERT, false },     /* a */
2415         { 0x00e4, UCHAR_NFC_INERT, false },     /* a-umlaut */
2416         { 0x0102, UCHAR_NFC_INERT, false },     /* a-breve */
2417         { 0xac1c, UCHAR_NFC_INERT, false },     /* Hangul LV */
2418         { 0xac1d, UCHAR_NFC_INERT, true },      /* Hangul LVT */
2419 
2420         { 0x1d79d, UCHAR_NFKC_INERT, false },   /* math compat version of xi */
2421         { 0x2a6d6, UCHAR_NFKC_INERT, true },    /* Han, last of CJK ext. B */
2422 
2423         { 0x00e4, UCHAR_SEGMENT_STARTER, true },
2424         { 0x0308, UCHAR_SEGMENT_STARTER, false },
2425         { 0x110a, UCHAR_SEGMENT_STARTER, true }, /* Jamo L */
2426         { 0x1164, UCHAR_SEGMENT_STARTER, false },/* Jamo V */
2427         { 0xac1c, UCHAR_SEGMENT_STARTER, true }, /* Hangul LV */
2428         { 0xac1d, UCHAR_SEGMENT_STARTER, true }, /* Hangul LVT */
2429 #endif
2430 
2431         { 0x0044, UCHAR_HEX_DIGIT, true },
2432         { 0xff46, UCHAR_HEX_DIGIT, true },
2433         { 0x0047, UCHAR_HEX_DIGIT, false },
2434 
2435         { 0x30fb, UCHAR_HYPHEN, true },
2436         { 0xfe58, UCHAR_HYPHEN, false },
2437 
2438         { 0x2172, UCHAR_ID_CONTINUE, true },
2439         { 0x0307, UCHAR_ID_CONTINUE, true },
2440         { 0x005c, UCHAR_ID_CONTINUE, false },
2441 
2442         { 0x2172, UCHAR_ID_START, true },
2443         { 0x007a, UCHAR_ID_START, true },
2444         { 0x0039, UCHAR_ID_START, false },
2445 
2446         { 0x4db5, UCHAR_IDEOGRAPHIC, true },
2447         { 0x2f999, UCHAR_IDEOGRAPHIC, true },
2448         { 0x2f99, UCHAR_IDEOGRAPHIC, false },
2449 
2450         { 0x200c, UCHAR_JOIN_CONTROL, true },
2451         { 0x2029, UCHAR_JOIN_CONTROL, false },
2452 
2453         { 0x1d7bc, UCHAR_LOWERCASE, true },
2454         { 0x0345, UCHAR_LOWERCASE, true },
2455         { 0x0030, UCHAR_LOWERCASE, false },
2456 
2457         { 0x1d7a9, UCHAR_MATH, true },
2458         { 0x2135, UCHAR_MATH, true },
2459         { 0x0062, UCHAR_MATH, false },
2460 
2461         { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, true },
2462         { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, true },
2463         { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, false },
2464 
2465         { 0x0022, UCHAR_QUOTATION_MARK, true },
2466         { 0xff62, UCHAR_QUOTATION_MARK, true },
2467         { 0xd840, UCHAR_QUOTATION_MARK, false },
2468 
2469         { 0x061f, UCHAR_TERMINAL_PUNCTUATION, true },
2470         { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, false },
2471 
2472         { 0x1d44a, UCHAR_UPPERCASE, true },
2473         { 0x2162, UCHAR_UPPERCASE, true },
2474         { 0x0345, UCHAR_UPPERCASE, false },
2475 
2476         { 0x0020, UCHAR_WHITE_SPACE, true },
2477         { 0x202f, UCHAR_WHITE_SPACE, true },
2478         { 0x3001, UCHAR_WHITE_SPACE, false },
2479 
2480         { 0x0711, UCHAR_XID_CONTINUE, true },
2481         { 0x1d1aa, UCHAR_XID_CONTINUE, true },
2482         { 0x007c, UCHAR_XID_CONTINUE, false },
2483 
2484         { 0x16ee, UCHAR_XID_START, true },
2485         { 0x23456, UCHAR_XID_START, true },
2486         { 0x1d1aa, UCHAR_XID_START, false },
2487 
2488         /*
2489          * Version break:
2490          * The following properties are only supported starting with the
2491          * Unicode version indicated in the second field.
2492          */
2493         { -1, 0x320, 0 },
2494 
2495         { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, true },
2496         { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, true },
2497         { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, false },
2498 
2499         { 0x0149, UCHAR_DEPRECATED, true },         /* changed in Unicode 5.2 */
2500         { 0x0341, UCHAR_DEPRECATED, false },        /* changed in Unicode 5.2 */
2501         { 0xe0001, UCHAR_DEPRECATED, true },        /* changed from Unicode 5 to 5.1 */
2502         { 0xe0100, UCHAR_DEPRECATED, false },
2503 
2504         { 0x00a0, UCHAR_GRAPHEME_BASE, true },
2505         { 0x0a4d, UCHAR_GRAPHEME_BASE, false },
2506         { 0xff9d, UCHAR_GRAPHEME_BASE, true },
2507         { 0xff9f, UCHAR_GRAPHEME_BASE, false },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2508 
2509         { 0x0300, UCHAR_GRAPHEME_EXTEND, true },
2510         { 0xff9d, UCHAR_GRAPHEME_EXTEND, false },
2511         { 0xff9f, UCHAR_GRAPHEME_EXTEND, true },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2512         { 0x0603, UCHAR_GRAPHEME_EXTEND, false },
2513 
2514         { 0x0a4d, UCHAR_GRAPHEME_LINK, true },
2515         { 0xff9f, UCHAR_GRAPHEME_LINK, false },
2516 
2517         { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, true },
2518         { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, false },
2519 
2520         { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, true },
2521         { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, false },
2522 
2523         { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, true },
2524         { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, false },
2525 
2526         { 0x2e9b, UCHAR_RADICAL, true },
2527         { 0x4e00, UCHAR_RADICAL, false },
2528 
2529         { 0x012f, UCHAR_SOFT_DOTTED, true },
2530         { 0x0049, UCHAR_SOFT_DOTTED, false },
2531 
2532         { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, true },
2533         { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, false },
2534 
2535         { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2536 
2537         { 0x002e, UCHAR_S_TERM, true },
2538         { 0x0061, UCHAR_S_TERM, false },
2539 
2540         { 0x180c, UCHAR_VARIATION_SELECTOR, true },
2541         { 0xfe03, UCHAR_VARIATION_SELECTOR, true },
2542         { 0xe01ef, UCHAR_VARIATION_SELECTOR, true },
2543         { 0xe0200, UCHAR_VARIATION_SELECTOR, false },
2544 
2545         /* enum/integer type properties */
2546 
2547         /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2548         /* test default Bidi classes for unassigned code points */
2549         { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2550         { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2551         { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2552         { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2553         { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2554         { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2555         { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2556         { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2557         { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2558         { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2559 
2560         { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2561         { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2562         { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2563         { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2564         { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2565         { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2566         { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2567 
2568         { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2569         { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2570         { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2571         { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2572         { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2573         { 0x2FE0, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2574         { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2575         { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2576         { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2577         { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2578         { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2579 
2580         /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2581         { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2582 
2583         { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2584         { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2585         { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2586         { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2587         { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2588         { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2589         { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2590         { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2591         { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2592 
2593         { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2594         { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2595         { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2596         { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2597         { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2598         { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2599         { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2600         { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2601         { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2602         { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2603         { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2604         { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2605         { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2606         { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2607         { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2608         { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2609         { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2610 
2611         /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2612         { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2613         { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2614 
2615         { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2616         { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2617         { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2618         { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2619         { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2620 
2621         { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2622         { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2623         { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2624         { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2625         { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2626         { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2627         { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2628         { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2629 
2630         /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2631         { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2632         { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2633         { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2634         { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2635         { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2636         { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2637         { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2638         { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2639         { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2640         { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2641         { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2642         { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2643         { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2644         { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2645         { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2646 
2647         /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2648 
2649         /* UCHAR_SCRIPT tested in cucdapi.c TestUScriptCodeAPI() */
2650 
2651         { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2652         { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2653         { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2654         { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2655         { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2656         { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2657         { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2658 
2659         { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2660         { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2661         { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2662         { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2663 
2664         { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2665         { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2666         { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2667         { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2668         { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2669         { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2670 
2671         { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2672         { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2673         { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2674         { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2675 
2676         { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2677         { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2678         { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2679         { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2680         { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2681         { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2682         { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2683 
2684         { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2685         { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2686         { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2687         { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2688 
2689         { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2690         { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2691         { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2692         { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2693 
2694         { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2695         { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2696         { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2697         { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2698         { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2699 
2700         { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2701 
2702         { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2703 
2704         { 0x00d7, UCHAR_PATTERN_SYNTAX, true },
2705         { 0xfe45, UCHAR_PATTERN_SYNTAX, true },
2706         { 0x0061, UCHAR_PATTERN_SYNTAX, false },
2707 
2708         { 0x0020, UCHAR_PATTERN_WHITE_SPACE, true },
2709         { 0x0085, UCHAR_PATTERN_WHITE_SPACE, true },
2710         { 0x200f, UCHAR_PATTERN_WHITE_SPACE, true },
2711         { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, false },
2712         { 0x3000, UCHAR_PATTERN_WHITE_SPACE, false },
2713 
2714         { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2715         { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2716         { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2717 
2718         { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2719         { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2720         { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2721         { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2722 
2723         { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2724         { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2725         { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2726         { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2727         { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2728         { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2729 
2730         { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2731         { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2732         { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2733         { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2734 
2735         { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2736         { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2737         { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2738         { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2739 
2740         { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2741         { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2742         { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2743         { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2744 
2745         { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2746 
2747         /* unassigned code points in new default Bidi R blocks */
2748         { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2749         { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2750 
2751         /* test some script codes >127 */
2752         { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2753         { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2754         { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2755 
2756         { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2757 
2758         /* value changed in Unicode 6.0 */
2759         { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2760 
2761         { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2762 
2763         /* unassigned code points in new/changed default Bidi AL blocks */
2764         { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2765         { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2766 
2767         { -1, 0x630, 0 }, /* version break for Unicode 6.3 */
2768 
2769         /* unassigned code points in the currency symbols block now default to ET */
2770         { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2771         { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR },
2772 
2773         /* new property in Unicode 6.3 */
2774         { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2775         { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2776         { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2777         { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE },
2778         { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN },
2779         { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE },
2780 
2781         { -1, 0x700, 0 }, /* version break for Unicode 7.0 */
2782 
2783         /* new character range with Joining_Group values */
2784         { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2785         { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH },
2786         { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH },
2787         { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED },
2788         { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2789 
2790         { -1, 0xa00, 0 },  // version break for Unicode 10
2791 
2792         { 0x1F1E5, UCHAR_REGIONAL_INDICATOR, false },
2793         { 0x1F1E7, UCHAR_REGIONAL_INDICATOR, true },
2794         { 0x1F1FF, UCHAR_REGIONAL_INDICATOR, true },
2795         { 0x1F200, UCHAR_REGIONAL_INDICATOR, false },
2796 
2797         { 0x0600, UCHAR_PREPENDED_CONCATENATION_MARK, true },
2798         { 0x0606, UCHAR_PREPENDED_CONCATENATION_MARK, false },
2799         { 0x110BD, UCHAR_PREPENDED_CONCATENATION_MARK, true },
2800 
2801         /* undefined UProperty values */
2802         { 0x61, 0x4a7, 0 },
2803         { 0x234bc, 0x15ed, 0 }
2804     };
2805 
2806     UVersionInfo version;
2807     UChar32 c;
2808     int32_t i, result, uVersion;
2809     UProperty which;
2810 
2811     /* what is our Unicode version? */
2812     u_getUnicodeVersion(version);
2813     uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2814 
2815     u_charAge(0x20, version);
2816     if(version[0]==0) {
2817         /* no additional properties available */
2818         log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2819         return;
2820     }
2821 
2822     /* test u_charAge() */
2823     for(i=0; i<UPRV_LENGTHOF(charAges); ++i) {
2824         u_charAge(charAges[i].c, version);
2825         if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2826             log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2827                 charAges[i].c,
2828                 version[0], version[1], version[2], version[3],
2829                 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2830         }
2831     }
2832 
2833     if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2834         u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2835         u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2836         u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2837         u_getIntPropertyMinValue(0x2345)!=0
2838     ) {
2839         log_err("error: u_getIntPropertyMinValue() wrong\n");
2840     }
2841     if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2842         log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2843     }
2844     if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2845         log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2846     }
2847     if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2848         log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2849     }
2850     if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2851         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2852     }
2853     if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2854         log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2855     }
2856     if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2857         log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2858     }
2859     if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2860         log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2861     }
2862     if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2863         log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2864     }
2865     if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2866         log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2867     }
2868     if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2869         log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2870     }
2871     if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2872         log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2873     }
2874     if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2875         log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2876     }
2877     if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2878         log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2879     }
2880     if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) {
2881         log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n");
2882     }
2883     /*JB#2410*/
2884     if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2885         log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2886     }
2887     if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2888         log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2889     }
2890     if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2891         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2892     }
2893     if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2894         log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2895     }
2896     if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2897         log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2898     }
2899 
2900     /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2901     for(i=0; i<UPRV_LENGTHOF(props); ++i) {
2902         const char *whichName;
2903 
2904         if(props[i][0]<0) {
2905             /* Unicode version break */
2906             if(uVersion<props[i][1]) {
2907                 break; /* do not test properties that are not yet supported */
2908             } else {
2909                 continue; /* skip this row */
2910             }
2911         }
2912 
2913         c=(UChar32)props[i][0];
2914         which=(UProperty)props[i][1];
2915         whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2916 
2917         if(which<UCHAR_INT_START) {
2918             result=u_hasBinaryProperty(c, which);
2919             if(result!=props[i][2]) {
2920                 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2921                         c, whichName, result, i);
2922             }
2923         }
2924 
2925         result=u_getIntPropertyValue(c, which);
2926         if(result!=props[i][2]) {
2927             log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2928                     c, whichName, result, props[i][2], i);
2929         }
2930 
2931         /* test separate functions, too */
2932         switch((UProperty)props[i][1]) {
2933         case UCHAR_ALPHABETIC:
2934             if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2935                 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2936                         props[i][0], result, i);
2937             }
2938             break;
2939         case UCHAR_LOWERCASE:
2940             if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2941                 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2942                         props[i][0], result, i);
2943             }
2944             break;
2945         case UCHAR_UPPERCASE:
2946             if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2947                 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2948                         props[i][0], result, i);
2949             }
2950             break;
2951         case UCHAR_WHITE_SPACE:
2952             if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2953                 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2954                         props[i][0], result, i);
2955             }
2956             break;
2957         default:
2958             break;
2959         }
2960     }
2961 
2962     // C API coverage
2963     if (u_stringHasBinaryProperty(u"⏱", 1, UCHAR_BASIC_EMOJI) ||
2964             u_stringHasBinaryProperty(u"⏱", -1, UCHAR_BASIC_EMOJI) ||
2965             !u_stringHasBinaryProperty(u"⏱\uFE0F", 2, UCHAR_BASIC_EMOJI) ||
2966             !u_stringHasBinaryProperty(u"⏱\uFE0F", -1, UCHAR_BASIC_EMOJI)) {
2967         log_data_err("error: u_stringHasBinaryProperty(stopwatch variants) is wrong\n");
2968     }
2969 }
2970 
2971 static void
TestNumericProperties(void)2972 TestNumericProperties(void) {
2973     /* see UnicodeData.txt, DerivedNumericValues.txt */
2974     static const struct {
2975         UChar32 c;
2976         int32_t type;
2977         double numValue;
2978     } values[]={
2979         { 0x0F33, U_NT_NUMERIC, -1./2. },
2980         { 0x0C66, U_NT_DECIMAL, 0 },
2981         { 0x96f6, U_NT_NUMERIC, 0 },
2982         { 0xa833, U_NT_NUMERIC, 1./16. },
2983         { 0x2152, U_NT_NUMERIC, 1./10. },
2984         { 0x2151, U_NT_NUMERIC, 1./9. },
2985         { 0x1245f, U_NT_NUMERIC, 1./8. },
2986         { 0x2150, U_NT_NUMERIC, 1./7. },
2987         { 0x2159, U_NT_NUMERIC, 1./6. },
2988         { 0x09f6, U_NT_NUMERIC, 3./16. },
2989         { 0x2155, U_NT_NUMERIC, 1./5. },
2990         { 0x00BD, U_NT_NUMERIC, 1./2. },
2991         { 0x0031, U_NT_DECIMAL, 1. },
2992         { 0x4e00, U_NT_NUMERIC, 1. },
2993         { 0x58f1, U_NT_NUMERIC, 1. },
2994         { 0x10320, U_NT_NUMERIC, 1. },
2995         { 0x0F2B, U_NT_NUMERIC, 3./2. },
2996         { 0x00B2, U_NT_DIGIT, 2. },
2997         { 0x5f10, U_NT_NUMERIC, 2. },
2998         { 0x1813, U_NT_DECIMAL, 3. },
2999         { 0x5f0e, U_NT_NUMERIC, 3. },
3000         { 0x2173, U_NT_NUMERIC, 4. },
3001         { 0x8086, U_NT_NUMERIC, 4. },
3002         { 0x278E, U_NT_DIGIT, 5. },
3003         { 0x1D7F2, U_NT_DECIMAL, 6. },
3004         { 0x247A, U_NT_DIGIT, 7. },
3005         { 0x7396, U_NT_NUMERIC, 9. },
3006         { 0x1372, U_NT_NUMERIC, 10. },
3007         { 0x216B, U_NT_NUMERIC, 12. },
3008         { 0x16EE, U_NT_NUMERIC, 17. },
3009         { 0x249A, U_NT_NUMERIC, 19. },
3010         { 0x303A, U_NT_NUMERIC, 30. },
3011         { 0x5345, U_NT_NUMERIC, 30. },
3012         { 0x32B2, U_NT_NUMERIC, 37. },
3013         { 0x1375, U_NT_NUMERIC, 40. },
3014         { 0x10323, U_NT_NUMERIC, 50. },
3015         { 0x0BF1, U_NT_NUMERIC, 100. },
3016         { 0x964c, U_NT_NUMERIC, 100. },
3017         { 0x217E, U_NT_NUMERIC, 500. },
3018         { 0x2180, U_NT_NUMERIC, 1000. },
3019         { 0x4edf, U_NT_NUMERIC, 1000. },
3020         { 0x2181, U_NT_NUMERIC, 5000. },
3021         { 0x137C, U_NT_NUMERIC, 10000. },
3022         { 0x4e07, U_NT_NUMERIC, 10000. },
3023         { 0x12432, U_NT_NUMERIC, 216000. },
3024         { 0x12433, U_NT_NUMERIC, 432000. },
3025         { 0x4ebf, U_NT_NUMERIC, 100000000. },
3026         { 0x5146, U_NT_NUMERIC, 1000000000000. },
3027         { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
3028         { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
3029         { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
3030         { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
3031         { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
3032         { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
3033         { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
3034         { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
3035     };
3036 
3037     double nv;
3038     UChar32 c;
3039     int32_t i, type;
3040 
3041     for(i=0; i<UPRV_LENGTHOF(values); ++i) {
3042         c=values[i].c;
3043         type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
3044         nv=u_getNumericValue(c);
3045 
3046         if(type!=values[i].type) {
3047             log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
3048         }
3049         if(0.000001 <= fabs(nv - values[i].numValue)) {
3050             log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
3051         }
3052     }
3053 }
3054 
3055 /**
3056  * Test the property names and property value names API.
3057  */
3058 static void
TestPropertyNames(void)3059 TestPropertyNames(void) {
3060     int32_t p, v, choice=0, rev;
3061     UBool atLeastSomething = false;
3062 
3063     for (p=0; ; ++p) {
3064         UProperty propEnum = (UProperty)p;
3065         UBool sawProp = false;
3066         if(p > 10 && !atLeastSomething) {
3067           log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
3068           return;
3069         }
3070 
3071         for (choice=0; ; ++choice) {
3072             const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
3073             if (name) {
3074                 if (!sawProp)
3075                     log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
3076                 log_verbose("%d=\"%s\"", choice, name);
3077                 sawProp = true;
3078                 atLeastSomething = true;
3079 
3080                 /* test reverse mapping */
3081                 rev = u_getPropertyEnum(name);
3082                 if (rev != p) {
3083                     log_err("Property round-trip failure: %d -> %s -> %d\n",
3084                             p, name, rev);
3085                 }
3086             }
3087             if (!name && choice>0) break;
3088         }
3089         if (sawProp) {
3090             /* looks like a valid property; check the values */
3091             const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3092             int32_t max = 0;
3093             if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
3094                 max = 255;
3095             } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
3096                 /* it's far too slow to iterate all the way up to
3097                    the real max, U_GC_P_MASK */
3098                 max = U_GC_NL_MASK;
3099             } else if (p == UCHAR_BLOCK) {
3100                 /* UBlockCodes, unlike other values, start at 1 */
3101                 max = 1;
3102             }
3103             log_verbose("\n");
3104             for (v=-1; ; ++v) {
3105                 UBool sawValue = false;
3106                 for (choice=0; ; ++choice) {
3107                     const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
3108                     if (vname) {
3109                         if (!sawValue) log_verbose(" %s, value %d:", pname, v);
3110                         log_verbose("%d=\"%s\"", choice, vname);
3111                         sawValue = true;
3112 
3113                         /* test reverse mapping */
3114                         rev = u_getPropertyValueEnum(propEnum, vname);
3115                         if (rev != v) {
3116                             log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
3117                                     pname, v, vname, rev);
3118                         }
3119                     }
3120                     if (!vname && choice>0) break;
3121                 }
3122                 if (sawValue) {
3123                     log_verbose("\n");
3124                 }
3125                 if (!sawValue && v>=max) break;
3126             }
3127         }
3128         if (!sawProp) {
3129             if (p>=UCHAR_STRING_LIMIT) {
3130                 break;
3131             } else if (p>=UCHAR_DOUBLE_LIMIT) {
3132                 p = UCHAR_STRING_START - 1;
3133             } else if (p>=UCHAR_MASK_LIMIT) {
3134                 p = UCHAR_DOUBLE_START - 1;
3135             } else if (p>=UCHAR_INT_LIMIT) {
3136                 p = UCHAR_MASK_START - 1;
3137             } else if (p>=UCHAR_BINARY_LIMIT) {
3138                 p = UCHAR_INT_START - 1;
3139             }
3140         }
3141     }
3142 }
3143 
3144 /**
3145  * Test the property values API.  See JB#2410.
3146  */
3147 static void
TestPropertyValues(void)3148 TestPropertyValues(void) {
3149     int32_t i, p, min, max;
3150     UErrorCode ec;
3151 
3152     /* Min should be 0 for everything. */
3153     /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
3154     for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
3155         UProperty propEnum = (UProperty)p;
3156         min = u_getIntPropertyMinValue(propEnum);
3157         if (min != 0) {
3158             if (p == UCHAR_BLOCK) {
3159                 /* This is okay...for now.  See JB#2487.
3160                    TODO Update this for JB#2487. */
3161             } else {
3162                 const char* name;
3163                 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
3164                 if (name == NULL)
3165                     name = "<ERROR>";
3166                 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
3167                         name, min);
3168             }
3169         }
3170     }
3171 
3172     if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
3173         u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
3174         log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
3175     }
3176 
3177     /* Max should be -1 for invalid properties. */
3178     max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
3179     if (max != -1) {
3180         log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
3181                 max);
3182     }
3183 
3184     /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
3185     for (i=0; i<2; ++i) {
3186         int32_t script;
3187         const char* desc;
3188         ec = U_ZERO_ERROR;
3189         switch (i) {
3190         case 0:
3191             script = uscript_getScript(-1, &ec);
3192             desc = "uscript_getScript(-1)";
3193             break;
3194         case 1:
3195             script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3196             desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3197             break;
3198         default:
3199             log_err("Internal test error. Too many scripts\n");
3200             return;
3201         }
3202         /* We don't explicitly test ec.  It should be U_FAILURE but it
3203            isn't documented as such. */
3204         if (script != (int32_t)USCRIPT_INVALID_CODE) {
3205             log_err("FAIL: %s = %d, exp. 0\n",
3206                     desc, script);
3207         }
3208     }
3209 }
3210 
3211 /* various tests for consistency of UCD data and API behavior */
3212 static void
TestConsistencynull3213 TestConsistency() {
3214     char buffer[300];
3215     USet *set1, *set2, *set3, *set4;
3216     UErrorCode errorCode;
3217 
3218     UChar32 start, end;
3219     int32_t i, length;
3220 
3221     U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3222     U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3223     U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3224     U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3225     U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3226 
3227     U_STRING_DECL(mathBlocksPattern,
3228         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3229         214);
3230     U_STRING_DECL(mathPattern, "[:Math:]", 8);
3231     U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3232     U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3233     U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3234 
3235     U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3236     U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3237     U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3238     U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3239     U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3240 
3241     U_STRING_INIT(mathBlocksPattern,
3242         "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3243         214);
3244     U_STRING_INIT(mathPattern, "[:Math:]", 8);
3245     U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3246     U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3247     U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3248 
3249     /*
3250      * It used to be that UCD.html and its precursors said
3251      * "Those dashes used to mark connections between pieces of words,
3252      *  plus the Katakana middle dot."
3253      *
3254      * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3255      * but not from Hyphen.
3256      * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3257      * Therefore, do not show errors when testing the Hyphen property.
3258      */
3259     log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3260                 "known to the UTC and not considered errors.\n");
3261 
3262     errorCode=U_ZERO_ERROR;
3263     set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3264     set2=uset_openPattern(dashPattern, 8, &errorCode);
3265     if(U_SUCCESS(errorCode)) {
3266         /* remove the Katakana middle dot(s) from set1 */
3267         uset_remove(set1, 0x30fb);
3268         uset_remove(set1, 0xff65); /* halfwidth variant */
3269         showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", false);
3270     } else {
3271         log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3272     }
3273 
3274     /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3275     set3=uset_openPattern(formatPattern, 6, &errorCode);
3276     set4=uset_openPattern(alphaPattern, 14, &errorCode);
3277     if(U_SUCCESS(errorCode)) {
3278         showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", false);
3279         showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", true);
3280         showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", true);
3281     } else {
3282         log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3283     }
3284 
3285     uset_close(set1);
3286     uset_close(set2);
3287     uset_close(set3);
3288     uset_close(set4);
3289 
3290     /*
3291      * Check that each lowercase character has "small" in its name
3292      * and not "capital".
3293      * There are some such characters, some of which seem odd.
3294      * Use the verbose flag to see these notices.
3295      */
3296     errorCode=U_ZERO_ERROR;
3297     set1=uset_openPattern(lowerPattern, 13, &errorCode);
3298     if(U_SUCCESS(errorCode)) {
3299         for(i=0;; ++i) {
3300             length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3301             if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3302                 break; /* done */
3303             }
3304             if(U_FAILURE(errorCode)) {
3305                 log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3306                         i, u_errorName(errorCode));
3307                 break;
3308             }
3309             if(length!=0) {
3310                 break; /* done with code points, got a string or -1 */
3311             }
3312 
3313             while(start<=end) {
3314                 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3315                 if(U_FAILURE(errorCode)) {
3316                     log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3317                     errorCode=U_ZERO_ERROR;
3318                 }
3319                 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3320                     strstr(buffer, "SMALL CAPITAL")==NULL
3321                 ) {
3322                     log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3323                 }
3324                 ++start;
3325             }
3326         }
3327     } else {
3328         log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3329     }
3330     uset_close(set1);
3331 
3332     /* verify that all assigned characters in Math blocks are exactly Math characters */
3333     errorCode=U_ZERO_ERROR;
3334     set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3335     set2=uset_openPattern(mathPattern, 8, &errorCode);
3336     set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3337     if(U_SUCCESS(errorCode)) {
3338         uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3339         uset_complement(set3);      /* assigned characters */
3340         uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3341         compareUSets(set1, set2,
3342                      "[assigned Math block chars]", "[math blocks]&[:Math:]",
3343                      true);
3344     } else {
3345         log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3346     }
3347     uset_close(set1);
3348     uset_close(set2);
3349     uset_close(set3);
3350 
3351     /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3352     errorCode=U_ZERO_ERROR;
3353     set1=uset_openPattern(unknownPattern, 14, &errorCode);
3354     set2=uset_openPattern(reservedPattern, 20, &errorCode);
3355     if(U_SUCCESS(errorCode)) {
3356         compareUSets(set1, set2,
3357                      "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3358                      true);
3359     } else {
3360         log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3361     }
3362     uset_close(set1);
3363     uset_close(set2);
3364 }
3365 
3366 /* test case folding, compare return values with CaseFolding.txt ------------ */
3367 
/*
 * Bit flags naming the kinds of case folding of a character;
 * combined into a bit set of the foldings that have been tested already.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3375 
3376 static void
testFold(UChar32 c, int which, UChar32 simple, UChar32 turkic, const UChar *full, int32_t fullLength, const UChar *turkicFull, int32_t turkicFullLength)3377 testFold(UChar32 c, int which,
3378          UChar32 simple, UChar32 turkic,
3379          const UChar *full, int32_t fullLength,
3380          const UChar *turkicFull, int32_t turkicFullLength) {
3381     UChar s[2], t[32];
3382     UChar32 c2;
3383     int32_t length, length2;
3384 
3385     UErrorCode errorCode=U_ZERO_ERROR;
3386 
3387     length=0;
3388     U16_APPEND_UNSAFE(s, length, c);
3389 
3390     if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3391         log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3392     }
3393     if((which&CF_FULL)!=0) {
3394         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode);
3395         if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3396             log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3397         }
3398     }
3399     if((which&CF_TURKIC)!=0) {
3400         if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3401             log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3402         }
3403 
3404         length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3405         if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3406             log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3407         }
3408     }
3409 }
3410 
3411 /* test that c case-folds to itself */
3412 static void
testFoldToSelf(UChar32 c, int which)3413 testFoldToSelf(UChar32 c, int which) {
3414     UChar s[2];
3415     int32_t length;
3416 
3417     length=0;
3418     U16_APPEND_UNSAFE(s, length, c);
3419     testFold(c, which, c, c, s, length, s, length);
3420 }
3421 
3422 struct CaseFoldingData {
3423     USet *notSeen;
3424     UChar32 prev, prevSimple;
3425     UChar prevFull[32];
3426     int32_t prevFullLength;
3427     int which;
3428 };
3429 typedef struct CaseFoldingData CaseFoldingData;
3430 
3431 static void U_CALLCONV
caseFoldingLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode)3432 caseFoldingLineFn(void *context,
3433                   char *fields[][2], int32_t fieldCount,
3434                   UErrorCode *pErrorCode) {
3435     (void)fieldCount; // suppress compiler warnings about unused variable
3436 
3437     CaseFoldingData *pData=(CaseFoldingData *)context;
3438     char *end;
3439     UChar full[32];
3440     UChar32 c, prev, simple;
3441     int32_t count;
3442     int which;
3443     char status;
3444 
3445     /* get code point */
3446     const char *s=u_skipWhitespace(fields[0][0]);
3447     if(0==strncmp(s, "0000..10FFFF", 12)) {
3448         /*
3449          * Ignore the line
3450          * # @missing: 0000..10FFFF; C; <code point>
3451          * because maps-to-self is already our default, and this line breaks this parser.
3452          */
3453         return;
3454     }
3455     c=(UChar32)strtoul(s, &end, 16);
3456     end=(char *)u_skipWhitespace(end);
3457     if(end<=fields[0][0] || end!=fields[0][1]) {
3458         log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3459         *pErrorCode=U_PARSE_ERROR;
3460         return;
3461     }
3462 
3463     /* get the status of this mapping */
3464     status=*u_skipWhitespace(fields[1][0]);
3465     if(status!='C' && status!='S' && status!='F' && status!='T') {
3466         log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3467         *pErrorCode=U_PARSE_ERROR;
3468         return;
3469     }
3470 
3471     /* get the mapping */
3472     count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3473     if(U_FAILURE(*pErrorCode)) {
3474         log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3475         return;
3476     }
3477 
3478     /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3479     if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3480         simple=c;
3481     }
3482 
3483     if(c!=(prev=pData->prev)) {
3484         /*
3485          * Test remaining mappings for the previous code point.
3486          * If a turkic folding was not mentioned, then it should fold the same
3487          * as the regular simple case folding.
3488          */
3489         UChar prevString[2];
3490         int32_t length;
3491 
3492         length=0;
3493         U16_APPEND_UNSAFE(prevString, length, prev);
3494         testFold(prev, (~pData->which)&CF_ALL,
3495                  prev, pData->prevSimple,
3496                  prevString, length,
3497                  pData->prevFull, pData->prevFullLength);
3498         pData->prev=pData->prevSimple=c;
3499         length=0;
3500         U16_APPEND_UNSAFE(pData->prevFull, length, c);
3501         pData->prevFullLength=length;
3502         pData->which=0;
3503     }
3504 
3505     /*
3506      * Turn the status into a bit set of case foldings to test.
3507      * Remember non-Turkic case foldings as defaults for Turkic mode.
3508      */
3509     switch(status) {
3510     case 'C':
3511         which=CF_SIMPLE|CF_FULL;
3512         pData->prevSimple=simple;
3513         u_memcpy(pData->prevFull, full, count);
3514         pData->prevFullLength=count;
3515         break;
3516     case 'S':
3517         which=CF_SIMPLE;
3518         pData->prevSimple=simple;
3519         break;
3520     case 'F':
3521         which=CF_FULL;
3522         u_memcpy(pData->prevFull, full, count);
3523         pData->prevFullLength=count;
3524         break;
3525     case 'T':
3526         which=CF_TURKIC;
3527         break;
3528     default:
3529         which=0;
3530         break; /* won't happen because of test above */
3531     }
3532 
3533     testFold(c, which, simple, simple, full, count, full, count);
3534 
3535     /* remember which case foldings of c have been tested */
3536     pData->which|=which;
3537 
3538     /* remove c from the set of ones not mentioned in CaseFolding.txt */
3539     uset_remove(pData->notSeen, c);
3540 }
3541 
3542 static void
TestCaseFoldingnull3543 TestCaseFolding() {
3544     CaseFoldingData data={ NULL, 0, 0, {0}, 0, 0 };
3545     char *fields[3][2];
3546     UErrorCode errorCode;
3547 
3548     static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3549 
3550     errorCode=U_ZERO_ERROR;
3551     /* test BMP & plane 1 - nothing interesting above */
3552     data.notSeen=uset_open(0, 0x1ffff);
3553     data.prevFullLength=1; /* length of full case folding of U+0000 */
3554 
3555     parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3556     if(U_SUCCESS(errorCode)) {
3557         int32_t i, start, end;
3558 
3559         /* add a pseudo-last line to finish testing of the actual last one */
3560         fields[0][0]=lastLine;
3561         fields[0][1]=lastLine+6;
3562         fields[1][0]=lastLine+7;
3563         fields[1][1]=lastLine+9;
3564         fields[2][0]=lastLine+10;
3565         fields[2][1]=lastLine+17;
3566         caseFoldingLineFn(&data, fields, 3, &errorCode);
3567 
3568         /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3569         for(i=0;
3570             0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3571                 U_SUCCESS(errorCode);
3572             ++i
3573         ) {
3574             do {
3575                 testFoldToSelf(start, CF_ALL);
3576             } while(++start<=end);
3577         }
3578     }
3579 
3580     uset_close(data.notSeen);
3581 }
3582 
TestBinaryCharacterPropertiesAPInull3583 static void TestBinaryCharacterPropertiesAPI() {
3584     // API test only. See intltest/ucdtest.cpp for functional test.
3585     UErrorCode errorCode = U_ZERO_ERROR;
3586     const USet *set = u_getBinaryPropertySet(-1, &errorCode);
3587     if (U_SUCCESS(errorCode)) {
3588         log_err("u_getBinaryPropertySet(-1) did not fail\n");
3589     }
3590     errorCode = U_ZERO_ERROR;
3591     set = u_getBinaryPropertySet(UCHAR_BINARY_LIMIT, &errorCode);
3592     if (U_SUCCESS(errorCode)) {
3593         log_err("u_getBinaryPropertySet(UCHAR_BINARY_LIMIT) did not fail\n");
3594     }
3595     errorCode = U_ZERO_ERROR;
3596     set = u_getBinaryPropertySet(UCHAR_WHITE_SPACE, &errorCode);
3597     if (!uset_contains(set, 0x20) || uset_contains(set, 0x61)) {
3598         log_err("u_getBinaryPropertySet(UCHAR_WHITE_SPACE) wrong contents\n");
3599     }
3600 }
3601 
TestIntCharacterPropertiesAPInull3602 static void TestIntCharacterPropertiesAPI() {
3603     // API test only. See intltest/ucdtest.cpp for functional test.
3604     UErrorCode errorCode = U_ZERO_ERROR;
3605     const UCPMap *map = u_getIntPropertyMap(UCHAR_INT_START - 1, &errorCode);
3606     if (U_SUCCESS(errorCode)) {
3607         log_err("u_getIntPropertyMap(UCHAR_INT_START - 1) did not fail\n");
3608     }
3609     errorCode = U_ZERO_ERROR;
3610     map = u_getIntPropertyMap(UCHAR_INT_LIMIT, &errorCode);
3611     if (U_SUCCESS(errorCode)) {
3612         log_err("u_getIntPropertyMap(UCHAR_INT_LIMIT) did not fail\n");
3613     }
3614     errorCode = U_ZERO_ERROR;
3615     map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &errorCode);
3616     if (ucpmap_get(map, 0x20) != U_SPACE_SEPARATOR || ucpmap_get(map, 0x23456) != U_OTHER_LETTER) {
3617         log_err("u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY) wrong contents\n");
3618     }
3619 }
3620