1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7
8#include "unicode/dcfmtsym.h"
9
10#include "cstr.h"
11#include "numbertest.h"
12#include "number_utils.h"
13#include "number_skeletons.h"
14#include "putilimp.h"
15
16using namespace icu::number::impl;
17
18
19void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
20    if (exec) {
21        logln("TestSuite AffixUtilsTest: ");
22    }
23    TESTCASE_AUTO_BEGIN;
24        TESTCASE_AUTO(validTokens);
25        TESTCASE_AUTO(invalidTokens);
26        TESTCASE_AUTO(unknownTokens);
27        TESTCASE_AUTO(unexpectedTokens);
28        TESTCASE_AUTO(duplicateValues);
29        TESTCASE_AUTO(stemsRequiringOption);
30        TESTCASE_AUTO(defaultTokens);
31        TESTCASE_AUTO(flexibleSeparators);
32        TESTCASE_AUTO(wildcardCharacters);
33        TESTCASE_AUTO(perUnitInArabic);
34        TESTCASE_AUTO(perUnitToSkeleton);
35    TESTCASE_AUTO_END;
36}
37
38void NumberSkeletonTest::validTokens() {
39    IcuTestErrorCode status(*this, "validTokens");
40
41    // This tests only if the tokens are valid, not their behavior.
42    // Most of these are from the design doc.
43    static const char16_t* cases[] = {
44            u"precision-integer",
45            u"precision-unlimited",
46            u"@@@##",
47            u"@@*",
48            u"@@+",
49            u"@@+/w",
50            u".000##",
51            u".00*",
52            u".00+",
53            u".",
54            u"./w",
55            u".*",
56            u".+",
57            u".+/w",
58            u".######",
59            u".00/@@*",
60            u".00/@@+",
61            u".00/@##",
62            u".00/@##/w",
63            u".00/@",
64            u".00/@r",
65            u".00/@@s",
66            u".00/@@#r",
67            u"precision-increment/3.14",
68            u"precision-increment/3.14/w",
69            u"precision-currency-standard",
70            u"precision-currency-standard/w",
71            u"precision-integer rounding-mode-half-up",
72            u".00# rounding-mode-ceiling",
73            u".00/@@* rounding-mode-floor",
74            u".00/@@+ rounding-mode-floor",
75            u"scientific",
76            u"scientific/*ee",
77            u"scientific/+ee",
78            u"scientific/sign-always",
79            u"scientific/*ee/sign-always",
80            u"scientific/+ee/sign-always",
81            u"scientific/sign-always/*ee",
82            u"scientific/sign-always/+ee",
83            u"scientific/sign-except-zero",
84            u"engineering",
85            u"engineering/*eee",
86            u"engineering/+eee",
87            u"compact-short",
88            u"compact-long",
89            u"notation-simple",
90            u"percent",
91            u"permille",
92            u"measure-unit/length-meter",
93            u"measure-unit/area-square-meter",
94            u"measure-unit/energy-joule per-measure-unit/length-meter",
95            u"unit/square-meter-per-square-meter",
96            u"currency/XXX",
97            u"currency/ZZZ",
98            u"currency/usd",
99            u"group-off",
100            u"group-min2",
101            u"group-auto",
102            u"group-on-aligned",
103            u"group-thousands",
104            u"integer-width/00",
105            u"integer-width/#0",
106            u"integer-width/*00",
107            u"integer-width/+00",
108            u"sign-always",
109            u"sign-auto",
110            u"sign-never",
111            u"sign-accounting",
112            u"sign-accounting-always",
113            u"sign-except-zero",
114            u"sign-accounting-except-zero",
115            u"unit-width-narrow",
116            u"unit-width-short",
117            u"unit-width-iso-code",
118            u"unit-width-full-name",
119            u"unit-width-hidden",
120            u"decimal-auto",
121            u"decimal-always",
122            u"scale/5.2",
123            u"scale/-5.2",
124            u"scale/100",
125            u"scale/1E2",
126            u"scale/1",
127            u"latin",
128            u"numbering-system/arab",
129            u"numbering-system/latn",
130            u"precision-integer/@##",
131            u"precision-integer rounding-mode-ceiling",
132            u"precision-currency-cash rounding-mode-ceiling",
133            u"0",
134            u"00",
135            u"000",
136            u"E0",
137            u"E00",
138            u"E000",
139            u"EE0",
140            u"EE00",
141            u"EE+?0",
142            u"EE+?00",
143            u"EE+!0",
144            u"EE+!00",
145    };
146
147    for (auto& cas : cases) {
148        UnicodeString skeletonString(cas);
149        status.setScope(skeletonString);
150        UParseError perror;
151        NumberFormatter::forSkeleton(skeletonString, perror, status);
152        assertSuccess(CStr(skeletonString)(), status, true);
153        assertEquals(skeletonString, -1, perror.offset);
154        status.errIfFailureAndReset();
155    }
156}
157
158void NumberSkeletonTest::invalidTokens() {
159    static const char16_t* cases[] = {
160            u".00x",
161            u".00i",
162            u".00/x",
163            u".00/ww",
164            u".00##0",
165            u".##*",
166            u".00##*",
167            u".0#*",
168            u"@#*",
169            u".##+",
170            u".00##+",
171            u".0#+",
172            u"@#+",
173            u"@@x",
174            u"@@##0",
175            u".00/@@",
176            u".00/@@x",
177            u".00/@@#",
178            u".00/@@#*",
179            u".00/floor/@@*", // wrong order
180            u".00/@@#+",
181            u".00/@@@+r",
182            u".00/floor/@@+", // wrong order
183            u"precision-increment/français", // non-invariant characters for C++
184            u"scientific/ee",
185            u"precision-increment/xxx",
186            u"precision-increment/NaN",
187            u"precision-increment/Infinity",
188            u"precision-increment/0.1.2",
189            u"scale/xxx",
190            u"scale/NaN",
191            u"scale/Infinity",
192            u"scale/0.1.2",
193            u"scale/français", // non-invariant characters for C++
194            u"currency/dummy",
195            u"currency/ççç", // three characters but not ASCII
196            u"measure-unit/foo",
197            u"integer-width/xxx",
198            u"integer-width/0*",
199            u"integer-width/*0#",
200            u"integer-width/*#",
201            u"integer-width/*#0",
202            u"integer-width/0+",
203            u"integer-width/+0#",
204            u"integer-width/+#",
205            u"integer-width/+#0",
206            u"scientific/foo",
207            u"E",
208            u"E1",
209            u"E+",
210            u"E+?",
211            u"E+!",
212            u"E+0",
213            u"EE",
214            u"EE+",
215            u"EEE",
216            u"EEE0",
217            u"001",
218            u"00*",
219            u"00+",
220    };
221
222    expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
223}
224
225void NumberSkeletonTest::unknownTokens() {
226    static const char16_t* cases[] = {
227            u"maesure-unit",
228            u"measure-unit/foo-bar",
229            u"numbering-system/dummy",
230            u"français",
231            u"measure-unit/français-français", // non-invariant characters for C++
232            u"numbering-system/français", // non-invariant characters for C++
233            u"currency-USD"};
234
235    expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
236}
237
238void NumberSkeletonTest::unexpectedTokens() {
239    static const char16_t* cases[] = {
240            u".00/w/w",
241            u"group-thousands/foo",
242            u"precision-integer//@## group-off",
243            u"precision-integer//@##  group-off",
244            u"precision-integer/ group-off",
245            u"precision-integer// group-off"};
246
247    expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
248}
249
250void NumberSkeletonTest::duplicateValues() {
251    static const char16_t* cases[] = {
252            u"precision-integer precision-integer",
253            u"precision-integer .00+",
254            u"precision-integer precision-unlimited",
255            u"precision-integer @@@",
256            u"scientific engineering",
257            u"engineering compact-long",
258            u"sign-auto sign-always"};
259
260    expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
261}
262
263void NumberSkeletonTest::stemsRequiringOption() {
264    static const char16_t* stems[] = {
265            u"precision-increment",
266            u"measure-unit",
267            u"per-measure-unit",
268            u"currency",
269            u"integer-width",
270            u"numbering-system",
271            u"scale"};
272    static const char16_t* suffixes[] = {u"", u"/@##", u" scientific", u"/@## scientific"};
273
274    for (auto& stem : stems) {
275        for (auto& suffix : suffixes) {
276            UnicodeString skeletonString = UnicodeString(stem) + suffix;
277            UErrorCode status = U_ZERO_ERROR;
278            UParseError perror;
279            NumberFormatter::forSkeleton(skeletonString, perror, status);
280            assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
281
282            // Check the UParseError for integrity.
283            // If an option is present, the option is wrong; error offset is at the start of the option
284            // If an option is not present, the error offset is at the token separator (end of stem)
285            int32_t expectedOffset = u_strlen(stem) + ((suffix[0] == u'/') ? 1 : 0);
286            assertEquals(skeletonString, expectedOffset, perror.offset);
287            UnicodeString expectedPreContext = skeletonString.tempSubString(0, expectedOffset);
288            if (expectedPreContext.length() >= U_PARSE_CONTEXT_LEN - 1) {
289                expectedPreContext = expectedPreContext.tempSubString(expectedOffset - U_PARSE_CONTEXT_LEN + 1);
290            }
291            assertEquals(skeletonString, expectedPreContext, perror.preContext);
292            UnicodeString expectedPostContext = skeletonString.tempSubString(expectedOffset);
293            // None of the postContext strings in this test exceed U_PARSE_CONTEXT_LEN
294            assertEquals(skeletonString, expectedPostContext, perror.postContext);
295        }
296    }
297}
298
299void NumberSkeletonTest::defaultTokens() {
300    IcuTestErrorCode status(*this, "defaultTokens");
301
302    static const char16_t* cases[] = {
303            u"notation-simple",
304            u"base-unit",
305            u"group-auto",
306            u"integer-width/+0",
307            u"sign-auto",
308            u"unit-width-short",
309            u"decimal-auto"};
310
311    for (auto& cas : cases) {
312        UnicodeString skeletonString(cas);
313        status.setScope(skeletonString);
314        UnicodeString normalized = NumberFormatter::forSkeleton(
315                skeletonString, status).toSkeleton(status);
316        // Skeleton should become empty when normalized
317        assertEquals(skeletonString, u"", normalized);
318        status.errIfFailureAndReset();
319    }
320}
321
322void NumberSkeletonTest::flexibleSeparators() {
323    IcuTestErrorCode status(*this, "flexibleSeparators");
324
325    static struct TestCase {
326        const char16_t* skeleton;
327        const char16_t* expected;
328    } cases[] = {{u"precision-integer group-off", u"5142"},
329                 {u"precision-integer  group-off", u"5142"},
330                 {u"precision-integer/@## group-off", u"5140"},
331                 {u"precision-integer/@##  group-off", u"5140"}};
332
333    for (auto& cas : cases) {
334        UnicodeString skeletonString(cas.skeleton);
335        UnicodeString expected(cas.expected);
336        status.setScope(skeletonString);
337        UnicodeString actual = NumberFormatter::forSkeleton(skeletonString, status).locale("en")
338                               .formatDouble(5142.3, status)
339                               .toString(status);
340        if (!status.errDataIfFailureAndReset()) {
341            assertEquals(skeletonString, expected, actual);
342        }
343        status.errIfFailureAndReset();
344    }
345}
346
347void NumberSkeletonTest::wildcardCharacters() {
348    IcuTestErrorCode status(*this, "wildcardCharacters");
349
350    struct TestCase {
351        const char16_t* star;
352        const char16_t* plus;
353    } cases[] = {
354        { u".00*", u".00+" },
355        { u"@@*", u"@@+" },
356        { u"scientific/*ee", u"scientific/+ee" },
357        { u"integer-width/*00", u"integer-width/+00" },
358    };
359
360    for (const auto& cas : cases) {
361        UnicodeString star(cas.star);
362        UnicodeString plus(cas.plus);
363        status.setScope(star);
364
365        UnicodeString normalized = NumberFormatter::forSkeleton(plus, status)
366            .toSkeleton(status);
367        assertEquals("Plus should normalize to star", star, normalized);
368        status.errIfFailureAndReset();
369    }
370}
371
372// In C++, there is no distinguishing between "invalid", "unknown", and "unexpected" tokens.
373void NumberSkeletonTest::expectedErrorSkeleton(const char16_t** cases, int32_t casesLen) {
374    for (int32_t i = 0; i < casesLen; i++) {
375        UnicodeString skeletonString(cases[i]);
376        UErrorCode status = U_ZERO_ERROR;
377        NumberFormatter::forSkeleton(skeletonString, status);
378        assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
379    }
380}
381
382void NumberSkeletonTest::perUnitInArabic() {
383    IcuTestErrorCode status(*this, "perUnitInArabic");
384
385    struct TestCase {
386        const char16_t* type;
387        const char16_t* subtype;
388    } cases[] = {
389        {u"area", u"acre"},
390        {u"digital", u"bit"},
391        {u"digital", u"byte"},
392        {u"temperature", u"celsius"},
393        {u"length", u"centimeter"},
394        {u"duration", u"day"},
395        {u"angle", u"degree"},
396        {u"temperature", u"fahrenheit"},
397        {u"volume", u"fluid-ounce"},
398        {u"length", u"foot"},
399        {u"volume", u"gallon"},
400        {u"digital", u"gigabit"},
401        {u"digital", u"gigabyte"},
402        {u"mass", u"gram"},
403        {u"area", u"hectare"},
404        {u"duration", u"hour"},
405        {u"length", u"inch"},
406        {u"digital", u"kilobit"},
407        {u"digital", u"kilobyte"},
408        {u"mass", u"kilogram"},
409        {u"length", u"kilometer"},
410        {u"volume", u"liter"},
411        {u"digital", u"megabit"},
412        {u"digital", u"megabyte"},
413        {u"length", u"meter"},
414        {u"length", u"mile"},
415        {u"length", u"mile-scandinavian"},
416        {u"volume", u"milliliter"},
417        {u"length", u"millimeter"},
418        {u"duration", u"millisecond"},
419        {u"duration", u"minute"},
420        {u"duration", u"month"},
421        {u"mass", u"ounce"},
422        {u"concentr", u"percent"},
423        {u"digital", u"petabyte"},
424        {u"mass", u"pound"},
425        {u"duration", u"second"},
426        {u"mass", u"stone"},
427        {u"digital", u"terabit"},
428        {u"digital", u"terabyte"},
429        {u"duration", u"week"},
430        {u"length", u"yard"},
431        {u"duration", u"year"},
432    };
433
434    for (const auto& cas1 : cases) {
435        for (const auto& cas2 : cases) {
436            UnicodeString skeleton(u"measure-unit/");
437            skeleton += cas1.type;
438            skeleton += u"-";
439            skeleton += cas1.subtype;
440            skeleton += u" ";
441            skeleton += u"per-measure-unit/";
442            skeleton += cas2.type;
443            skeleton += u"-";
444            skeleton += cas2.subtype;
445
446            status.setScope(skeleton);
447            UnicodeString actual = NumberFormatter::forSkeleton(skeleton, status).locale("ar")
448                                   .formatDouble(5142.3, status)
449                                   .toString(status);
450            status.errIfFailureAndReset();
451        }
452    }
453}
454
455void NumberSkeletonTest::perUnitToSkeleton() {
456    IcuTestErrorCode status(*this, "perUnitToSkeleton");
457    struct TestCase {
458        const char16_t* type;
459        const char16_t* subtype;
460    } cases[] = {
461        {u"area", u"acre"},
462        {u"concentr", u"percent"},
463        {u"concentr", u"permille"},
464        {u"concentr", u"permillion"},
465        {u"concentr", u"permyriad"},
466        {u"digital", u"bit"},
467        {u"length", u"yard"},
468    };
469
470    for (const auto& cas1 : cases) {
471        for (const auto& cas2 : cases) {
472            UnicodeString skeleton(u"measure-unit/");
473            skeleton += cas1.type;
474            skeleton += u"-";
475            skeleton += cas1.subtype;
476            skeleton += u" ";
477            skeleton += u"per-measure-unit/";
478            skeleton += cas2.type;
479            skeleton += u"-";
480            skeleton += cas2.subtype;
481
482            status.setScope(skeleton);
483            if (cas1.type != cas2.type && cas1.subtype != cas2.subtype) {
484                UnicodeString toSkeleton = NumberFormatter::forSkeleton(
485                    skeleton, status).toSkeleton(status);
486                if (status.errIfFailureAndReset()) {
487                    continue;
488                }
489                // Ensure both subtype are in the toSkeleton.
490                UnicodeString msg;
491                msg.append(toSkeleton)
492                    .append(" should contain '")
493                    .append(UnicodeString(cas1.subtype))
494                    .append("' when constructed from ")
495                    .append(skeleton);
496                assertTrue(msg, toSkeleton.indexOf(cas1.subtype) >= 0);
497
498                msg.remove();
499                msg.append(toSkeleton)
500                    .append(" should contain '")
501                    .append(UnicodeString(cas2.subtype))
502                    .append("' when constructed from ")
503                    .append(skeleton);
504                assertTrue(msg, toSkeleton.indexOf(cas2.subtype) >= 0);
505            }
506        }
507    }
508}
509
510#endif /* #if !UCONFIG_NO_FORMATTING */
511