17777dab0Sopenharmony_ci// © 2017 and later: Unicode, Inc. and others. 27777dab0Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 37777dab0Sopenharmony_ci 47777dab0Sopenharmony_ci// stringoptions.h 57777dab0Sopenharmony_ci// created: 2017jun08 Markus W. Scherer 67777dab0Sopenharmony_ci 77777dab0Sopenharmony_ci#ifndef __STRINGOPTIONS_H__ 87777dab0Sopenharmony_ci#define __STRINGOPTIONS_H__ 97777dab0Sopenharmony_ci 107777dab0Sopenharmony_ci#include "unicode/utypes.h" 117777dab0Sopenharmony_ci 127777dab0Sopenharmony_ci/** 137777dab0Sopenharmony_ci * \file 147777dab0Sopenharmony_ci * \brief C API: Bit set option bit constants for various string and character processing functions. 157777dab0Sopenharmony_ci */ 167777dab0Sopenharmony_ci 177777dab0Sopenharmony_ci/** 187777dab0Sopenharmony_ci * Option value for case folding: Use default mappings defined in CaseFolding.txt. 197777dab0Sopenharmony_ci * 207777dab0Sopenharmony_ci * @stable ICU 2.0 217777dab0Sopenharmony_ci */ 227777dab0Sopenharmony_ci#define U_FOLD_CASE_DEFAULT 0 237777dab0Sopenharmony_ci 247777dab0Sopenharmony_ci/** 257777dab0Sopenharmony_ci * Option value for case folding: 267777dab0Sopenharmony_ci * 277777dab0Sopenharmony_ci * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 287777dab0Sopenharmony_ci * and dotless i appropriately for Turkic languages (tr, az). 297777dab0Sopenharmony_ci * 307777dab0Sopenharmony_ci * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 317777dab0Sopenharmony_ci * are to be included for default mappings and 327777dab0Sopenharmony_ci * excluded for the Turkic-specific mappings. 337777dab0Sopenharmony_ci * 347777dab0Sopenharmony_ci * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 357777dab0Sopenharmony_ci * are to be excluded for default mappings and 367777dab0Sopenharmony_ci * included for the Turkic-specific mappings. 377777dab0Sopenharmony_ci * 387777dab0Sopenharmony_ci * @stable ICU 2.0 397777dab0Sopenharmony_ci */ 407777dab0Sopenharmony_ci#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 417777dab0Sopenharmony_ci 427777dab0Sopenharmony_ci/** 437777dab0Sopenharmony_ci * Titlecase the string as a whole rather than each word. 447777dab0Sopenharmony_ci * (Titlecase only the character at index 0, possibly adjusted.) 457777dab0Sopenharmony_ci * Option bits value for titlecasing APIs that take an options bit set. 467777dab0Sopenharmony_ci * 477777dab0Sopenharmony_ci * It is an error to specify multiple titlecasing iterator options together, 487777dab0Sopenharmony_ci * including both an options bit and an explicit BreakIterator. 497777dab0Sopenharmony_ci * 507777dab0Sopenharmony_ci * @see U_TITLECASE_ADJUST_TO_CASED 517777dab0Sopenharmony_ci * @stable ICU 60 527777dab0Sopenharmony_ci */ 537777dab0Sopenharmony_ci#define U_TITLECASE_WHOLE_STRING 0x20 547777dab0Sopenharmony_ci 557777dab0Sopenharmony_ci/** 567777dab0Sopenharmony_ci * Titlecase sentences rather than words. 577777dab0Sopenharmony_ci * (Titlecase only the first character of each sentence, possibly adjusted.) 587777dab0Sopenharmony_ci * Option bits value for titlecasing APIs that take an options bit set. 597777dab0Sopenharmony_ci * 607777dab0Sopenharmony_ci * It is an error to specify multiple titlecasing iterator options together, 617777dab0Sopenharmony_ci * including both an options bit and an explicit BreakIterator. 627777dab0Sopenharmony_ci * 637777dab0Sopenharmony_ci * @see U_TITLECASE_ADJUST_TO_CASED 647777dab0Sopenharmony_ci * @stable ICU 60 657777dab0Sopenharmony_ci */ 667777dab0Sopenharmony_ci#define U_TITLECASE_SENTENCES 0x40 677777dab0Sopenharmony_ci 687777dab0Sopenharmony_ci/** 697777dab0Sopenharmony_ci * Do not lowercase non-initial parts of words when titlecasing. 707777dab0Sopenharmony_ci * Option bit for titlecasing APIs that take an options bit set. 717777dab0Sopenharmony_ci * 727777dab0Sopenharmony_ci * By default, titlecasing will titlecase the character at each 737777dab0Sopenharmony_ci * (possibly adjusted) BreakIterator index and 747777dab0Sopenharmony_ci * lowercase all other characters up to the next iterator index. 757777dab0Sopenharmony_ci * With this option, the other characters will not be modified. 767777dab0Sopenharmony_ci * 777777dab0Sopenharmony_ci * @see U_TITLECASE_ADJUST_TO_CASED 787777dab0Sopenharmony_ci * @see UnicodeString::toTitle 797777dab0Sopenharmony_ci * @see CaseMap::toTitle 807777dab0Sopenharmony_ci * @see ucasemap_setOptions 817777dab0Sopenharmony_ci * @see ucasemap_toTitle 827777dab0Sopenharmony_ci * @see ucasemap_utf8ToTitle 837777dab0Sopenharmony_ci * @stable ICU 3.8 847777dab0Sopenharmony_ci */ 857777dab0Sopenharmony_ci#define U_TITLECASE_NO_LOWERCASE 0x100 867777dab0Sopenharmony_ci 877777dab0Sopenharmony_ci/** 887777dab0Sopenharmony_ci * Do not adjust the titlecasing BreakIterator indexes; 897777dab0Sopenharmony_ci * titlecase exactly the characters at breaks from the iterator. 907777dab0Sopenharmony_ci * Option bit for titlecasing APIs that take an options bit set. 917777dab0Sopenharmony_ci * 927777dab0Sopenharmony_ci * By default, titlecasing will take each break iterator index, 937777dab0Sopenharmony_ci * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), 947777dab0Sopenharmony_ci * and titlecase that one. 957777dab0Sopenharmony_ci * 967777dab0Sopenharmony_ci * Other characters are lowercased. 977777dab0Sopenharmony_ci * 987777dab0Sopenharmony_ci * It is an error to specify multiple titlecasing adjustment options together. 997777dab0Sopenharmony_ci * 1007777dab0Sopenharmony_ci * @see U_TITLECASE_ADJUST_TO_CASED 1017777dab0Sopenharmony_ci * @see U_TITLECASE_NO_LOWERCASE 1027777dab0Sopenharmony_ci * @see UnicodeString::toTitle 1037777dab0Sopenharmony_ci * @see CaseMap::toTitle 1047777dab0Sopenharmony_ci * @see ucasemap_setOptions 1057777dab0Sopenharmony_ci * @see ucasemap_toTitle 1067777dab0Sopenharmony_ci * @see ucasemap_utf8ToTitle 1077777dab0Sopenharmony_ci * @stable ICU 3.8 1087777dab0Sopenharmony_ci */ 1097777dab0Sopenharmony_ci#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 1107777dab0Sopenharmony_ci 1117777dab0Sopenharmony_ci/** 1127777dab0Sopenharmony_ci * Adjust each titlecasing BreakIterator index to the next cased character. 1137777dab0Sopenharmony_ci * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) 1147777dab0Sopenharmony_ci * Option bit for titlecasing APIs that take an options bit set. 1157777dab0Sopenharmony_ci * 1167777dab0Sopenharmony_ci * This used to be the default index adjustment in ICU. 1177777dab0Sopenharmony_ci * Since ICU 60, the default index adjustment is to the next character that is 1187777dab0Sopenharmony_ci * a letter, number, symbol, or private use code point. 1197777dab0Sopenharmony_ci * (Uncased modifier letters are skipped.) 1207777dab0Sopenharmony_ci * The difference in behavior is small for word titlecasing, 1217777dab0Sopenharmony_ci * but the new adjustment is much better for whole-string and sentence titlecasing: 1227777dab0Sopenharmony_ci * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". 1237777dab0Sopenharmony_ci * 1247777dab0Sopenharmony_ci * It is an error to specify multiple titlecasing adjustment options together. 1257777dab0Sopenharmony_ci * 1267777dab0Sopenharmony_ci * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 1277777dab0Sopenharmony_ci * @stable ICU 60 1287777dab0Sopenharmony_ci */ 1297777dab0Sopenharmony_ci#define U_TITLECASE_ADJUST_TO_CASED 0x400 1307777dab0Sopenharmony_ci 1317777dab0Sopenharmony_ci/** 1327777dab0Sopenharmony_ci * Option for string transformation functions to not first reset the Edits object. 1337777dab0Sopenharmony_ci * Used for example in some case-mapping and normalization functions. 1347777dab0Sopenharmony_ci * 1357777dab0Sopenharmony_ci * @see CaseMap 1367777dab0Sopenharmony_ci * @see Edits 1377777dab0Sopenharmony_ci * @see Normalizer2 1387777dab0Sopenharmony_ci * @stable ICU 60 1397777dab0Sopenharmony_ci */ 1407777dab0Sopenharmony_ci#define U_EDITS_NO_RESET 0x2000 1417777dab0Sopenharmony_ci 1427777dab0Sopenharmony_ci/** 1437777dab0Sopenharmony_ci * Omit unchanged text when recording how source substrings 1447777dab0Sopenharmony_ci * relate to changed and unchanged result substrings. 1457777dab0Sopenharmony_ci * Used for example in some case-mapping and normalization functions. 1467777dab0Sopenharmony_ci * 1477777dab0Sopenharmony_ci * @see CaseMap 1487777dab0Sopenharmony_ci * @see Edits 1497777dab0Sopenharmony_ci * @see Normalizer2 1507777dab0Sopenharmony_ci * @stable ICU 60 1517777dab0Sopenharmony_ci */ 1527777dab0Sopenharmony_ci#define U_OMIT_UNCHANGED_TEXT 0x4000 1537777dab0Sopenharmony_ci 1547777dab0Sopenharmony_ci/** 1557777dab0Sopenharmony_ci * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 1567777dab0Sopenharmony_ci * Compare strings in code point order instead of code unit order. 1577777dab0Sopenharmony_ci * @stable ICU 2.2 1587777dab0Sopenharmony_ci */ 1597777dab0Sopenharmony_ci#define U_COMPARE_CODE_POINT_ORDER 0x8000 1607777dab0Sopenharmony_ci 1617777dab0Sopenharmony_ci/** 1627777dab0Sopenharmony_ci * Option bit for unorm_compare: 1637777dab0Sopenharmony_ci * Perform case-insensitive comparison. 1647777dab0Sopenharmony_ci * @stable ICU 2.2 1657777dab0Sopenharmony_ci */ 1667777dab0Sopenharmony_ci#define U_COMPARE_IGNORE_CASE 0x10000 1677777dab0Sopenharmony_ci 1687777dab0Sopenharmony_ci/** 1697777dab0Sopenharmony_ci * Option bit for unorm_compare: 1707777dab0Sopenharmony_ci * Both input strings are assumed to fulfill FCD conditions. 1717777dab0Sopenharmony_ci * @stable ICU 2.2 1727777dab0Sopenharmony_ci */ 1737777dab0Sopenharmony_ci#define UNORM_INPUT_IS_FCD 0x20000 1747777dab0Sopenharmony_ci 1757777dab0Sopenharmony_ci// Related definitions elsewhere. 1767777dab0Sopenharmony_ci// Options that are not meaningful in the same functions 1777777dab0Sopenharmony_ci// can share the same bits. 1787777dab0Sopenharmony_ci// 1797777dab0Sopenharmony_ci// Public: 1807777dab0Sopenharmony_ci// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 1817777dab0Sopenharmony_ci// 1827777dab0Sopenharmony_ci// Internal: (may change or be removed) 1837777dab0Sopenharmony_ci// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff 1847777dab0Sopenharmony_ci// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 1857777dab0Sopenharmony_ci// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 1867777dab0Sopenharmony_ci// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 1877777dab0Sopenharmony_ci// ustr_imp.h #define _STRNCMP_STYLE 0x1000 1887777dab0Sopenharmony_ci// unormcmp.cpp #define _COMPARE_EQUIV 0x80000 1897777dab0Sopenharmony_ci 1907777dab0Sopenharmony_ci#endif // __STRINGOPTIONS_H__ 191