17777dab0Sopenharmony_ci// © 2017 and later: Unicode, Inc. and others.
27777dab0Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
37777dab0Sopenharmony_ci
47777dab0Sopenharmony_ci// stringoptions.h
57777dab0Sopenharmony_ci// created: 2017jun08 Markus W. Scherer
67777dab0Sopenharmony_ci
77777dab0Sopenharmony_ci#ifndef __STRINGOPTIONS_H__
87777dab0Sopenharmony_ci#define __STRINGOPTIONS_H__
97777dab0Sopenharmony_ci
107777dab0Sopenharmony_ci#include "unicode/utypes.h"
117777dab0Sopenharmony_ci
/**
 * \file
 * \brief C API: Option bit constants (for options bit sets) used by various string and character processing functions.
 */
167777dab0Sopenharmony_ci
/**
 * Option value for case folding: Use default mappings defined in CaseFolding.txt.
 *
 * This is the zero value: no case folding option bit is set.
 *
 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_DEFAULT 0
237777dab0Sopenharmony_ci
/**
 * Option value for case folding:
 *
 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
 * and dotless i appropriately for Turkic languages (tr, az).
 *
 * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that
 * are to be included for default mappings and
 * excluded for the Turkic-specific mappings.
 *
 * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
 * are to be excluded for default mappings and
 * included for the Turkic-specific mappings.
 *
 * @see U_FOLD_CASE_DEFAULT
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
417777dab0Sopenharmony_ci
/**
 * Titlecase the string as a whole rather than each word.
 * (Titlecase only the character at index 0, possibly adjusted.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * It is an error to specify multiple titlecasing iterator options together,
 * including both an options bit and an explicit BreakIterator.
 *
 * @see U_TITLECASE_SENTENCES
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @stable ICU 60
 */
#define U_TITLECASE_WHOLE_STRING 0x20
547777dab0Sopenharmony_ci
/**
 * Titlecase sentences rather than words.
 * (Titlecase only the first character of each sentence, possibly adjusted.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * It is an error to specify multiple titlecasing iterator options together,
 * including both an options bit and an explicit BreakIterator.
 *
 * @see U_TITLECASE_WHOLE_STRING
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @stable ICU 60
 */
#define U_TITLECASE_SENTENCES 0x40
677777dab0Sopenharmony_ci
/**
 * Do not lowercase non-initial parts of words when titlecasing.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will titlecase the character at each
 * (possibly adjusted) BreakIterator index and
 * lowercase all other characters up to the next iterator index.
 * With this option, the other characters will not be modified.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_LOWERCASE 0x100
867777dab0Sopenharmony_ci
/**
 * Do not adjust the titlecasing BreakIterator indexes;
 * titlecase exactly the characters at breaks from the iterator.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * By default, titlecasing will take each break iterator index,
 * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
 * and titlecase that one.
 *
 * Other characters are lowercased.
 *
 * It is an error to specify multiple titlecasing adjustment options together.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see U_TITLECASE_NO_LOWERCASE
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
1107777dab0Sopenharmony_ci
/**
 * Adjust each titlecasing BreakIterator index to the next cased character.
 * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * This used to be the default index adjustment in ICU.
 * Since ICU 60, the default index adjustment is to the next character that is
 * a letter, number, symbol, or private use code point.
 * (Uncased modifier letters are skipped.)
 * The difference in behavior is small for word titlecasing,
 * but the new adjustment is much better for whole-string and sentence titlecasing:
 * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
 *
 * It is an error to specify multiple titlecasing adjustment options together.
 *
 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
 * @stable ICU 60
 */
#define U_TITLECASE_ADJUST_TO_CASED 0x400
1307777dab0Sopenharmony_ci
/**
 * Option for string transformation functions to not first reset the Edits object.
 * Used for example in some case-mapping and normalization functions.
 *
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @see U_OMIT_UNCHANGED_TEXT
 * @stable ICU 60
 */
#define U_EDITS_NO_RESET 0x2000
1417777dab0Sopenharmony_ci
/**
 * Omit unchanged text when recording how source substrings
 * relate to changed and unchanged result substrings.
 * Used for example in some case-mapping and normalization functions.
 *
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @see U_EDITS_NO_RESET
 * @stable ICU 60
 */
#define U_OMIT_UNCHANGED_TEXT 0x4000
1537777dab0Sopenharmony_ci
/**
 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
 * Compare strings in code point order instead of code unit order.
 *
 * @stable ICU 2.2
 */
#define U_COMPARE_CODE_POINT_ORDER  0x8000
1607777dab0Sopenharmony_ci
/**
 * Option bit for unorm_compare:
 * Perform case-insensitive comparison.
 *
 * @stable ICU 2.2
 */
#define U_COMPARE_IGNORE_CASE       0x10000
1677777dab0Sopenharmony_ci
/**
 * Option bit for unorm_compare:
 * Both input strings are assumed to fulfill FCD conditions.
 *
 * @stable ICU 2.2
 */
#define UNORM_INPUT_IS_FCD          0x20000
1747777dab0Sopenharmony_ci
// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
1897777dab0Sopenharmony_ci
1907777dab0Sopenharmony_ci#endif  // __STRINGOPTIONS_H__
191