17777dab0Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 27777dab0Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 37777dab0Sopenharmony_ci/* 47777dab0Sopenharmony_ci******************************************************************************* 57777dab0Sopenharmony_ci* 67777dab0Sopenharmony_ci* Copyright (C) 2002-2014, International Business Machines 77777dab0Sopenharmony_ci* Corporation and others. All Rights Reserved. 87777dab0Sopenharmony_ci* 97777dab0Sopenharmony_ci******************************************************************************* 107777dab0Sopenharmony_ci* file name: uset.h 117777dab0Sopenharmony_ci* encoding: UTF-8 127777dab0Sopenharmony_ci* tab size: 8 (not used) 137777dab0Sopenharmony_ci* indentation:4 147777dab0Sopenharmony_ci* 157777dab0Sopenharmony_ci* created on: 2002mar07 167777dab0Sopenharmony_ci* created by: Markus W. Scherer 177777dab0Sopenharmony_ci* 187777dab0Sopenharmony_ci* C version of UnicodeSet. 197777dab0Sopenharmony_ci*/ 207777dab0Sopenharmony_ci 217777dab0Sopenharmony_ci 227777dab0Sopenharmony_ci/** 237777dab0Sopenharmony_ci * \file 247777dab0Sopenharmony_ci * \brief C API: Unicode Set 257777dab0Sopenharmony_ci * 267777dab0Sopenharmony_ci * <p>This is a C wrapper around the C++ UnicodeSet class.</p> 277777dab0Sopenharmony_ci */ 287777dab0Sopenharmony_ci 297777dab0Sopenharmony_ci#ifndef __USET_H__ 307777dab0Sopenharmony_ci#define __USET_H__ 317777dab0Sopenharmony_ci 327777dab0Sopenharmony_ci#include "unicode/utypes.h" 337777dab0Sopenharmony_ci#include "unicode/uchar.h" 347777dab0Sopenharmony_ci 357777dab0Sopenharmony_ci#if U_SHOW_CPLUSPLUS_API 367777dab0Sopenharmony_ci#include "unicode/localpointer.h" 377777dab0Sopenharmony_ci#endif // U_SHOW_CPLUSPLUS_API 387777dab0Sopenharmony_ci 397777dab0Sopenharmony_ci#ifndef USET_DEFINED 407777dab0Sopenharmony_ci 417777dab0Sopenharmony_ci#ifndef U_IN_DOXYGEN 427777dab0Sopenharmony_ci#define USET_DEFINED 437777dab0Sopenharmony_ci#endif 447777dab0Sopenharmony_ci/** 457777dab0Sopenharmony_ci * USet is the C API type corresponding to C++ class UnicodeSet. 467777dab0Sopenharmony_ci * Use the uset_* API to manipulate. Create with 477777dab0Sopenharmony_ci * uset_open*, and destroy with uset_close. 487777dab0Sopenharmony_ci * @stable ICU 2.4 497777dab0Sopenharmony_ci */ 507777dab0Sopenharmony_citypedef struct USet USet; 517777dab0Sopenharmony_ci#endif 527777dab0Sopenharmony_ci 537777dab0Sopenharmony_ci/** 547777dab0Sopenharmony_ci * Bitmask values to be passed to uset_openPatternOptions() or 557777dab0Sopenharmony_ci * uset_applyPattern() taking an option parameter. 567777dab0Sopenharmony_ci * @stable ICU 2.4 577777dab0Sopenharmony_ci */ 587777dab0Sopenharmony_cienum { 597777dab0Sopenharmony_ci /** 607777dab0Sopenharmony_ci * Ignore white space within patterns unless quoted or escaped. 617777dab0Sopenharmony_ci * @stable ICU 2.4 627777dab0Sopenharmony_ci */ 637777dab0Sopenharmony_ci USET_IGNORE_SPACE = 1, 647777dab0Sopenharmony_ci 657777dab0Sopenharmony_ci /** 667777dab0Sopenharmony_ci * Enable case insensitive matching. E.g., "[ab]" with this flag 677777dab0Sopenharmony_ci * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will 687777dab0Sopenharmony_ci * match all except 'a', 'A', 'b', and 'B'. This performs a full 697777dab0Sopenharmony_ci * closure over case mappings, e.g. U+017F for s. 707777dab0Sopenharmony_ci * 717777dab0Sopenharmony_ci * The resulting set is a superset of the input for the code points but 727777dab0Sopenharmony_ci * not for the strings. 737777dab0Sopenharmony_ci * It performs a case mapping closure of the code points and adds 747777dab0Sopenharmony_ci * full case folding strings for the code points, and reduces strings of 757777dab0Sopenharmony_ci * the original set to their full case folding equivalents. 767777dab0Sopenharmony_ci * 777777dab0Sopenharmony_ci * This is designed for case-insensitive matches, for example 787777dab0Sopenharmony_ci * in regular expressions. The full code point case closure allows checking of 797777dab0Sopenharmony_ci * an input character directly against the closure set. 807777dab0Sopenharmony_ci * Strings are matched by comparing the case-folded form from the closure 817777dab0Sopenharmony_ci * set with an incremental case folding of the string in question. 827777dab0Sopenharmony_ci * 837777dab0Sopenharmony_ci * The closure set will also contain single code points if the original 847777dab0Sopenharmony_ci * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). 857777dab0Sopenharmony_ci * This is not necessary (that is, redundant) for the above matching method 867777dab0Sopenharmony_ci * but results in the same closure sets regardless of whether the original 877777dab0Sopenharmony_ci * set contained the code point or a string. 887777dab0Sopenharmony_ci * 897777dab0Sopenharmony_ci * @stable ICU 2.4 907777dab0Sopenharmony_ci */ 917777dab0Sopenharmony_ci USET_CASE_INSENSITIVE = 2, 927777dab0Sopenharmony_ci 937777dab0Sopenharmony_ci /** 947777dab0Sopenharmony_ci * Enable case insensitive matching. E.g., "[ab]" with this flag 957777dab0Sopenharmony_ci * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will 967777dab0Sopenharmony_ci * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, 977777dab0Sopenharmony_ci * title-, and uppercase mappings as well as the case folding 987777dab0Sopenharmony_ci * of each existing element in the set. 997777dab0Sopenharmony_ci * @stable ICU 3.2 1007777dab0Sopenharmony_ci */ 1017777dab0Sopenharmony_ci USET_ADD_CASE_MAPPINGS = 4 1027777dab0Sopenharmony_ci}; 1037777dab0Sopenharmony_ci 1047777dab0Sopenharmony_ci/** 1057777dab0Sopenharmony_ci * Argument values for whether span() and similar functions continue while 1067777dab0Sopenharmony_ci * the current character is contained vs. not contained in the set. 1077777dab0Sopenharmony_ci * 1087777dab0Sopenharmony_ci * The functionality is straightforward for sets with only single code points, 1097777dab0Sopenharmony_ci * without strings (which is the common case): 1107777dab0Sopenharmony_ci * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same. 1117777dab0Sopenharmony_ci * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED. 1127777dab0Sopenharmony_ci * - span() and spanBack() partition any string the same way when 1137777dab0Sopenharmony_ci * alternating between span(USET_SPAN_NOT_CONTAINED) and 1147777dab0Sopenharmony_ci * span(either "contained" condition). 1157777dab0Sopenharmony_ci * - Using a complemented (inverted) set and the opposite span conditions 1167777dab0Sopenharmony_ci * yields the same results. 1177777dab0Sopenharmony_ci * 1187777dab0Sopenharmony_ci * When a set contains multi-code point strings, then these statements may not 1197777dab0Sopenharmony_ci * be true, depending on the strings in the set (for example, whether they 1207777dab0Sopenharmony_ci * overlap with each other) and the string that is processed. 1217777dab0Sopenharmony_ci * For a set with strings: 1227777dab0Sopenharmony_ci * - The complement of the set contains the opposite set of code points, 1237777dab0Sopenharmony_ci * but the same set of strings. 1247777dab0Sopenharmony_ci * Therefore, complementing both the set and the span conditions 1257777dab0Sopenharmony_ci * may yield different results. 1267777dab0Sopenharmony_ci * - When starting spans at different positions in a string 1277777dab0Sopenharmony_ci * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different 1287777dab0Sopenharmony_ci * because a set string may start before the later position. 1297777dab0Sopenharmony_ci * - span(USET_SPAN_SIMPLE) may be shorter than 1307777dab0Sopenharmony_ci * span(USET_SPAN_CONTAINED) because it will not recursively try 1317777dab0Sopenharmony_ci * all possible paths. 1327777dab0Sopenharmony_ci * For example, with a set which contains the three strings "xy", "xya" and "ax", 1337777dab0Sopenharmony_ci * span("xyax", USET_SPAN_CONTAINED) will return 4 but 1347777dab0Sopenharmony_ci * span("xyax", USET_SPAN_SIMPLE) will return 3. 1357777dab0Sopenharmony_ci * span(USET_SPAN_SIMPLE) will never be longer than 1367777dab0Sopenharmony_ci * span(USET_SPAN_CONTAINED). 1377777dab0Sopenharmony_ci * - With either "contained" condition, span() and spanBack() may partition 1387777dab0Sopenharmony_ci * a string in different ways. 1397777dab0Sopenharmony_ci * For example, with a set which contains the two strings "ab" and "ba", 1407777dab0Sopenharmony_ci * and when processing the string "aba", 1417777dab0Sopenharmony_ci * span() will yield contained/not-contained boundaries of { 0, 2, 3 } 1427777dab0Sopenharmony_ci * while spanBack() will yield boundaries of { 0, 1, 3 }. 1437777dab0Sopenharmony_ci * 1447777dab0Sopenharmony_ci * Note: If it is important to get the same boundaries whether iterating forward 1457777dab0Sopenharmony_ci * or backward through a string, then either only span() should be used and 1467777dab0Sopenharmony_ci * the boundaries cached for backward operation, or an ICU BreakIterator 1477777dab0Sopenharmony_ci * could be used. 1487777dab0Sopenharmony_ci * 1497777dab0Sopenharmony_ci * Note: Unpaired surrogates are treated like surrogate code points. 1507777dab0Sopenharmony_ci * Similarly, set strings match only on code point boundaries, 1517777dab0Sopenharmony_ci * never in the middle of a surrogate pair. 1527777dab0Sopenharmony_ci * Illegal UTF-8 sequences are treated like U+FFFD. 1537777dab0Sopenharmony_ci * When processing UTF-8 strings, malformed set strings 1547777dab0Sopenharmony_ci * (strings with unpaired surrogates which cannot be converted to UTF-8) 1557777dab0Sopenharmony_ci * are ignored. 1567777dab0Sopenharmony_ci * 1577777dab0Sopenharmony_ci * @stable ICU 3.8 1587777dab0Sopenharmony_ci */ 1597777dab0Sopenharmony_citypedef enum USetSpanCondition { 1607777dab0Sopenharmony_ci /** 1617777dab0Sopenharmony_ci * Continues a span() while there is no set element at the current position. 1627777dab0Sopenharmony_ci * Increments by one code point at a time. 1637777dab0Sopenharmony_ci * Stops before the first set element (character or string). 1647777dab0Sopenharmony_ci * (For code points only, this is like while contains(current)==false). 1657777dab0Sopenharmony_ci * 1667777dab0Sopenharmony_ci * When span() returns, the substring between where it started and the position 1677777dab0Sopenharmony_ci * it returned consists only of characters that are not in the set, 1687777dab0Sopenharmony_ci * and none of its strings overlap with the span. 1697777dab0Sopenharmony_ci * 1707777dab0Sopenharmony_ci * @stable ICU 3.8 1717777dab0Sopenharmony_ci */ 1727777dab0Sopenharmony_ci USET_SPAN_NOT_CONTAINED = 0, 1737777dab0Sopenharmony_ci /** 1747777dab0Sopenharmony_ci * Spans the longest substring that is a concatenation of set elements (characters or strings). 1757777dab0Sopenharmony_ci * (For characters only, this is like while contains(current)==true). 1767777dab0Sopenharmony_ci * 1777777dab0Sopenharmony_ci * When span() returns, the substring between where it started and the position 1787777dab0Sopenharmony_ci * it returned consists only of set elements (characters or strings) that are in the set. 1797777dab0Sopenharmony_ci * 1807777dab0Sopenharmony_ci * If a set contains strings, then the span will be the longest substring for which there 1817777dab0Sopenharmony_ci * exists at least one non-overlapping concatenation of set elements (characters or strings). 1827777dab0Sopenharmony_ci * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>. 1837777dab0Sopenharmony_ci * (Java/ICU/Perl regex stops at the first match of an OR.) 1847777dab0Sopenharmony_ci * 1857777dab0Sopenharmony_ci * @stable ICU 3.8 1867777dab0Sopenharmony_ci */ 1877777dab0Sopenharmony_ci USET_SPAN_CONTAINED = 1, 1887777dab0Sopenharmony_ci /** 1897777dab0Sopenharmony_ci * Continues a span() while there is a set element at the current position. 1907777dab0Sopenharmony_ci * Increments by the longest matching element at each position. 1917777dab0Sopenharmony_ci * (For characters only, this is like while contains(current)==true). 1927777dab0Sopenharmony_ci * 1937777dab0Sopenharmony_ci * When span() returns, the substring between where it started and the position 1947777dab0Sopenharmony_ci * it returned consists only of set elements (characters or strings) that are in the set. 1957777dab0Sopenharmony_ci * 1967777dab0Sopenharmony_ci * If a set only contains single characters, then this is the same 1977777dab0Sopenharmony_ci * as USET_SPAN_CONTAINED. 1987777dab0Sopenharmony_ci * 1997777dab0Sopenharmony_ci * If a set contains strings, then the span will be the longest substring 2007777dab0Sopenharmony_ci * with a match at each position with the longest single set element (character or string). 2017777dab0Sopenharmony_ci * 2027777dab0Sopenharmony_ci * Use this span condition together with other longest-match algorithms, 2037777dab0Sopenharmony_ci * such as ICU converters (ucnv_getUnicodeSet()). 2047777dab0Sopenharmony_ci * 2057777dab0Sopenharmony_ci * @stable ICU 3.8 2067777dab0Sopenharmony_ci */ 2077777dab0Sopenharmony_ci USET_SPAN_SIMPLE = 2, 2087777dab0Sopenharmony_ci} USetSpanCondition; 2097777dab0Sopenharmony_ci 2107777dab0Sopenharmony_cienum { 2117777dab0Sopenharmony_ci /** 2127777dab0Sopenharmony_ci * Capacity of USerializedSet::staticArray. 2137777dab0Sopenharmony_ci * Enough for any single-code point set. 2147777dab0Sopenharmony_ci * Also provides padding for nice sizeof(USerializedSet). 2157777dab0Sopenharmony_ci * @stable ICU 2.4 2167777dab0Sopenharmony_ci */ 2177777dab0Sopenharmony_ci USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 2187777dab0Sopenharmony_ci}; 2197777dab0Sopenharmony_ci 2207777dab0Sopenharmony_ci/** 2217777dab0Sopenharmony_ci * A serialized form of a Unicode set. Limited manipulations are 2227777dab0Sopenharmony_ci * possible directly on a serialized set. See below. 2237777dab0Sopenharmony_ci * @stable ICU 2.4 2247777dab0Sopenharmony_ci */ 2257777dab0Sopenharmony_citypedef struct USerializedSet { 2267777dab0Sopenharmony_ci /** 2277777dab0Sopenharmony_ci * The serialized Unicode Set. 2287777dab0Sopenharmony_ci * @stable ICU 2.4 2297777dab0Sopenharmony_ci */ 2307777dab0Sopenharmony_ci const uint16_t *array; 2317777dab0Sopenharmony_ci /** 2327777dab0Sopenharmony_ci * The length of the array that contains BMP characters. 2337777dab0Sopenharmony_ci * @stable ICU 2.4 2347777dab0Sopenharmony_ci */ 2357777dab0Sopenharmony_ci int32_t bmpLength; 2367777dab0Sopenharmony_ci /** 2377777dab0Sopenharmony_ci * The total length of the array. 2387777dab0Sopenharmony_ci * @stable ICU 2.4 2397777dab0Sopenharmony_ci */ 2407777dab0Sopenharmony_ci int32_t length; 2417777dab0Sopenharmony_ci /** 2427777dab0Sopenharmony_ci * A small buffer for the array to reduce memory allocations. 2437777dab0Sopenharmony_ci * @stable ICU 2.4 2447777dab0Sopenharmony_ci */ 2457777dab0Sopenharmony_ci uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; 2467777dab0Sopenharmony_ci} USerializedSet; 2477777dab0Sopenharmony_ci 2487777dab0Sopenharmony_ci/********************************************************************* 2497777dab0Sopenharmony_ci * USet API 2507777dab0Sopenharmony_ci *********************************************************************/ 2517777dab0Sopenharmony_ci 2527777dab0Sopenharmony_ci/** 2537777dab0Sopenharmony_ci * Creates a USet object that contains the range of characters 2547777dab0Sopenharmony_ci * start..end, inclusive. If <code>start > end</code> 2557777dab0Sopenharmony_ci * then an empty set is created (same as using uset_openEmpty()). 2567777dab0Sopenharmony_ci * @param start first character of the range, inclusive 2577777dab0Sopenharmony_ci * @param end last character of the range, inclusive 2587777dab0Sopenharmony_ci * @return a newly created USet. The caller must call uset_close() on 2597777dab0Sopenharmony_ci * it when done. 2607777dab0Sopenharmony_ci * @stable ICU 2.4 2617777dab0Sopenharmony_ci */ 2627777dab0Sopenharmony_ciU_CAPI USet* U_EXPORT2 2637777dab0Sopenharmony_ciuset_open(UChar32 start, UChar32 end); 2647777dab0Sopenharmony_ci 2657777dab0Sopenharmony_ci/** 2667777dab0Sopenharmony_ci * Creates a set from the given pattern. See the UnicodeSet class 2677777dab0Sopenharmony_ci * description for the syntax of the pattern language. 2687777dab0Sopenharmony_ci * @param pattern a string specifying what characters are in the set 2697777dab0Sopenharmony_ci * @param patternLength the length of the pattern, or -1 if null 2707777dab0Sopenharmony_ci * terminated 2717777dab0Sopenharmony_ci * @param ec the error code 2727777dab0Sopenharmony_ci * @stable ICU 2.4 2737777dab0Sopenharmony_ci */ 2747777dab0Sopenharmony_ciU_CAPI USet* U_EXPORT2 2757777dab0Sopenharmony_ciuset_openPattern(const UChar* pattern, int32_t patternLength, 2767777dab0Sopenharmony_ci UErrorCode* ec); 2777777dab0Sopenharmony_ci 2787777dab0Sopenharmony_ci/** 2797777dab0Sopenharmony_ci * Creates a set from the given pattern. See the UnicodeSet class 2807777dab0Sopenharmony_ci * description for the syntax of the pattern language. 2817777dab0Sopenharmony_ci * @param pattern a string specifying what characters are in the set 2827777dab0Sopenharmony_ci * @param patternLength the length of the pattern, or -1 if null 2837777dab0Sopenharmony_ci * terminated 2847777dab0Sopenharmony_ci * @param options bitmask for options to apply to the pattern. 2857777dab0Sopenharmony_ci * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. 2867777dab0Sopenharmony_ci * @param ec the error code 2877777dab0Sopenharmony_ci * @stable ICU 2.4 2887777dab0Sopenharmony_ci */ 2897777dab0Sopenharmony_ciU_CAPI USet* U_EXPORT2 2907777dab0Sopenharmony_ciuset_openPatternOptions(const UChar* pattern, int32_t patternLength, 2917777dab0Sopenharmony_ci uint32_t options, 2927777dab0Sopenharmony_ci UErrorCode* ec); 2937777dab0Sopenharmony_ci 2947777dab0Sopenharmony_ci/** 2957777dab0Sopenharmony_ci * Disposes of the storage used by a USet object. This function should 2967777dab0Sopenharmony_ci * be called exactly once for objects returned by uset_open(). 2977777dab0Sopenharmony_ci * @param set the object to dispose of 2987777dab0Sopenharmony_ci * @stable ICU 2.4 2997777dab0Sopenharmony_ci */ 3007777dab0Sopenharmony_ciU_CAPI void U_EXPORT2 3017777dab0Sopenharmony_ciuset_close(USet* set); 3027777dab0Sopenharmony_ci 3037777dab0Sopenharmony_ci#if U_SHOW_CPLUSPLUS_API 3047777dab0Sopenharmony_ci 3057777dab0Sopenharmony_ciU_NAMESPACE_BEGIN 3067777dab0Sopenharmony_ci 3077777dab0Sopenharmony_ci/** 3087777dab0Sopenharmony_ci * \class LocalUSetPointer 3097777dab0Sopenharmony_ci * "Smart pointer" class, closes a USet via uset_close(). 3107777dab0Sopenharmony_ci * For most methods see the LocalPointerBase base class. 3117777dab0Sopenharmony_ci * 3127777dab0Sopenharmony_ci * @see LocalPointerBase 3137777dab0Sopenharmony_ci * @see LocalPointer 3147777dab0Sopenharmony_ci * @stable ICU 4.4 3157777dab0Sopenharmony_ci */ 3167777dab0Sopenharmony_ciU_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close); 3177777dab0Sopenharmony_ci 3187777dab0Sopenharmony_ciU_NAMESPACE_END 3197777dab0Sopenharmony_ci 3207777dab0Sopenharmony_ci#endif 3217777dab0Sopenharmony_ci 3227777dab0Sopenharmony_ci/** 3237777dab0Sopenharmony_ci * Returns a string representation of this set. If the result of 3247777dab0Sopenharmony_ci * calling this function is passed to a uset_openPattern(), it 3257777dab0Sopenharmony_ci * will produce another set that is equal to this one. 3267777dab0Sopenharmony_ci * @param set the set 3277777dab0Sopenharmony_ci * @param result the string to receive the rules, may be NULL 3287777dab0Sopenharmony_ci * @param resultCapacity the capacity of result, may be 0 if result is NULL 3297777dab0Sopenharmony_ci * @param escapeUnprintable if true then convert unprintable 3307777dab0Sopenharmony_ci * character to their hex escape representations, \\uxxxx or 3317777dab0Sopenharmony_ci * \\Uxxxxxxxx. Unprintable characters are those other than 3327777dab0Sopenharmony_ci * U+000A, U+0020..U+007E. 3337777dab0Sopenharmony_ci * @param ec error code. 3347777dab0Sopenharmony_ci * @return length of string, possibly larger than resultCapacity 3357777dab0Sopenharmony_ci * @stable ICU 2.4 3367777dab0Sopenharmony_ci */ 3377777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2 3387777dab0Sopenharmony_ciuset_toPattern(const USet* set, 3397777dab0Sopenharmony_ci UChar* result, int32_t resultCapacity, 3407777dab0Sopenharmony_ci UBool escapeUnprintable, 3417777dab0Sopenharmony_ci UErrorCode* ec); 3427777dab0Sopenharmony_ci 3437777dab0Sopenharmony_ci/** 3447777dab0Sopenharmony_ci * Adds the given character to the given USet. After this call, 3457777dab0Sopenharmony_ci * uset_contains(set, c) will return true. 3467777dab0Sopenharmony_ci * A frozen set will not be modified. 3477777dab0Sopenharmony_ci * @param set the object to which to add the character 3487777dab0Sopenharmony_ci * @param c the character to add 3497777dab0Sopenharmony_ci * @stable ICU 2.4 3507777dab0Sopenharmony_ci */ 3517777dab0Sopenharmony_ciU_CAPI void U_EXPORT2 3527777dab0Sopenharmony_ciuset_add(USet* set, UChar32 c); 3537777dab0Sopenharmony_ci 3547777dab0Sopenharmony_ci/** 3557777dab0Sopenharmony_ci * Adds the given string to the given USet. After this call, 3567777dab0Sopenharmony_ci * uset_containsString(set, str, strLen) will return true. 3577777dab0Sopenharmony_ci * A frozen set will not be modified. 3587777dab0Sopenharmony_ci * @param set the object to which to add the character 3597777dab0Sopenharmony_ci * @param str the string to add 3607777dab0Sopenharmony_ci * @param strLen the length of the string or -1 if null terminated. 3617777dab0Sopenharmony_ci * @stable ICU 2.4 3627777dab0Sopenharmony_ci */ 3637777dab0Sopenharmony_ciU_CAPI void U_EXPORT2 3647777dab0Sopenharmony_ciuset_addString(USet* set, const UChar* str, int32_t strLen); 3657777dab0Sopenharmony_ci 3667777dab0Sopenharmony_ci/** 3677777dab0Sopenharmony_ci * Removes the given character from the given USet. After this call, 3687777dab0Sopenharmony_ci * uset_contains(set, c) will return false. 3697777dab0Sopenharmony_ci * A frozen set will not be modified. 3707777dab0Sopenharmony_ci * @param set the object from which to remove the character 3717777dab0Sopenharmony_ci * @param c the character to remove 3727777dab0Sopenharmony_ci * @stable ICU 2.4 3737777dab0Sopenharmony_ci */ 3747777dab0Sopenharmony_ciU_CAPI void U_EXPORT2 3757777dab0Sopenharmony_ciuset_remove(USet* set, UChar32 c); 3767777dab0Sopenharmony_ci 3777777dab0Sopenharmony_ci/** 3787777dab0Sopenharmony_ci * Removes the given string to the given USet. After this call, 3797777dab0Sopenharmony_ci * uset_containsString(set, str, strLen) will return false. 3807777dab0Sopenharmony_ci * A frozen set will not be modified. 3817777dab0Sopenharmony_ci * @param set the object to which to add the character 3827777dab0Sopenharmony_ci * @param str the string to remove 3837777dab0Sopenharmony_ci * @param strLen the length of the string or -1 if null terminated. 3847777dab0Sopenharmony_ci * @stable ICU 2.4 3857777dab0Sopenharmony_ci */ 3867777dab0Sopenharmony_ciU_CAPI void U_EXPORT2 3877777dab0Sopenharmony_ciuset_removeString(USet* set, const UChar* str, int32_t strLen); 3887777dab0Sopenharmony_ci 3897777dab0Sopenharmony_ci/** 3907777dab0Sopenharmony_ci * This is equivalent to 3917777dab0Sopenharmony_ci * <code>uset_complementRange(set, 0, 0x10FFFF)</code>. 3927777dab0Sopenharmony_ci * 3937777dab0Sopenharmony_ci * <strong>Note:</strong> This performs a symmetric difference with all code points 3947777dab0Sopenharmony_ci * <em>and thus retains all multicharacter strings</em>. 3957777dab0Sopenharmony_ci * In order to achieve a “code point complement” (all code points minus this set), 3967777dab0Sopenharmony_ci * the easiest is to <code>uset_complement(set); uset_removeAllStrings(set);</code>. 3977777dab0Sopenharmony_ci * 3987777dab0Sopenharmony_ci * A frozen set will not be modified. 3997777dab0Sopenharmony_ci * @param set the set 4007777dab0Sopenharmony_ci * @stable ICU 2.4 4017777dab0Sopenharmony_ci */ 4027777dab0Sopenharmony_ciU_CAPI void U_EXPORT2 4037777dab0Sopenharmony_ciuset_complement(USet* set); 4047777dab0Sopenharmony_ci 4057777dab0Sopenharmony_ci/** 4067777dab0Sopenharmony_ci * Removes all of the elements from this set. This set will be 4077777dab0Sopenharmony_ci * empty after this call returns. 4087777dab0Sopenharmony_ci * A frozen set will not be modified. 4097777dab0Sopenharmony_ci * @param set the set 4107777dab0Sopenharmony_ci * @stable ICU 2.4 4117777dab0Sopenharmony_ci */ 4127777dab0Sopenharmony_ciU_CAPI void U_EXPORT2 4137777dab0Sopenharmony_ciuset_clear(USet* set); 4147777dab0Sopenharmony_ci 4157777dab0Sopenharmony_ci/** 4167777dab0Sopenharmony_ci * Returns true if the given USet contains no characters and no 4177777dab0Sopenharmony_ci * strings. 4187777dab0Sopenharmony_ci * @param set the set 4197777dab0Sopenharmony_ci * @return true if set is empty 4207777dab0Sopenharmony_ci * @stable ICU 2.4 4217777dab0Sopenharmony_ci */ 4227777dab0Sopenharmony_ciU_CAPI UBool U_EXPORT2 4237777dab0Sopenharmony_ciuset_isEmpty(const USet* set); 4247777dab0Sopenharmony_ci 4257777dab0Sopenharmony_ci/** 4267777dab0Sopenharmony_ci * Returns true if the given USet contains the given character. 4277777dab0Sopenharmony_ci * This function works faster with a frozen set. 4287777dab0Sopenharmony_ci * @param set the set 4297777dab0Sopenharmony_ci * @param c The codepoint to check for within the set 4307777dab0Sopenharmony_ci * @return true if set contains c 4317777dab0Sopenharmony_ci * @stable ICU 2.4 4327777dab0Sopenharmony_ci */ 4337777dab0Sopenharmony_ciU_CAPI UBool U_EXPORT2 4347777dab0Sopenharmony_ciuset_contains(const USet* set, UChar32 c); 4357777dab0Sopenharmony_ci 4367777dab0Sopenharmony_ci/** 4377777dab0Sopenharmony_ci * Returns true if the given USet contains the given string. 4387777dab0Sopenharmony_ci * @param set the set 4397777dab0Sopenharmony_ci * @param str the string 4407777dab0Sopenharmony_ci * @param strLen the length of the string or -1 if null terminated. 4417777dab0Sopenharmony_ci * @return true if set contains str 4427777dab0Sopenharmony_ci * @stable ICU 2.4 4437777dab0Sopenharmony_ci */ 4447777dab0Sopenharmony_ciU_CAPI UBool U_EXPORT2 4457777dab0Sopenharmony_ciuset_containsString(const USet* set, const UChar* str, int32_t strLen); 4467777dab0Sopenharmony_ci/** 4477777dab0Sopenharmony_ci * Returns the number of characters and strings contained in this set. 4487777dab0Sopenharmony_ci * The last (uset_getItemCount() - uset_getRangeCount()) items are strings. 4497777dab0Sopenharmony_ci * 4507777dab0Sopenharmony_ci * This is slower than uset_getRangeCount() and uset_getItemCount() because 4517777dab0Sopenharmony_ci * it counts the code points of all ranges. 4527777dab0Sopenharmony_ci * 4537777dab0Sopenharmony_ci * @param set the set 4547777dab0Sopenharmony_ci * @return a non-negative integer counting the characters and strings 4557777dab0Sopenharmony_ci * contained in set 4567777dab0Sopenharmony_ci * @stable ICU 2.4 4577777dab0Sopenharmony_ci * @see uset_getRangeCount 4587777dab0Sopenharmony_ci */ 4597777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2 4607777dab0Sopenharmony_ciuset_size(const USet* set); 4617777dab0Sopenharmony_ci 4627777dab0Sopenharmony_ci/** 4637777dab0Sopenharmony_ci * @param set the set 4647777dab0Sopenharmony_ci * @return the number of ranges in this set. 4657777dab0Sopenharmony_ci * @stable ICU 70 4667777dab0Sopenharmony_ci * @see uset_getItemCount 4677777dab0Sopenharmony_ci * @see uset_getItem 4687777dab0Sopenharmony_ci * @see uset_size 4697777dab0Sopenharmony_ci */ 4707777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2 4717777dab0Sopenharmony_ciuset_getRangeCount(const USet *set); 4727777dab0Sopenharmony_ci 4737777dab0Sopenharmony_ci/** 4747777dab0Sopenharmony_ci * Returns the number of items in this set. An item is either a range 4757777dab0Sopenharmony_ci * of characters or a single multicharacter string. 4767777dab0Sopenharmony_ci * @param set the set 4777777dab0Sopenharmony_ci * @return a non-negative integer counting the character ranges 4787777dab0Sopenharmony_ci * and/or strings contained in set 4797777dab0Sopenharmony_ci * @stable ICU 2.4 4807777dab0Sopenharmony_ci */ 4817777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2 4827777dab0Sopenharmony_ciuset_getItemCount(const USet* set); 4837777dab0Sopenharmony_ci 4847777dab0Sopenharmony_ci/** 4857777dab0Sopenharmony_ci * Returns an item of this set. An item is either a range of 4867777dab0Sopenharmony_ci * characters or a single multicharacter string (which can be the empty string). 4877777dab0Sopenharmony_ci * 4887777dab0Sopenharmony_ci * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0, 4897777dab0Sopenharmony_ci * and the range is <code>*start</code>..<code>*end</code>. 4907777dab0Sopenharmony_ci * 4917777dab0Sopenharmony_ci * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then 4927777dab0Sopenharmony_ci * this function copies the string into <code>str[strCapacity]</code> and 4937777dab0Sopenharmony_ci * returns the length of the string (0 for the empty string). 4947777dab0Sopenharmony_ci * 4957777dab0Sopenharmony_ci * If <code>itemIndex</code> is out of range, then this function returns -1. 4967777dab0Sopenharmony_ci * 4977777dab0Sopenharmony_ci * Note that 0 is returned for each range as well as for the empty string. 4987777dab0Sopenharmony_ci * 4997777dab0Sopenharmony_ci * @param set the set 5007777dab0Sopenharmony_ci * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1 5017777dab0Sopenharmony_ci * @param start pointer to variable to receive first character in range, inclusive; 5027777dab0Sopenharmony_ci * can be NULL for a string item 5037777dab0Sopenharmony_ci * @param end pointer to variable to receive last character in range, inclusive; 5047777dab0Sopenharmony_ci * can be NULL for a string item 5057777dab0Sopenharmony_ci * @param str buffer to receive the string, may be NULL 5067777dab0Sopenharmony_ci * @param strCapacity capacity of str, or 0 if str is NULL 5077777dab0Sopenharmony_ci * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range 5087777dab0Sopenharmony_ci * @return the length of the string (0 or >= 2), or 0 if the item is a range, 5097777dab0Sopenharmony_ci * or -1 if the itemIndex is out of range 5107777dab0Sopenharmony_ci * @stable ICU 2.4 5117777dab0Sopenharmony_ci */ 5127777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2 5137777dab0Sopenharmony_ciuset_getItem(const USet* set, int32_t itemIndex, 5147777dab0Sopenharmony_ci UChar32* start, UChar32* end, 5157777dab0Sopenharmony_ci UChar* str, int32_t strCapacity, 5167777dab0Sopenharmony_ci UErrorCode* ec); 5177777dab0Sopenharmony_ci#endif 518