17777dab0Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
27777dab0Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
37777dab0Sopenharmony_ci/*
47777dab0Sopenharmony_ci*******************************************************************************
57777dab0Sopenharmony_ci*
67777dab0Sopenharmony_ci*   Copyright (C) 2002-2014, International Business Machines
77777dab0Sopenharmony_ci*   Corporation and others.  All Rights Reserved.
87777dab0Sopenharmony_ci*
97777dab0Sopenharmony_ci*******************************************************************************
107777dab0Sopenharmony_ci*   file name:  uset.h
117777dab0Sopenharmony_ci*   encoding:   UTF-8
127777dab0Sopenharmony_ci*   tab size:   8 (not used)
137777dab0Sopenharmony_ci*   indentation:4
147777dab0Sopenharmony_ci*
157777dab0Sopenharmony_ci*   created on: 2002mar07
167777dab0Sopenharmony_ci*   created by: Markus W. Scherer
177777dab0Sopenharmony_ci*
187777dab0Sopenharmony_ci*   C version of UnicodeSet.
197777dab0Sopenharmony_ci*/
207777dab0Sopenharmony_ci
217777dab0Sopenharmony_ci
227777dab0Sopenharmony_ci/**
237777dab0Sopenharmony_ci * \file
247777dab0Sopenharmony_ci * \brief C API: Unicode Set
257777dab0Sopenharmony_ci *
267777dab0Sopenharmony_ci * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
277777dab0Sopenharmony_ci */
287777dab0Sopenharmony_ci
297777dab0Sopenharmony_ci#ifndef __USET_H__
307777dab0Sopenharmony_ci#define __USET_H__
317777dab0Sopenharmony_ci
327777dab0Sopenharmony_ci#include "unicode/utypes.h"
337777dab0Sopenharmony_ci#include "unicode/uchar.h"
347777dab0Sopenharmony_ci
357777dab0Sopenharmony_ci#if U_SHOW_CPLUSPLUS_API
367777dab0Sopenharmony_ci#include "unicode/localpointer.h"
377777dab0Sopenharmony_ci#endif   // U_SHOW_CPLUSPLUS_API
387777dab0Sopenharmony_ci
397777dab0Sopenharmony_ci#ifndef USET_DEFINED
407777dab0Sopenharmony_ci
417777dab0Sopenharmony_ci#ifndef U_IN_DOXYGEN
427777dab0Sopenharmony_ci#define USET_DEFINED
437777dab0Sopenharmony_ci#endif
447777dab0Sopenharmony_ci/**
457777dab0Sopenharmony_ci * USet is the opaque C API type corresponding to the C++ class UnicodeSet.
467777dab0Sopenharmony_ci * Use the uset_* API to manipulate it.  Create one with a
477777dab0Sopenharmony_ci * uset_open* function, and destroy it with uset_close().
487777dab0Sopenharmony_ci * @stable ICU 2.4
497777dab0Sopenharmony_ci */
507777dab0Sopenharmony_citypedef struct USet USet;
517777dab0Sopenharmony_ci#endif
527777dab0Sopenharmony_ci
537777dab0Sopenharmony_ci/**
547777dab0Sopenharmony_ci * Bitmask values to be passed as the options argument of
557777dab0Sopenharmony_ci * uset_openPatternOptions(), or to uset_applyPattern() taking an option parameter.
567777dab0Sopenharmony_ci * @stable ICU 2.4
577777dab0Sopenharmony_ci */
587777dab0Sopenharmony_cienum {
597777dab0Sopenharmony_ci    /**
607777dab0Sopenharmony_ci     * Ignore white space within patterns unless quoted or escaped.
617777dab0Sopenharmony_ci     * @stable ICU 2.4
627777dab0Sopenharmony_ci     */
637777dab0Sopenharmony_ci    USET_IGNORE_SPACE = 1,
647777dab0Sopenharmony_ci
657777dab0Sopenharmony_ci    /**
667777dab0Sopenharmony_ci     * Enable case insensitive matching.  E.g., "[ab]" with this flag
677777dab0Sopenharmony_ci     * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
687777dab0Sopenharmony_ci     * match all except 'a', 'A', 'b', and 'B'. This performs a full
697777dab0Sopenharmony_ci     * closure over case mappings, e.g. U+017F for s.
707777dab0Sopenharmony_ci     *
717777dab0Sopenharmony_ci     * The resulting set is a superset of the input for the code points but
727777dab0Sopenharmony_ci     * not for the strings.
737777dab0Sopenharmony_ci     * It performs a case mapping closure of the code points and adds
747777dab0Sopenharmony_ci     * full case folding strings for the code points, and reduces strings of
757777dab0Sopenharmony_ci     * the original set to their full case folding equivalents.
767777dab0Sopenharmony_ci     *
777777dab0Sopenharmony_ci     * This is designed for case-insensitive matches, for example
787777dab0Sopenharmony_ci     * in regular expressions. The full code point case closure allows checking of
797777dab0Sopenharmony_ci     * an input character directly against the closure set.
807777dab0Sopenharmony_ci     * Strings are matched by comparing the case-folded form from the closure
817777dab0Sopenharmony_ci     * set with an incremental case folding of the string in question.
827777dab0Sopenharmony_ci     *
837777dab0Sopenharmony_ci     * The closure set will also contain single code points if the original
847777dab0Sopenharmony_ci     * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
857777dab0Sopenharmony_ci     * This is not necessary (that is, it is redundant) for the above matching method
867777dab0Sopenharmony_ci     * but results in the same closure sets regardless of whether the original
877777dab0Sopenharmony_ci     * set contained the code point or a string.
887777dab0Sopenharmony_ci     *
897777dab0Sopenharmony_ci     * @stable ICU 2.4
907777dab0Sopenharmony_ci     */
917777dab0Sopenharmony_ci    USET_CASE_INSENSITIVE = 2,
927777dab0Sopenharmony_ci
937777dab0Sopenharmony_ci    /**
947777dab0Sopenharmony_ci     * Enable case insensitive matching.  E.g., "[ab]" with this flag will match
957777dab0Sopenharmony_ci     * 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will match all except
967777dab0Sopenharmony_ci     * 'a', 'A', 'b', and 'B'. This adds the lower-, title-, and uppercase mappings
977777dab0Sopenharmony_ci     * as well as the case folding of each existing element in the set
987777dab0Sopenharmony_ci     * (compare USET_CASE_INSENSITIVE, which performs a full closure over case mappings).
997777dab0Sopenharmony_ci     * @stable ICU 3.2
1007777dab0Sopenharmony_ci     */
1017777dab0Sopenharmony_ci    USET_ADD_CASE_MAPPINGS = 4
1027777dab0Sopenharmony_ci};
1037777dab0Sopenharmony_ci
1047777dab0Sopenharmony_ci/**
1057777dab0Sopenharmony_ci * Argument values for whether span() and similar functions continue while
1067777dab0Sopenharmony_ci * the current character is contained vs. not contained in the set.
1077777dab0Sopenharmony_ci *
1087777dab0Sopenharmony_ci * The functionality is straightforward for sets with only single code points,
1097777dab0Sopenharmony_ci * without strings (which is the common case):
1107777dab0Sopenharmony_ci * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
1117777dab0Sopenharmony_ci * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
1127777dab0Sopenharmony_ci * - span() and spanBack() partition any string the same way when
1137777dab0Sopenharmony_ci *   alternating between span(USET_SPAN_NOT_CONTAINED) and
1147777dab0Sopenharmony_ci *   span(either "contained" condition).
1157777dab0Sopenharmony_ci * - Using a complemented (inverted) set and the opposite span conditions
1167777dab0Sopenharmony_ci *   yields the same results.
1177777dab0Sopenharmony_ci *
1187777dab0Sopenharmony_ci * When a set contains multi-code point strings, then these statements may not
1197777dab0Sopenharmony_ci * be true, depending on the strings in the set (for example, whether they
1207777dab0Sopenharmony_ci * overlap with each other) and the string that is processed.
1217777dab0Sopenharmony_ci * For a set with strings:
1227777dab0Sopenharmony_ci * - The complement of the set contains the opposite set of code points,
1237777dab0Sopenharmony_ci *   but the same set of strings.
1247777dab0Sopenharmony_ci *   Therefore, complementing both the set and the span conditions
1257777dab0Sopenharmony_ci *   may yield different results.
1267777dab0Sopenharmony_ci * - When starting spans at different positions in a string
1277777dab0Sopenharmony_ci *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
1287777dab0Sopenharmony_ci *   because a set string may start before the later position.
1297777dab0Sopenharmony_ci * - span(USET_SPAN_SIMPLE) may be shorter than
1307777dab0Sopenharmony_ci *   span(USET_SPAN_CONTAINED) because it will not recursively try
1317777dab0Sopenharmony_ci *   all possible paths.
1327777dab0Sopenharmony_ci *   For example, with a set which contains the three strings "xy", "xya" and "ax",
1337777dab0Sopenharmony_ci *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
1347777dab0Sopenharmony_ci *   span("xyax", USET_SPAN_SIMPLE) will return 3.
1357777dab0Sopenharmony_ci *   span(USET_SPAN_SIMPLE) will never be longer than
1367777dab0Sopenharmony_ci *   span(USET_SPAN_CONTAINED).
1377777dab0Sopenharmony_ci * - With either "contained" condition, span() and spanBack() may partition
1387777dab0Sopenharmony_ci *   a string in different ways.
1397777dab0Sopenharmony_ci *   For example, with a set which contains the two strings "ab" and "ba",
1407777dab0Sopenharmony_ci *   and when processing the string "aba",
1417777dab0Sopenharmony_ci *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
1427777dab0Sopenharmony_ci *   while spanBack() will yield boundaries of { 0, 1, 3 }.
1437777dab0Sopenharmony_ci *
1447777dab0Sopenharmony_ci * Note: If it is important to get the same boundaries whether iterating forward
1457777dab0Sopenharmony_ci * or backward through a string, then either only span() should be used and
1467777dab0Sopenharmony_ci * the boundaries cached for backward operation, or an ICU BreakIterator
1477777dab0Sopenharmony_ci * could be used.
1487777dab0Sopenharmony_ci *
1497777dab0Sopenharmony_ci * Note: Unpaired surrogates are treated like surrogate code points.
1507777dab0Sopenharmony_ci * Similarly, set strings match only on code point boundaries,
1517777dab0Sopenharmony_ci * never in the middle of a surrogate pair.
1527777dab0Sopenharmony_ci * Illegal UTF-8 sequences are treated like U+FFFD.
1537777dab0Sopenharmony_ci * When processing UTF-8 strings, malformed set strings
1547777dab0Sopenharmony_ci * (strings with unpaired surrogates which cannot be converted to UTF-8)
1557777dab0Sopenharmony_ci * are ignored.
1567777dab0Sopenharmony_ci *
1577777dab0Sopenharmony_ci * @stable ICU 3.8
1587777dab0Sopenharmony_ci */
1597777dab0Sopenharmony_citypedef enum USetSpanCondition {
1607777dab0Sopenharmony_ci    /**
1617777dab0Sopenharmony_ci     * Continues a span() while there is no set element at the current position.
1627777dab0Sopenharmony_ci     * Increments by one code point at a time.
1637777dab0Sopenharmony_ci     * Stops before the first set element (character or string).
1647777dab0Sopenharmony_ci     * (For sets with code points only, this is like while contains(current)==false).
1657777dab0Sopenharmony_ci     *
1667777dab0Sopenharmony_ci     * When span() returns, the substring between where it started and the position
1677777dab0Sopenharmony_ci     * it returned consists only of characters that are not in the set,
1687777dab0Sopenharmony_ci     * and none of the set's strings overlap with the span.
1697777dab0Sopenharmony_ci     *
1707777dab0Sopenharmony_ci     * @stable ICU 3.8
1717777dab0Sopenharmony_ci     */
1727777dab0Sopenharmony_ci    USET_SPAN_NOT_CONTAINED = 0,
1737777dab0Sopenharmony_ci    /**
1747777dab0Sopenharmony_ci     * Spans the longest substring that is a concatenation of set elements (characters or strings).
1757777dab0Sopenharmony_ci     * (For sets with characters only, this is like while contains(current)==true).
1767777dab0Sopenharmony_ci     *
1777777dab0Sopenharmony_ci     * When span() returns, the substring between where it started and the position
1787777dab0Sopenharmony_ci     * it returned consists only of set elements (characters or strings) that are in the set.
1797777dab0Sopenharmony_ci     *
1807777dab0Sopenharmony_ci     * If a set contains strings, then the span will be the longest substring for which there
1817777dab0Sopenharmony_ci     * exists at least one non-overlapping concatenation of set elements (characters or strings).
1827777dab0Sopenharmony_ci     * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
1837777dab0Sopenharmony_ci     * (Java/ICU/Perl regex stops at the first match of an OR.)
1847777dab0Sopenharmony_ci     *
1857777dab0Sopenharmony_ci     * @stable ICU 3.8
1867777dab0Sopenharmony_ci     */
1877777dab0Sopenharmony_ci    USET_SPAN_CONTAINED = 1,
1887777dab0Sopenharmony_ci    /**
1897777dab0Sopenharmony_ci     * Continues a span() while there is a set element at the current position.
1907777dab0Sopenharmony_ci     * Increments by the longest matching element at each position.
1917777dab0Sopenharmony_ci     * (For sets with characters only, this is like while contains(current)==true).
1927777dab0Sopenharmony_ci     *
1937777dab0Sopenharmony_ci     * When span() returns, the substring between where it started and the position
1947777dab0Sopenharmony_ci     * it returned consists only of set elements (characters or strings) that are in the set.
1957777dab0Sopenharmony_ci     *
1967777dab0Sopenharmony_ci     * If a set only contains single characters, then this is the same as USET_SPAN_CONTAINED.
1977777dab0Sopenharmony_ci     *
1987777dab0Sopenharmony_ci     * If a set contains strings, then the span will be the longest substring
1997777dab0Sopenharmony_ci     * with a match at each position with the longest single set element (character or string).
2007777dab0Sopenharmony_ci     * Such a span is never longer than the span for USET_SPAN_CONTAINED.
2017777dab0Sopenharmony_ci     *
2027777dab0Sopenharmony_ci     * Use this span condition together with other longest-match algorithms,
2037777dab0Sopenharmony_ci     * such as ICU converters (ucnv_getUnicodeSet()).
2047777dab0Sopenharmony_ci     *
2057777dab0Sopenharmony_ci     * @stable ICU 3.8
2067777dab0Sopenharmony_ci     */
2077777dab0Sopenharmony_ci    USET_SPAN_SIMPLE = 2,
2087777dab0Sopenharmony_ci} USetSpanCondition;
2097777dab0Sopenharmony_ci
2107777dab0Sopenharmony_cienum {
2117777dab0Sopenharmony_ci    /**
2127777dab0Sopenharmony_ci     * Capacity, in uint16_t units, of USerializedSet::staticArray.
2137777dab0Sopenharmony_ci     * Enough for any single-code point set.
2147777dab0Sopenharmony_ci     * Also provides padding for nice sizeof(USerializedSet).
2157777dab0Sopenharmony_ci     * @stable ICU 2.4
2167777dab0Sopenharmony_ci     */
2177777dab0Sopenharmony_ci    USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
2187777dab0Sopenharmony_ci};
2197777dab0Sopenharmony_ci
2207777dab0Sopenharmony_ci/**
2217777dab0Sopenharmony_ci * A serialized form of a Unicode set.  Limited manipulations are
2227777dab0Sopenharmony_ci * possible directly on a serialized set.  See below.
2237777dab0Sopenharmony_ci * @stable ICU 2.4
2247777dab0Sopenharmony_ci */
2257777dab0Sopenharmony_citypedef struct USerializedSet {
2267777dab0Sopenharmony_ci    /**
2277777dab0Sopenharmony_ci     * The serialized Unicode set, as an array of 16-bit units.
2287777dab0Sopenharmony_ci     * @stable ICU 2.4
2297777dab0Sopenharmony_ci     */
2307777dab0Sopenharmony_ci    const uint16_t *array;
2317777dab0Sopenharmony_ci    /**
2327777dab0Sopenharmony_ci     * The length of the part of the array that contains BMP characters.
2337777dab0Sopenharmony_ci     * @stable ICU 2.4
2347777dab0Sopenharmony_ci     */
2357777dab0Sopenharmony_ci    int32_t bmpLength;
2367777dab0Sopenharmony_ci    /**
2377777dab0Sopenharmony_ci     * The total length of the array.
2387777dab0Sopenharmony_ci     * @stable ICU 2.4
2397777dab0Sopenharmony_ci     */
2407777dab0Sopenharmony_ci    int32_t length;
2417777dab0Sopenharmony_ci    /**
2427777dab0Sopenharmony_ci     * A small buffer (USET_SERIALIZED_STATIC_ARRAY_CAPACITY units) for the array to reduce memory allocations.
2437777dab0Sopenharmony_ci     * @stable ICU 2.4
2447777dab0Sopenharmony_ci     */
2457777dab0Sopenharmony_ci    uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
2467777dab0Sopenharmony_ci} USerializedSet;
2477777dab0Sopenharmony_ci
2487777dab0Sopenharmony_ci/*********************************************************************
2497777dab0Sopenharmony_ci * USet API
2507777dab0Sopenharmony_ci *********************************************************************/
2517777dab0Sopenharmony_ci
2527777dab0Sopenharmony_ci/**
2537777dab0Sopenharmony_ci * Creates a USet object that contains the range of code points
2547777dab0Sopenharmony_ci * start..end, inclusive.  If <code>start > end</code>
2557777dab0Sopenharmony_ci * then an empty set is created (same as using uset_openEmpty()).
2567777dab0Sopenharmony_ci * @param start first code point of the range, inclusive
2577777dab0Sopenharmony_ci * @param end last code point of the range, inclusive
2587777dab0Sopenharmony_ci * @return a newly created USet.  The caller must call uset_close() on
2597777dab0Sopenharmony_ci * it when done.
2607777dab0Sopenharmony_ci * @stable ICU 2.4
2617777dab0Sopenharmony_ci */
2627777dab0Sopenharmony_ciU_CAPI USet* U_EXPORT2
2637777dab0Sopenharmony_ciuset_open(UChar32 start, UChar32 end);
2647777dab0Sopenharmony_ci
2657777dab0Sopenharmony_ci/**
2667777dab0Sopenharmony_ci * Creates a set from the given pattern.  See the UnicodeSet class
2677777dab0Sopenharmony_ci * description for the syntax of the pattern language.
2687777dab0Sopenharmony_ci * @param pattern a string specifying what characters are in the set
2697777dab0Sopenharmony_ci * @param patternLength the length of the pattern, or -1 if null terminated
2707777dab0Sopenharmony_ci * @param ec the error code
2717777dab0Sopenharmony_ci * @return the newly created USet; the caller must call uset_close() on it when done
2727777dab0Sopenharmony_ci * @stable ICU 2.4
2737777dab0Sopenharmony_ci */
2747777dab0Sopenharmony_ciU_CAPI USet* U_EXPORT2
2757777dab0Sopenharmony_ciuset_openPattern(const UChar* pattern, int32_t patternLength,
2767777dab0Sopenharmony_ci                 UErrorCode* ec);
2777777dab0Sopenharmony_ci
2787777dab0Sopenharmony_ci/**
2797777dab0Sopenharmony_ci * Creates a set from the given pattern.  See the UnicodeSet class
2807777dab0Sopenharmony_ci * description for the syntax of the pattern language.
2817777dab0Sopenharmony_ci * @param pattern a string specifying what characters are in the set
2827777dab0Sopenharmony_ci * @param patternLength the length of the pattern, or -1 if null terminated
2837777dab0Sopenharmony_ci * @param options bitmask for options to apply to the pattern (see the USET_* option constants).
2847777dab0Sopenharmony_ci * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
2857777dab0Sopenharmony_ci * @param ec the error code
2867777dab0Sopenharmony_ci * @return the newly created USet; the caller must call uset_close() on it when done
2877777dab0Sopenharmony_ci * @stable ICU 2.4
2887777dab0Sopenharmony_ci */
2897777dab0Sopenharmony_ciU_CAPI USet* U_EXPORT2
2907777dab0Sopenharmony_ciuset_openPatternOptions(const UChar* pattern, int32_t patternLength,
2917777dab0Sopenharmony_ci                 uint32_t options,
2927777dab0Sopenharmony_ci                 UErrorCode* ec);
2937777dab0Sopenharmony_ci
2947777dab0Sopenharmony_ci/**
2957777dab0Sopenharmony_ci * Disposes of the storage used by a USet object.  This function should
2967777dab0Sopenharmony_ci * be called exactly once for objects returned by the uset_open*() functions.
2977777dab0Sopenharmony_ci * @param set the object to dispose of
2987777dab0Sopenharmony_ci * @stable ICU 2.4
2997777dab0Sopenharmony_ci */
3007777dab0Sopenharmony_ciU_CAPI void U_EXPORT2
3017777dab0Sopenharmony_ciuset_close(USet* set);
3027777dab0Sopenharmony_ci
3037777dab0Sopenharmony_ci#if U_SHOW_CPLUSPLUS_API
3047777dab0Sopenharmony_ci
3057777dab0Sopenharmony_ciU_NAMESPACE_BEGIN
3067777dab0Sopenharmony_ci
3077777dab0Sopenharmony_ci/**
3087777dab0Sopenharmony_ci * \class LocalUSetPointer
3097777dab0Sopenharmony_ci * C++ "smart pointer" class (declared only if U_SHOW_CPLUSPLUS_API), closes a USet via uset_close().
3107777dab0Sopenharmony_ci * For most methods see the LocalPointerBase base class.
3117777dab0Sopenharmony_ci *
3127777dab0Sopenharmony_ci * @see LocalPointerBase
3137777dab0Sopenharmony_ci * @see LocalPointer
3147777dab0Sopenharmony_ci * @stable ICU 4.4
3157777dab0Sopenharmony_ci */
3167777dab0Sopenharmony_ciU_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
3177777dab0Sopenharmony_ci
3187777dab0Sopenharmony_ciU_NAMESPACE_END
3197777dab0Sopenharmony_ci
3207777dab0Sopenharmony_ci#endif
3217777dab0Sopenharmony_ci
3227777dab0Sopenharmony_ci/**
3237777dab0Sopenharmony_ci * Returns a string representation of this set.  If the result of
3247777dab0Sopenharmony_ci * calling this function is passed to uset_openPattern(), it
3257777dab0Sopenharmony_ci * will produce another set that is equal to this one.
3267777dab0Sopenharmony_ci * @param set the set
3277777dab0Sopenharmony_ci * @param result the buffer to receive the pattern string, may be NULL
3287777dab0Sopenharmony_ci * @param resultCapacity the capacity of result, may be 0 if result is NULL
3297777dab0Sopenharmony_ci * @param escapeUnprintable if true then convert unprintable
3307777dab0Sopenharmony_ci * characters to their hex escape representations, \\uxxxx or
3317777dab0Sopenharmony_ci * \\Uxxxxxxxx.  Unprintable characters are those other than
3327777dab0Sopenharmony_ci * U+000A, U+0020..U+007E.
3337777dab0Sopenharmony_ci * @param ec error code.
3347777dab0Sopenharmony_ci * @return length of string, possibly larger than resultCapacity
3357777dab0Sopenharmony_ci * @stable ICU 2.4
3367777dab0Sopenharmony_ci */
3377777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
3387777dab0Sopenharmony_ciuset_toPattern(const USet* set,
3397777dab0Sopenharmony_ci               UChar* result, int32_t resultCapacity,
3407777dab0Sopenharmony_ci               UBool escapeUnprintable,
3417777dab0Sopenharmony_ci               UErrorCode* ec);
3427777dab0Sopenharmony_ci
3437777dab0Sopenharmony_ci/**
3447777dab0Sopenharmony_ci * Adds the given character to the given USet.  After this call,
3457777dab0Sopenharmony_ci * uset_contains(set, c) will return true, unless the set is frozen:
3467777dab0Sopenharmony_ci * a frozen set will not be modified.
3477777dab0Sopenharmony_ci * @param set the object to which to add the character
3487777dab0Sopenharmony_ci * @param c the character to add
3497777dab0Sopenharmony_ci * @stable ICU 2.4
3507777dab0Sopenharmony_ci */
3517777dab0Sopenharmony_ciU_CAPI void U_EXPORT2
3527777dab0Sopenharmony_ciuset_add(USet* set, UChar32 c);
3537777dab0Sopenharmony_ci
3547777dab0Sopenharmony_ci/**
3557777dab0Sopenharmony_ci * Adds the given string to the given USet.  After this call,
3567777dab0Sopenharmony_ci * uset_containsString(set, str, strLen) will return true, unless the set is frozen:
3577777dab0Sopenharmony_ci * a frozen set will not be modified.
3587777dab0Sopenharmony_ci * @param set the object to which to add the string
3597777dab0Sopenharmony_ci * @param str the string to add
3607777dab0Sopenharmony_ci * @param strLen the length of the string or -1 if null terminated.
3617777dab0Sopenharmony_ci * @stable ICU 2.4
3627777dab0Sopenharmony_ci */
3637777dab0Sopenharmony_ciU_CAPI void U_EXPORT2
3647777dab0Sopenharmony_ciuset_addString(USet* set, const UChar* str, int32_t strLen);
3657777dab0Sopenharmony_ci
3667777dab0Sopenharmony_ci/**
3677777dab0Sopenharmony_ci * Removes the given character from the given USet.  After this call,
3687777dab0Sopenharmony_ci * uset_contains(set, c) will return false, unless the set is frozen:
3697777dab0Sopenharmony_ci * a frozen set will not be modified.
3707777dab0Sopenharmony_ci * @param set the object from which to remove the character
3717777dab0Sopenharmony_ci * @param c the character to remove
3727777dab0Sopenharmony_ci * @stable ICU 2.4
3737777dab0Sopenharmony_ci */
3747777dab0Sopenharmony_ciU_CAPI void U_EXPORT2
3757777dab0Sopenharmony_ciuset_remove(USet* set, UChar32 c);
3767777dab0Sopenharmony_ci
3777777dab0Sopenharmony_ci/**
3787777dab0Sopenharmony_ci * Removes the given string from the given USet.  After this call,
3797777dab0Sopenharmony_ci * uset_containsString(set, str, strLen) will return false, unless the set is frozen:
3807777dab0Sopenharmony_ci * a frozen set will not be modified.
3817777dab0Sopenharmony_ci * @param set the object from which to remove the string
3827777dab0Sopenharmony_ci * @param str the string to remove
3837777dab0Sopenharmony_ci * @param strLen the length of the string or -1 if null terminated.
3847777dab0Sopenharmony_ci * @stable ICU 2.4
3857777dab0Sopenharmony_ci */
3867777dab0Sopenharmony_ciU_CAPI void U_EXPORT2
3877777dab0Sopenharmony_ciuset_removeString(USet* set, const UChar* str, int32_t strLen);
3887777dab0Sopenharmony_ci
3897777dab0Sopenharmony_ci/**
3907777dab0Sopenharmony_ci * Complements this set.  This is equivalent to
3917777dab0Sopenharmony_ci * <code>uset_complementRange(set, 0, 0x10FFFF)</code>.
3927777dab0Sopenharmony_ci *
3937777dab0Sopenharmony_ci * <strong>Note:</strong> This performs a symmetric difference with all code points
3947777dab0Sopenharmony_ci * <em>and thus retains all multicharacter strings</em>.
3957777dab0Sopenharmony_ci * In order to achieve a “code point complement” (all code points minus this set),
3967777dab0Sopenharmony_ci * the easiest way is to <code>uset_complement(set); uset_removeAllStrings(set);</code>.
3977777dab0Sopenharmony_ci *
3987777dab0Sopenharmony_ci * A frozen set will not be modified.
3997777dab0Sopenharmony_ci * @param set the set
4007777dab0Sopenharmony_ci * @stable ICU 2.4
4017777dab0Sopenharmony_ci */
4027777dab0Sopenharmony_ciU_CAPI void U_EXPORT2
4037777dab0Sopenharmony_ciuset_complement(USet* set);
4047777dab0Sopenharmony_ci
4057777dab0Sopenharmony_ci/**
4067777dab0Sopenharmony_ci * Removes all of the elements from this set.  This set will be
4077777dab0Sopenharmony_ci * empty after this call returns, unless it is frozen:
4087777dab0Sopenharmony_ci * a frozen set will not be modified.
4097777dab0Sopenharmony_ci * @param set the set
4107777dab0Sopenharmony_ci * @stable ICU 2.4
4117777dab0Sopenharmony_ci */
4127777dab0Sopenharmony_ciU_CAPI void U_EXPORT2
4137777dab0Sopenharmony_ciuset_clear(USet* set);
4147777dab0Sopenharmony_ci
4157777dab0Sopenharmony_ci/**
4167777dab0Sopenharmony_ci * Returns true if the given USet contains no characters and no
4177777dab0Sopenharmony_ci * strings.
4187777dab0Sopenharmony_ci * @param set the set to test
4197777dab0Sopenharmony_ci * @return true if set is empty
4207777dab0Sopenharmony_ci * @stable ICU 2.4
4217777dab0Sopenharmony_ci */
4227777dab0Sopenharmony_ciU_CAPI UBool U_EXPORT2
4237777dab0Sopenharmony_ciuset_isEmpty(const USet* set);
4247777dab0Sopenharmony_ci
4257777dab0Sopenharmony_ci/**
4267777dab0Sopenharmony_ci * Returns true if the given USet contains the given character.
4277777dab0Sopenharmony_ci * This function works faster with a frozen set.
4287777dab0Sopenharmony_ci * @param set the set
4297777dab0Sopenharmony_ci * @param c the code point to check for within the set
4307777dab0Sopenharmony_ci * @return true if set contains c
4317777dab0Sopenharmony_ci * @stable ICU 2.4
4327777dab0Sopenharmony_ci */
4337777dab0Sopenharmony_ciU_CAPI UBool U_EXPORT2
4347777dab0Sopenharmony_ciuset_contains(const USet* set, UChar32 c);
4357777dab0Sopenharmony_ci
4367777dab0Sopenharmony_ci/**
4377777dab0Sopenharmony_ci * Returns true if the given USet contains the given string.
4387777dab0Sopenharmony_ci * @param set the set
4397777dab0Sopenharmony_ci * @param str the string to check for within the set
4407777dab0Sopenharmony_ci * @param strLen the length of the string or -1 if null terminated.
4417777dab0Sopenharmony_ci * @return true if set contains str
4427777dab0Sopenharmony_ci * @stable ICU 2.4
4437777dab0Sopenharmony_ci */
4447777dab0Sopenharmony_ciU_CAPI UBool U_EXPORT2
4457777dab0Sopenharmony_ciuset_containsString(const USet* set, const UChar* str, int32_t strLen);
4467777dab0Sopenharmony_ci/**
4477777dab0Sopenharmony_ci * Returns the number of characters and strings contained in this set.
4487777dab0Sopenharmony_ci * Among the set's items, the last (uset_getItemCount() - uset_getRangeCount()) are strings.
4497777dab0Sopenharmony_ci *
4507777dab0Sopenharmony_ci * This is slower than uset_getRangeCount() and uset_getItemCount() because
4517777dab0Sopenharmony_ci * it counts the code points of all ranges.
4527777dab0Sopenharmony_ci *
4537777dab0Sopenharmony_ci * @param set the set
4547777dab0Sopenharmony_ci * @return a non-negative integer counting the characters and strings
4557777dab0Sopenharmony_ci * contained in set
4567777dab0Sopenharmony_ci * @stable ICU 2.4
4577777dab0Sopenharmony_ci * @see uset_getRangeCount
4587777dab0Sopenharmony_ci */
4597777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
4607777dab0Sopenharmony_ciuset_size(const USet* set);
4617777dab0Sopenharmony_ci
4627777dab0Sopenharmony_ci/**
4637777dab0Sopenharmony_ci * @param set the set whose ranges are counted
4647777dab0Sopenharmony_ci * @return the number of character ranges in this set (strings are not counted).
4657777dab0Sopenharmony_ci * @stable ICU 70
4667777dab0Sopenharmony_ci * @see uset_getItemCount
4677777dab0Sopenharmony_ci * @see uset_getItem
4687777dab0Sopenharmony_ci * @see uset_size
4697777dab0Sopenharmony_ci */
4707777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
4717777dab0Sopenharmony_ciuset_getRangeCount(const USet *set);
4727777dab0Sopenharmony_ci
4737777dab0Sopenharmony_ci/**
4747777dab0Sopenharmony_ci * Returns the number of items in this set.  An item is either a range
4757777dab0Sopenharmony_ci * of characters or a single multicharacter string; range items precede string items.
4767777dab0Sopenharmony_ci * @param set the set
4777777dab0Sopenharmony_ci * @return a non-negative integer counting the character ranges
4787777dab0Sopenharmony_ci * and/or strings contained in set
4797777dab0Sopenharmony_ci * @stable ICU 2.4
4807777dab0Sopenharmony_ci */
4817777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
4827777dab0Sopenharmony_ciuset_getItemCount(const USet* set);
4837777dab0Sopenharmony_ci
4847777dab0Sopenharmony_ci/**
4857777dab0Sopenharmony_ci * Returns an item of this set.  An item is either a range of
4867777dab0Sopenharmony_ci * characters or a single multicharacter string (which can be the empty string).
4877777dab0Sopenharmony_ci *
4887777dab0Sopenharmony_ci * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0,
4897777dab0Sopenharmony_ci * and the range is <code>*start</code>..<code>*end</code>.
4907777dab0Sopenharmony_ci *
4917777dab0Sopenharmony_ci * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then
4927777dab0Sopenharmony_ci * this function copies the string into the buffer <code>str[strCapacity]</code> and
4937777dab0Sopenharmony_ci * returns the length of the string (0 for the empty string).
4947777dab0Sopenharmony_ci *
4957777dab0Sopenharmony_ci * If <code>itemIndex</code> is out of range, then this function returns -1.
4967777dab0Sopenharmony_ci *
4977777dab0Sopenharmony_ci * Note that 0 is returned for each range as well as for the empty string.
4987777dab0Sopenharmony_ci *
4997777dab0Sopenharmony_ci * @param set the set
5007777dab0Sopenharmony_ci * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1
5017777dab0Sopenharmony_ci * @param start pointer to variable to receive first character in range, inclusive;
5027777dab0Sopenharmony_ci *              can be NULL for a string item
5037777dab0Sopenharmony_ci * @param end pointer to variable to receive last character in range, inclusive;
5047777dab0Sopenharmony_ci *            can be NULL for a string item
5057777dab0Sopenharmony_ci * @param str buffer to receive the string, may be NULL
5067777dab0Sopenharmony_ci * @param strCapacity capacity of str, or 0 if str is NULL
5077777dab0Sopenharmony_ci * @param ec error code; set to U_INDEX_OUTOFBOUNDS_ERROR if itemIndex is out of range
5087777dab0Sopenharmony_ci * @return the length of the string (0 or >= 2), or 0 if the item is a range,
5097777dab0Sopenharmony_ci *         or -1 if the itemIndex is out of range
5107777dab0Sopenharmony_ci * @stable ICU 2.4
5117777dab0Sopenharmony_ci */
5127777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
5137777dab0Sopenharmony_ciuset_getItem(const USet* set, int32_t itemIndex,
5147777dab0Sopenharmony_ci             UChar32* start, UChar32* end,
5157777dab0Sopenharmony_ci             UChar* str, int32_t strCapacity,
5167777dab0Sopenharmony_ci             UErrorCode* ec);
5177777dab0Sopenharmony_ci#endif
518