17777dab0Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
27777dab0Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
37777dab0Sopenharmony_ci/*
47777dab0Sopenharmony_ci******************************************************************************
57777dab0Sopenharmony_ci* Copyright (C) 1996-2015, International Business Machines Corporation and others.
67777dab0Sopenharmony_ci* All Rights Reserved.
77777dab0Sopenharmony_ci******************************************************************************
87777dab0Sopenharmony_ci*/
97777dab0Sopenharmony_ci
107777dab0Sopenharmony_ci#ifndef UBRK_H
117777dab0Sopenharmony_ci#define UBRK_H
127777dab0Sopenharmony_ci
137777dab0Sopenharmony_ci#include "unicode/utypes.h"
147777dab0Sopenharmony_ci#include "unicode/uloc.h"
157777dab0Sopenharmony_ci
167777dab0Sopenharmony_ci#if U_SHOW_CPLUSPLUS_API
177777dab0Sopenharmony_ci#include "unicode/localpointer.h"
187777dab0Sopenharmony_ci#endif   // U_SHOW_CPLUSPLUS_API
197777dab0Sopenharmony_ci
/**
 * Text-break iterator handle, intended for use from C programs.
 */
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
#   define UBRK_TYPEDEF_UBREAK_ITERATOR
    /**
     *  Opaque type standing for an ICU break iterator object.
     *  The surrounding guard macro keeps this typedef from being
     *  emitted more than once.
     *  @stable ICU 2.0
     */
    typedef struct UBreakIterator UBreakIterator;
#endif /* UBRK_TYPEDEF_UBREAK_ITERATOR */
327777dab0Sopenharmony_ci
337777dab0Sopenharmony_ci#include "unicode/parseerr.h"
347777dab0Sopenharmony_ci
357777dab0Sopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION
367777dab0Sopenharmony_ci/**
377777dab0Sopenharmony_ci * \file
387777dab0Sopenharmony_ci * \brief C API: BreakIterator
397777dab0Sopenharmony_ci *
407777dab0Sopenharmony_ci * <h2> BreakIterator C API </h2>
417777dab0Sopenharmony_ci *
 * The BreakIterator C API defines methods for finding the location
 * of boundaries in text. A UBreakIterator maintains a current position
 * and scans over the text, returning the indexes of the characters
 * where boundaries occur.
467777dab0Sopenharmony_ci * <p>
477777dab0Sopenharmony_ci * Line boundary analysis determines where a text string can be broken
487777dab0Sopenharmony_ci * when line-wrapping. The mechanism correctly handles punctuation and
497777dab0Sopenharmony_ci * hyphenated words.
507777dab0Sopenharmony_ci * <p>
517777dab0Sopenharmony_ci * Note: The locale keyword "lb" can be used to modify line break
527777dab0Sopenharmony_ci * behavior according to the CSS level 3 line-break options, see
537777dab0Sopenharmony_ci * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example:
547777dab0Sopenharmony_ci * "ja@lb=strict", "zh@lb=loose".
557777dab0Sopenharmony_ci * <p>
567777dab0Sopenharmony_ci * Sentence boundary analysis allows selection with correct
577777dab0Sopenharmony_ci * interpretation of periods within numbers and abbreviations, and
587777dab0Sopenharmony_ci * trailing punctuation marks such as quotation marks and parentheses.
597777dab0Sopenharmony_ci * <p>
607777dab0Sopenharmony_ci * Note: The locale keyword "ss" can be used to enable use of
617777dab0Sopenharmony_ci * segmentation suppression data (preventing breaks in English after
627777dab0Sopenharmony_ci * abbreviations such as "Mr." or "Est.", for example), as follows:
637777dab0Sopenharmony_ci * "en@ss=standard".
647777dab0Sopenharmony_ci * <p>
657777dab0Sopenharmony_ci * Word boundary analysis is used by search and replace functions, as
667777dab0Sopenharmony_ci * well as within text editing applications that allow the user to
677777dab0Sopenharmony_ci * select words with a double click. Word selection provides correct
687777dab0Sopenharmony_ci * interpretation of punctuation marks within and following
697777dab0Sopenharmony_ci * words. Characters that are not part of a word, such as symbols or
707777dab0Sopenharmony_ci * punctuation marks, have word-breaks on both sides.
717777dab0Sopenharmony_ci * <p>
727777dab0Sopenharmony_ci * Character boundary analysis identifies the boundaries of
737777dab0Sopenharmony_ci * "Extended Grapheme Clusters", which are groupings of codepoints
747777dab0Sopenharmony_ci * that should be treated as character-like units for many text operations.
757777dab0Sopenharmony_ci * Please see Unicode Standard Annex #29, Unicode Text Segmentation,
767777dab0Sopenharmony_ci * http://www.unicode.org/reports/tr29/ for additional information
777777dab0Sopenharmony_ci * on grapheme clusters and guidelines on their use.
787777dab0Sopenharmony_ci * <p>
797777dab0Sopenharmony_ci * Title boundary analysis locates all positions,
807777dab0Sopenharmony_ci * typically starts of words, that should be set to Title Case
817777dab0Sopenharmony_ci * when title casing the text.
827777dab0Sopenharmony_ci * <p>
837777dab0Sopenharmony_ci * The text boundary positions are found according to the rules
847777dab0Sopenharmony_ci * described in Unicode Standard Annex #29, Text Boundaries, and
857777dab0Sopenharmony_ci * Unicode Standard Annex #14, Line Breaking Properties.  These
867777dab0Sopenharmony_ci * are available at http://www.unicode.org/reports/tr14/ and
877777dab0Sopenharmony_ci * http://www.unicode.org/reports/tr29/.
887777dab0Sopenharmony_ci * <p>
897777dab0Sopenharmony_ci * In addition to the plain C API defined in this header file, an
907777dab0Sopenharmony_ci * object oriented C++ API with equivalent functionality is defined in the
917777dab0Sopenharmony_ci * file brkiter.h.
927777dab0Sopenharmony_ci * <p>
937777dab0Sopenharmony_ci * Code snippets illustrating the use of the Break Iterator APIs
947777dab0Sopenharmony_ci * are available in the ICU User Guide,
957777dab0Sopenharmony_ci * https://unicode-org.github.io/icu/userguide/boundaryanalysis/
967777dab0Sopenharmony_ci * and in the sample program icu/source/samples/break/break.cpp
977777dab0Sopenharmony_ci */
987777dab0Sopenharmony_ci
/**
 * The possible types of text boundaries.
 * @stable ICU 2.0
 */
typedef enum UBreakIteratorType {
    /** Character breaks. @stable ICU 2.0 */
    UBRK_CHARACTER = 0,
    /** Word breaks. @stable ICU 2.0 */
    UBRK_WORD      = 1,
    /** Line breaks. @stable ICU 2.0 */
    UBRK_LINE      = 2,
    /** Sentence breaks. @stable ICU 2.0 */
    UBRK_SENTENCE  = 3
} UBreakIteratorType;
1107777dab0Sopenharmony_ci
/**
 * Sentinel value indicating that all text boundaries have been returned.
 * @stable ICU 2.0
 */
#define UBRK_DONE ((int32_t)(-1))
1157777dab0Sopenharmony_ci
1167777dab0Sopenharmony_ci
/**
 * Word break tags, as returned by getRuleStatus().
 *
 * Each category of word owns a half-open range of values
 * [CATEGORY, CATEGORY_LIMIT) so that a category can be subdivided in
 * future releases. Applications should test whether a tag falls within
 * a range rather than compare against single individual values.
 *
 * The numeric values of all of these constants are stable (will not change).
 *
 * @stable ICU 2.2
 */
typedef enum UWordBreak {
    /** "Words" that fit none of the other categories, lower limit.
     *  Includes spaces and most punctuation. */
    UBRK_WORD_NONE         = 0,
    /** Uncategorized words, upper limit. */
    UBRK_WORD_NONE_LIMIT   = 100,

    /** Words that appear to be numbers, lower limit. */
    UBRK_WORD_NUMBER       = 100,
    /** Words that appear to be numbers, upper limit. */
    UBRK_WORD_NUMBER_LIMIT = 200,

    /** Words that contain letters, excluding hiragana, katakana or
     *  ideographic characters, lower limit. */
    UBRK_WORD_LETTER       = 200,
    /** Words containing letters, upper limit. */
    UBRK_WORD_LETTER_LIMIT = 300,

    /** Words containing kana characters, lower limit. */
    UBRK_WORD_KANA         = 300,
    /** Words containing kana characters, upper limit. */
    UBRK_WORD_KANA_LIMIT   = 400,

    /** Words containing ideographic characters, lower limit. */
    UBRK_WORD_IDEO         = 400,
    /** Words containing ideographic characters, upper limit. */
    UBRK_WORD_IDEO_LIMIT   = 500
} UWordBreak;
1527777dab0Sopenharmony_ci
/**
 * Line break tags, as returned by getRuleStatus().
 *
 * Each category of line break owns a half-open range of values
 * [CATEGORY, CATEGORY_LIMIT) so that a category can be subdivided in
 * future releases. Applications should test whether a tag falls within
 * a range rather than compare against single individual values.
 *
 * The numeric values of all of these constants are stable (will not change).
 *
 * @stable ICU 2.8
 */
typedef enum ULineBreakTag {
    /** Soft line breaks: positions at which a line break is acceptable
     *  but not required, lower limit. */
    UBRK_LINE_SOFT       = 0,
    /** Soft line breaks, upper limit. */
    UBRK_LINE_SOFT_LIMIT = 100,

    /** Hard, or mandatory, line breaks, lower limit. */
    UBRK_LINE_HARD       = 100,
    /** Hard line breaks, upper limit. */
    UBRK_LINE_HARD_LIMIT = 200
} ULineBreakTag;
1757777dab0Sopenharmony_ci
1767777dab0Sopenharmony_ci
1777777dab0Sopenharmony_ci
/**
 * Sentence break tags, as returned by getRuleStatus().
 *
 * Each category of sentence owns a half-open range of values
 * [CATEGORY, CATEGORY_LIMIT) so that a category can be subdivided in
 * future releases. Applications should test whether a tag falls within
 * a range rather than compare against single individual values.
 *
 * The numeric values of all of these constants are stable (will not change).
 *
 * @stable ICU 2.8
 */
typedef enum USentenceBreakTag {
    /** Sentences ending with a sentence terminator ('.', '?', '!', etc.)
     *  character, possibly followed by a hard separator
     *  (CR, LF, PS, etc.), lower limit. */
    UBRK_SENTENCE_TERM       = 0,
    /** Sentences ended by sentence terminators, upper limit. */
    UBRK_SENTENCE_TERM_LIMIT = 100,

    /** Sentences that do not contain an ending sentence terminator
     *  ('.', '?', '!', etc.) character, but are ended only by a hard
     *  separator (CR, LF, PS, etc.) or end of input, lower limit. */
    UBRK_SENTENCE_SEP        = 100,
    /** Sentences ended by a separator, upper limit. */
    UBRK_SENTENCE_SEP_LIMIT  = 200
} USentenceBreakTag;
2067777dab0Sopenharmony_ci
2077777dab0Sopenharmony_ci
2087777dab0Sopenharmony_ci/**
2097777dab0Sopenharmony_ci * Open a new UBreakIterator for locating text boundaries for a specified locale.
2107777dab0Sopenharmony_ci * A UBreakIterator may be used for detecting character, line, word,
2117777dab0Sopenharmony_ci * and sentence breaks in text.
2127777dab0Sopenharmony_ci * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
2137777dab0Sopenharmony_ci * UBRK_LINE, UBRK_SENTENCE
2147777dab0Sopenharmony_ci * @param locale The locale specifying the text-breaking conventions. Note that
2157777dab0Sopenharmony_ci * locale keys such as "lb" and "ss" may be used to modify text break behavior,
2167777dab0Sopenharmony_ci * see general discussion of BreakIterator C API.
2177777dab0Sopenharmony_ci * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
2187777dab0Sopenharmony_ci *        used to specify the text to be iterated.
2197777dab0Sopenharmony_ci * @param textLength The number of characters in text, or -1 if null-terminated.
2207777dab0Sopenharmony_ci * @param status A UErrorCode to receive any errors.
2217777dab0Sopenharmony_ci * @return A UBreakIterator for the specified locale.
2227777dab0Sopenharmony_ci * @see ubrk_openRules
2237777dab0Sopenharmony_ci * @stable ICU 2.0
2247777dab0Sopenharmony_ci */
2257777dab0Sopenharmony_ciU_CAPI UBreakIterator* U_EXPORT2
2267777dab0Sopenharmony_ciubrk_open(UBreakIteratorType type,
2277777dab0Sopenharmony_ci      const char *locale,
2287777dab0Sopenharmony_ci      const UChar *text,
2297777dab0Sopenharmony_ci      int32_t textLength,
2307777dab0Sopenharmony_ci      UErrorCode *status);
2317777dab0Sopenharmony_ci
2327777dab0Sopenharmony_ci/**
2337777dab0Sopenharmony_ci * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
2347777dab0Sopenharmony_ci * The rule syntax is ... (TBD)
2357777dab0Sopenharmony_ci * @param rules A set of rules specifying the text breaking conventions.
2367777dab0Sopenharmony_ci * @param rulesLength The number of characters in rules, or -1 if null-terminated.
2377777dab0Sopenharmony_ci * @param text The text to be iterated over.  May be null, in which case ubrk_setText() is
2387777dab0Sopenharmony_ci *        used to specify the text to be iterated.
2397777dab0Sopenharmony_ci * @param textLength The number of characters in text, or -1 if null-terminated.
2407777dab0Sopenharmony_ci * @param parseErr   Receives position and context information for any syntax errors
2417777dab0Sopenharmony_ci *                   detected while parsing the rules.
2427777dab0Sopenharmony_ci * @param status A UErrorCode to receive any errors.
2437777dab0Sopenharmony_ci * @return A UBreakIterator for the specified rules.
2447777dab0Sopenharmony_ci * @see ubrk_open
2457777dab0Sopenharmony_ci * @stable ICU 2.2
2467777dab0Sopenharmony_ci */
2477777dab0Sopenharmony_ciU_CAPI UBreakIterator* U_EXPORT2
2487777dab0Sopenharmony_ciubrk_openRules(const UChar     *rules,
2497777dab0Sopenharmony_ci               int32_t         rulesLength,
2507777dab0Sopenharmony_ci               const UChar     *text,
2517777dab0Sopenharmony_ci               int32_t          textLength,
2527777dab0Sopenharmony_ci               UParseError     *parseErr,
2537777dab0Sopenharmony_ci               UErrorCode      *status);
2547777dab0Sopenharmony_ci
2557777dab0Sopenharmony_ci/**
2567777dab0Sopenharmony_ci * Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
2577777dab0Sopenharmony_ci * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
2587777dab0Sopenharmony_ci * Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
2597777dab0Sopenharmony_ci * compatible across different major versions of ICU, nor across platforms of different
2607777dab0Sopenharmony_ci * endianness or different base character set family (ASCII vs EBCDIC).
2617777dab0Sopenharmony_ci * @param binaryRules A set of compiled binary rules specifying the text breaking
2627777dab0Sopenharmony_ci *                    conventions. Ownership of the storage containing the compiled
2637777dab0Sopenharmony_ci *                    rules remains with the caller of this function. The compiled
2647777dab0Sopenharmony_ci *                    rules must not be modified or deleted during the life of the
2657777dab0Sopenharmony_ci *                    break iterator.
2667777dab0Sopenharmony_ci * @param rulesLength The length of binaryRules in bytes; must be >= 0.
2677777dab0Sopenharmony_ci * @param text        The text to be iterated over.  May be null, in which case
2687777dab0Sopenharmony_ci *                    ubrk_setText() is used to specify the text to be iterated.
2697777dab0Sopenharmony_ci * @param textLength  The number of characters in text, or -1 if null-terminated.
2707777dab0Sopenharmony_ci * @param status      Pointer to UErrorCode to receive any errors.
2717777dab0Sopenharmony_ci * @return            UBreakIterator for the specified rules.
2727777dab0Sopenharmony_ci * @see ubrk_getBinaryRules
2737777dab0Sopenharmony_ci * @stable ICU 59
2747777dab0Sopenharmony_ci */
2757777dab0Sopenharmony_ciU_CAPI UBreakIterator* U_EXPORT2
2767777dab0Sopenharmony_ciubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
2777777dab0Sopenharmony_ci                     const UChar *  text, int32_t textLength,
2787777dab0Sopenharmony_ci                     UErrorCode *   status);
2797777dab0Sopenharmony_ci
2807777dab0Sopenharmony_ci#ifndef U_HIDE_DEPRECATED_API
2817777dab0Sopenharmony_ci
2827777dab0Sopenharmony_ci#endif /* U_HIDE_DEPRECATED_API */
2837777dab0Sopenharmony_ci
2847777dab0Sopenharmony_ci/**
2857777dab0Sopenharmony_ci * Thread safe cloning operation.
2867777dab0Sopenharmony_ci * @param bi iterator to be cloned
2877777dab0Sopenharmony_ci * @param status to indicate whether the operation went on smoothly or there were errors
2887777dab0Sopenharmony_ci * @return pointer to the new clone
2897777dab0Sopenharmony_ci * @stable ICU 69
2907777dab0Sopenharmony_ci */
2917777dab0Sopenharmony_ciU_CAPI UBreakIterator * U_EXPORT2
2927777dab0Sopenharmony_ciubrk_clone(const UBreakIterator *bi,
2937777dab0Sopenharmony_ci           UErrorCode *status);
2947777dab0Sopenharmony_ci
2957777dab0Sopenharmony_ci/**
2967777dab0Sopenharmony_ci* Close a UBreakIterator.
2977777dab0Sopenharmony_ci* Once closed, a UBreakIterator may no longer be used.
2987777dab0Sopenharmony_ci* @param bi The break iterator to close.
2997777dab0Sopenharmony_ci * @stable ICU 2.0
3007777dab0Sopenharmony_ci*/
3017777dab0Sopenharmony_ciU_CAPI void U_EXPORT2
3027777dab0Sopenharmony_ciubrk_close(UBreakIterator *bi);
3037777dab0Sopenharmony_ci
/* C++-only section: compiled only when U_SHOW_CPLUSPLUS_API is enabled. */
#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUBreakIteratorPointer
 * "Smart pointer" class, closes a UBreakIterator via ubrk_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);

U_NAMESPACE_END

#endif   // U_SHOW_CPLUSPLUS_API
3227777dab0Sopenharmony_ci
3237777dab0Sopenharmony_ci/**
3247777dab0Sopenharmony_ci * Sets an existing iterator to point to a new piece of text.
3257777dab0Sopenharmony_ci * The break iterator retains a pointer to the supplied text.
3267777dab0Sopenharmony_ci * The caller must not modify or delete the text while the BreakIterator
3277777dab0Sopenharmony_ci * retains the reference.
3287777dab0Sopenharmony_ci *
3297777dab0Sopenharmony_ci * @param bi The iterator to use
3307777dab0Sopenharmony_ci * @param text The text to be set
3317777dab0Sopenharmony_ci * @param textLength The length of the text
3327777dab0Sopenharmony_ci * @param status The error code
3337777dab0Sopenharmony_ci * @stable ICU 2.0
3347777dab0Sopenharmony_ci */
3357777dab0Sopenharmony_ciU_CAPI void U_EXPORT2
3367777dab0Sopenharmony_ciubrk_setText(UBreakIterator* bi,
3377777dab0Sopenharmony_ci             const UChar*    text,
3387777dab0Sopenharmony_ci             int32_t         textLength,
3397777dab0Sopenharmony_ci             UErrorCode*     status);
3407777dab0Sopenharmony_ci
3417777dab0Sopenharmony_ci/**
3427777dab0Sopenharmony_ci * Determine the most recently-returned text boundary.
3437777dab0Sopenharmony_ci *
3447777dab0Sopenharmony_ci * @param bi The break iterator to use.
3457777dab0Sopenharmony_ci * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
3467777dab0Sopenharmony_ci * \ref ubrk_first, or \ref ubrk_last.
3477777dab0Sopenharmony_ci * @stable ICU 2.0
3487777dab0Sopenharmony_ci */
3497777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
3507777dab0Sopenharmony_ciubrk_current(const UBreakIterator *bi);
3517777dab0Sopenharmony_ci
3527777dab0Sopenharmony_ci/**
3537777dab0Sopenharmony_ci * Advance the iterator to the boundary following the current boundary.
3547777dab0Sopenharmony_ci *
3557777dab0Sopenharmony_ci * @param bi The break iterator to use.
3567777dab0Sopenharmony_ci * @return The character index of the next text boundary, or UBRK_DONE
3577777dab0Sopenharmony_ci * if all text boundaries have been returned.
3587777dab0Sopenharmony_ci * @see ubrk_previous
3597777dab0Sopenharmony_ci * @stable ICU 2.0
3607777dab0Sopenharmony_ci */
3617777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
3627777dab0Sopenharmony_ciubrk_next(UBreakIterator *bi);
3637777dab0Sopenharmony_ci
3647777dab0Sopenharmony_ci/**
3657777dab0Sopenharmony_ci * Set the iterator position to the boundary preceding the current boundary.
3667777dab0Sopenharmony_ci *
3677777dab0Sopenharmony_ci * @param bi The break iterator to use.
3687777dab0Sopenharmony_ci * @return The character index of the preceding text boundary, or UBRK_DONE
3697777dab0Sopenharmony_ci * if all text boundaries have been returned.
3707777dab0Sopenharmony_ci * @see ubrk_next
3717777dab0Sopenharmony_ci * @stable ICU 2.0
3727777dab0Sopenharmony_ci */
3737777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
3747777dab0Sopenharmony_ciubrk_previous(UBreakIterator *bi);
3757777dab0Sopenharmony_ci
3767777dab0Sopenharmony_ci/**
3777777dab0Sopenharmony_ci * Set the iterator position to zero, the start of the text being scanned.
3787777dab0Sopenharmony_ci * @param bi The break iterator to use.
3797777dab0Sopenharmony_ci * @return The new iterator position (zero).
3807777dab0Sopenharmony_ci * @see ubrk_last
3817777dab0Sopenharmony_ci * @stable ICU 2.0
3827777dab0Sopenharmony_ci */
3837777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
3847777dab0Sopenharmony_ciubrk_first(UBreakIterator *bi);
3857777dab0Sopenharmony_ci
3867777dab0Sopenharmony_ci/**
3877777dab0Sopenharmony_ci * Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
3887777dab0Sopenharmony_ci * This is not the same as the last character.
3897777dab0Sopenharmony_ci * @param bi The break iterator to use.
3907777dab0Sopenharmony_ci * @return The character offset immediately <EM>beyond</EM> the last character in the
3917777dab0Sopenharmony_ci * text being scanned.
3927777dab0Sopenharmony_ci * @see ubrk_first
3937777dab0Sopenharmony_ci * @stable ICU 2.0
3947777dab0Sopenharmony_ci */
3957777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
3967777dab0Sopenharmony_ciubrk_last(UBreakIterator *bi);
3977777dab0Sopenharmony_ci
3987777dab0Sopenharmony_ci/**
3997777dab0Sopenharmony_ci * Set the iterator position to the first boundary preceding the specified offset.
4007777dab0Sopenharmony_ci * The new position is always smaller than offset, or UBRK_DONE.
4017777dab0Sopenharmony_ci * @param bi The break iterator to use.
4027777dab0Sopenharmony_ci * @param offset The offset to begin scanning.
4037777dab0Sopenharmony_ci * @return The text boundary preceding offset, or UBRK_DONE.
4047777dab0Sopenharmony_ci * @see ubrk_following
4057777dab0Sopenharmony_ci * @stable ICU 2.0
4067777dab0Sopenharmony_ci */
4077777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
4087777dab0Sopenharmony_ciubrk_preceding(UBreakIterator *bi,
4097777dab0Sopenharmony_ci           int32_t offset);
4107777dab0Sopenharmony_ci
4117777dab0Sopenharmony_ci/**
4127777dab0Sopenharmony_ci * Advance the iterator to the first boundary following the specified offset.
4137777dab0Sopenharmony_ci * The value returned is always greater than offset, or UBRK_DONE.
4147777dab0Sopenharmony_ci * @param bi The break iterator to use.
4157777dab0Sopenharmony_ci * @param offset The offset to begin scanning.
4167777dab0Sopenharmony_ci * @return The text boundary following offset, or UBRK_DONE.
4177777dab0Sopenharmony_ci * @see ubrk_preceding
4187777dab0Sopenharmony_ci * @stable ICU 2.0
4197777dab0Sopenharmony_ci */
4207777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
4217777dab0Sopenharmony_ciubrk_following(UBreakIterator *bi,
4227777dab0Sopenharmony_ci           int32_t offset);
4237777dab0Sopenharmony_ci
4247777dab0Sopenharmony_ci/**
4257777dab0Sopenharmony_ci* Get a locale for which text breaking information is available.
4267777dab0Sopenharmony_ci* A UBreakIterator in a locale returned by this function will perform the correct
4277777dab0Sopenharmony_ci* text breaking for the locale.
4287777dab0Sopenharmony_ci* @param index The index of the desired locale.
4297777dab0Sopenharmony_ci* @return A locale for which number text breaking information is available, or 0 if none.
4307777dab0Sopenharmony_ci* @see ubrk_countAvailable
4317777dab0Sopenharmony_ci* @stable ICU 2.0
4327777dab0Sopenharmony_ci*/
4337777dab0Sopenharmony_ciU_CAPI const char* U_EXPORT2
4347777dab0Sopenharmony_ciubrk_getAvailable(int32_t index);
4357777dab0Sopenharmony_ci
4367777dab0Sopenharmony_ci/**
4377777dab0Sopenharmony_ci* Determine how many locales have text breaking information available.
4387777dab0Sopenharmony_ci* This function is most useful as determining the loop ending condition for
4397777dab0Sopenharmony_ci* calls to \ref ubrk_getAvailable.
4407777dab0Sopenharmony_ci* @return The number of locales for which text breaking information is available.
4417777dab0Sopenharmony_ci* @see ubrk_getAvailable
4427777dab0Sopenharmony_ci* @stable ICU 2.0
4437777dab0Sopenharmony_ci*/
4447777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
4457777dab0Sopenharmony_ciubrk_countAvailable(void);
4467777dab0Sopenharmony_ci
4477777dab0Sopenharmony_ci
4487777dab0Sopenharmony_ci/**
4497777dab0Sopenharmony_ci* Returns true if the specified position is a boundary position.  As a side
4507777dab0Sopenharmony_ci* effect, leaves the iterator pointing to the first boundary position at
4517777dab0Sopenharmony_ci* or after "offset".
4527777dab0Sopenharmony_ci* @param bi The break iterator to use.
4537777dab0Sopenharmony_ci* @param offset the offset to check.
4547777dab0Sopenharmony_ci* @return True if "offset" is a boundary position.
4557777dab0Sopenharmony_ci* @stable ICU 2.0
4567777dab0Sopenharmony_ci*/
4577777dab0Sopenharmony_ciU_CAPI  UBool U_EXPORT2
4587777dab0Sopenharmony_ciubrk_isBoundary(UBreakIterator *bi, int32_t offset);
4597777dab0Sopenharmony_ci
4607777dab0Sopenharmony_ci/**
4617777dab0Sopenharmony_ci * Return the status from the break rule that determined the most recently
4627777dab0Sopenharmony_ci * returned break position.  The values appear in the rule source
4637777dab0Sopenharmony_ci * within brackets, {123}, for example.  For rules that do not specify a
4647777dab0Sopenharmony_ci * status, a default value of 0 is returned.
4657777dab0Sopenharmony_ci * <p>
4667777dab0Sopenharmony_ci * For word break iterators, the possible values are defined in enum UWordBreak.
4677777dab0Sopenharmony_ci * @stable ICU 2.2
4687777dab0Sopenharmony_ci */
4697777dab0Sopenharmony_ciU_CAPI  int32_t U_EXPORT2
4707777dab0Sopenharmony_ciubrk_getRuleStatus(UBreakIterator *bi);
4717777dab0Sopenharmony_ci
4727777dab0Sopenharmony_ci/**
4737777dab0Sopenharmony_ci * Get the statuses from the break rules that determined the most recently
4747777dab0Sopenharmony_ci * returned break position.  The values appear in the rule source
4757777dab0Sopenharmony_ci * within brackets, {123}, for example.  The default status value for rules
4767777dab0Sopenharmony_ci * that do not explicitly provide one is zero.
4777777dab0Sopenharmony_ci * <p>
4787777dab0Sopenharmony_ci * For word break iterators, the possible values are defined in enum UWordBreak.
4797777dab0Sopenharmony_ci * @param bi        The break iterator to use
4807777dab0Sopenharmony_ci * @param fillInVec an array to be filled in with the status values.
4817777dab0Sopenharmony_ci * @param capacity  the length of the supplied vector.  A length of zero causes
4827777dab0Sopenharmony_ci *                  the function to return the number of status values, in the
4837777dab0Sopenharmony_ci *                  normal way, without attempting to store any values.
4847777dab0Sopenharmony_ci * @param status    receives error codes.
4857777dab0Sopenharmony_ci * @return          The number of rule status values from rules that determined
4867777dab0Sopenharmony_ci *                  the most recent boundary returned by the break iterator.
4877777dab0Sopenharmony_ci * @stable ICU 3.0
4887777dab0Sopenharmony_ci */
4897777dab0Sopenharmony_ciU_CAPI  int32_t U_EXPORT2
4907777dab0Sopenharmony_ciubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
4917777dab0Sopenharmony_ci
4927777dab0Sopenharmony_ci/**
4937777dab0Sopenharmony_ci * Return the locale of the break iterator. You can choose between the valid and
4947777dab0Sopenharmony_ci * the actual locale.
4957777dab0Sopenharmony_ci * @param bi break iterator
4967777dab0Sopenharmony_ci * @param type locale type (valid or actual)
4977777dab0Sopenharmony_ci * @param status error code
4987777dab0Sopenharmony_ci * @return locale string
4997777dab0Sopenharmony_ci * @stable ICU 2.8
5007777dab0Sopenharmony_ci */
5017777dab0Sopenharmony_ciU_CAPI const char* U_EXPORT2
5027777dab0Sopenharmony_ciubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
5037777dab0Sopenharmony_ci
5047777dab0Sopenharmony_ci/**
5057777dab0Sopenharmony_ci * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
5067777dab0Sopenharmony_ci * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
5077777dab0Sopenharmony_ci * more quickly than using ubrk_openRules. The compiled rules are not compatible across
5087777dab0Sopenharmony_ci * different major versions of ICU, nor across platforms of different endianness or
5097777dab0Sopenharmony_ci * different base character set family (ASCII vs EBCDIC). Supports preflighting (with
5107777dab0Sopenharmony_ci * binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
5117777dab0Sopenharmony_ci * the binaryRules buffer. However, whether preflighting or not, if the actual length
5127777dab0Sopenharmony_ci * is greater than INT32_MAX, then the function returns 0 and sets *status to
5137777dab0Sopenharmony_ci * U_INDEX_OUTOFBOUNDS_ERROR.
5147777dab0Sopenharmony_ci
5157777dab0Sopenharmony_ci * @param bi            The break iterator to use.
5167777dab0Sopenharmony_ci * @param binaryRules   Buffer to receive the compiled binary rules; set to NULL for
5177777dab0Sopenharmony_ci *                      preflighting.
5187777dab0Sopenharmony_ci * @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
5197777dab0Sopenharmony_ci *                      preflighting. Must be >= 0.
5207777dab0Sopenharmony_ci * @param status        Pointer to UErrorCode to receive any errors, such as
5217777dab0Sopenharmony_ci *                      U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
5227777dab0Sopenharmony_ci *                      U_ILLEGAL_ARGUMENT_ERROR.
5237777dab0Sopenharmony_ci * @return              The actual byte length of the binary rules, if <= INT32_MAX;
5247777dab0Sopenharmony_ci *                      otherwise 0. If not preflighting and this is larger than
5257777dab0Sopenharmony_ci *                      rulesCapacity, *status will be set to an error.
5267777dab0Sopenharmony_ci * @see ubrk_openBinaryRules
5277777dab0Sopenharmony_ci * @stable ICU 59
5287777dab0Sopenharmony_ci */
5297777dab0Sopenharmony_ciU_CAPI int32_t U_EXPORT2
5307777dab0Sopenharmony_ciubrk_getBinaryRules(UBreakIterator *bi,
5317777dab0Sopenharmony_ci                    uint8_t *       binaryRules, int32_t rulesCapacity,
5327777dab0Sopenharmony_ci                    UErrorCode *    status);
5337777dab0Sopenharmony_ci
5347777dab0Sopenharmony_ci#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
5357777dab0Sopenharmony_ci
5367777dab0Sopenharmony_ci#endif
537