1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4********************************************************************************
5*   Copyright (C) 1996-2015, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7********************************************************************************
8*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/ubrk.h"
15
16#include "unicode/brkiter.h"
17#include "unicode/uloc.h"
18#include "unicode/ustring.h"
19#include "unicode/uchriter.h"
20#include "unicode/rbbi.h"
21#include "rbbirb.h"
22#include "uassert.h"
23#include "cmemory.h"
24
25U_NAMESPACE_USE
26
27//------------------------------------------------------------------------------
28//
29//    ubrk_open      Create a canned type of break iterator based on type (word, line, etc.)
30//                   and locale.
31//
32//------------------------------------------------------------------------------
33U_CAPI UBreakIterator* U_EXPORT2
34ubrk_open(UBreakIteratorType type,
35      const char *locale,
36      const char16_t *text,
37      int32_t textLength,
38      UErrorCode *status)
39{
40
41  if(U_FAILURE(*status)) return 0;
42
43  BreakIterator *result = 0;
44
45  switch(type) {
46
47  case UBRK_CHARACTER:
48    result = BreakIterator::createCharacterInstance(Locale(locale), *status);
49    break;
50
51  case UBRK_WORD:
52    result = BreakIterator::createWordInstance(Locale(locale), *status);
53    break;
54
55  case UBRK_LINE:
56    result = BreakIterator::createLineInstance(Locale(locale), *status);
57    break;
58
59  case UBRK_SENTENCE:
60    result = BreakIterator::createSentenceInstance(Locale(locale), *status);
61    break;
62
63  case UBRK_TITLE:
64    result = BreakIterator::createTitleInstance(Locale(locale), *status);
65    break;
66
67  default:
68    *status = U_ILLEGAL_ARGUMENT_ERROR;
69  }
70
71  // check for allocation error
72  if (U_FAILURE(*status)) {
73     return 0;
74  }
75  if(result == 0) {
76    *status = U_MEMORY_ALLOCATION_ERROR;
77    return 0;
78  }
79
80
81  UBreakIterator *uBI = (UBreakIterator *)result;
82  if (text != nullptr) {
83      ubrk_setText(uBI, text, textLength, status);
84  }
85  return uBI;
86}
87
88
89
90//------------------------------------------------------------------------------
91//
92//   ubrk_openRules      open a break iterator from a set of break rules.
93//                       Invokes the rule builder.
94//
95//------------------------------------------------------------------------------
96U_CAPI UBreakIterator* U_EXPORT2
97ubrk_openRules(  const char16_t     *rules,
98                       int32_t       rulesLength,
99                 const char16_t     *text,
100                       int32_t       textLength,
101                       UParseError  *parseErr,
102                       UErrorCode   *status)  {
103
104    if (status == nullptr || U_FAILURE(*status)){
105        return 0;
106    }
107
108    BreakIterator *result = 0;
109    UnicodeString ruleString(rules, rulesLength);
110    result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
111    if(U_FAILURE(*status)) {
112        return 0;
113    }
114
115    UBreakIterator *uBI = (UBreakIterator *)result;
116    if (text != nullptr) {
117        ubrk_setText(uBI, text, textLength, status);
118    }
119    return uBI;
120}
121
122
123U_CAPI UBreakIterator* U_EXPORT2
124ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
125                     const char16_t *  text, int32_t textLength,
126                     UErrorCode *   status)
127{
128    if (U_FAILURE(*status)) {
129        return nullptr;
130    }
131    if (rulesLength < 0) {
132        *status = U_ILLEGAL_ARGUMENT_ERROR;
133        return nullptr;
134    }
135    LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status);
136    if (U_FAILURE(*status)) {
137        return nullptr;
138    }
139    UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan());
140    if (text != nullptr) {
141        ubrk_setText(uBI, text, textLength, status);
142    }
143    return uBI;
144}
145
146
147U_CAPI UBreakIterator * U_EXPORT2
148ubrk_safeClone(
149          const UBreakIterator *bi,
150          void * /*stackBuffer*/,
151          int32_t *pBufferSize,
152          UErrorCode *status)
153{
154    if (status == nullptr || U_FAILURE(*status)){
155        return nullptr;
156    }
157    if (bi == nullptr) {
158       *status = U_ILLEGAL_ARGUMENT_ERROR;
159        return nullptr;
160    }
161    if (pBufferSize != nullptr) {
162        int32_t inputSize = *pBufferSize;
163        *pBufferSize = 1;
164        if (inputSize == 0) {
165            return nullptr;  // preflighting for deprecated functionality
166        }
167    }
168    BreakIterator *newBI = ((BreakIterator *)bi)->clone();
169    if (newBI == nullptr) {
170        *status = U_MEMORY_ALLOCATION_ERROR;
171    } else if (pBufferSize != nullptr) {
172        *status = U_SAFECLONE_ALLOCATED_WARNING;
173    }
174    return (UBreakIterator *)newBI;
175}
176
177U_CAPI UBreakIterator * U_EXPORT2
178ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
179    return ubrk_safeClone(bi, nullptr, nullptr, status);
180}
181
182
183U_CAPI void U_EXPORT2
184ubrk_close(UBreakIterator *bi)
185{
186    delete (BreakIterator *)bi;
187}
188
189U_CAPI void U_EXPORT2
190ubrk_setText(UBreakIterator* bi,
191             const char16_t*    text,
192             int32_t         textLength,
193             UErrorCode*     status)
194{
195    UText  ut = UTEXT_INITIALIZER;
196    utext_openUChars(&ut, text, textLength, status);
197    ((BreakIterator*)bi)->setText(&ut, *status);
198    // A stack allocated UText wrapping a char16_t * string
199    //   can be dumped without explicitly closing it.
200}
201
202
203
204U_CAPI void U_EXPORT2
205ubrk_setUText(UBreakIterator *bi,
206             UText          *text,
207             UErrorCode     *status)
208{
209  ((BreakIterator*)bi)->setText(text, *status);
210}
211
212
213
214
215
216U_CAPI int32_t U_EXPORT2
217ubrk_current(const UBreakIterator *bi)
218{
219
220  return ((BreakIterator*)bi)->current();
221}
222
223U_CAPI int32_t U_EXPORT2
224ubrk_next(UBreakIterator *bi)
225{
226
227  return ((BreakIterator*)bi)->next();
228}
229
230U_CAPI int32_t U_EXPORT2
231ubrk_previous(UBreakIterator *bi)
232{
233
234  return ((BreakIterator*)bi)->previous();
235}
236
237U_CAPI int32_t U_EXPORT2
238ubrk_first(UBreakIterator *bi)
239{
240
241  return ((BreakIterator*)bi)->first();
242}
243
244U_CAPI int32_t U_EXPORT2
245ubrk_last(UBreakIterator *bi)
246{
247
248  return ((BreakIterator*)bi)->last();
249}
250
251U_CAPI int32_t U_EXPORT2
252ubrk_preceding(UBreakIterator *bi,
253           int32_t offset)
254{
255
256  return ((BreakIterator*)bi)->preceding(offset);
257}
258
259U_CAPI int32_t U_EXPORT2
260ubrk_following(UBreakIterator *bi,
261           int32_t offset)
262{
263
264  return ((BreakIterator*)bi)->following(offset);
265}
266
267U_CAPI const char* U_EXPORT2
268ubrk_getAvailable(int32_t index)
269{
270
271  return uloc_getAvailable(index);
272}
273
274U_CAPI int32_t U_EXPORT2
275ubrk_countAvailable()
276{
277
278  return uloc_countAvailable();
279}
280
281
282U_CAPI  UBool U_EXPORT2
283ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
284{
285    return ((BreakIterator*)bi)->isBoundary(offset);
286}
287
288
289U_CAPI  int32_t U_EXPORT2
290ubrk_getRuleStatus(UBreakIterator *bi)
291{
292    return ((BreakIterator*)bi)->getRuleStatus();
293}
294
295U_CAPI  int32_t U_EXPORT2
296ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
297{
298    return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status);
299}
300
301
302U_CAPI const char* U_EXPORT2
303ubrk_getLocaleByType(const UBreakIterator *bi,
304                     ULocDataLocaleType type,
305                     UErrorCode* status)
306{
307    if (bi == nullptr) {
308        if (U_SUCCESS(*status)) {
309            *status = U_ILLEGAL_ARGUMENT_ERROR;
310        }
311        return nullptr;
312    }
313    return ((BreakIterator*)bi)->getLocaleID(type, *status);
314}
315
316
317U_CAPI void U_EXPORT2
318ubrk_refreshUText(UBreakIterator *bi,
319                       UText          *text,
320                       UErrorCode     *status)
321{
322    BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
323    bii->refreshInputText(text, *status);
324}
325
326U_CAPI int32_t U_EXPORT2
327ubrk_getBinaryRules(UBreakIterator *bi,
328                    uint8_t *       binaryRules, int32_t rulesCapacity,
329                    UErrorCode *    status)
330{
331    if (U_FAILURE(*status)) {
332        return 0;
333    }
334    if ((binaryRules == nullptr && rulesCapacity > 0) || rulesCapacity < 0) {
335        *status = U_ILLEGAL_ARGUMENT_ERROR;
336        return 0;
337    }
338    RuleBasedBreakIterator* rbbi;
339    if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == nullptr) {
340        *status = U_ILLEGAL_ARGUMENT_ERROR;
341        return 0;
342    }
343    uint32_t rulesLength;
344    const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength);
345    if (rulesLength > INT32_MAX) {
346        *status = U_INDEX_OUTOFBOUNDS_ERROR;
347        return 0;
348    }
349    if (binaryRules != nullptr) { // if not preflighting
350        // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
351        if ((int32_t)rulesLength > rulesCapacity) {
352            *status = U_BUFFER_OVERFLOW_ERROR;
353        } else {
354            uprv_memcpy(binaryRules, returnedRules, rulesLength);
355        }
356    }
357    return (int32_t)rulesLength;
358}
359
360
361#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
362