xref: /third_party/icu/icu4c/source/i18n/stsearch.cpp (revision 2e5b6d6d)
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5*   Copyright (C) 2001-2014 IBM and others. All rights reserved.
6**********************************************************************
7*   Date        Name        Description
8*  03/22/2000   helena      Creation.
9**********************************************************************
10*/
11
12#include "unicode/utypes.h"
13
14#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
15
16#include "unicode/stsearch.h"
17#include "usrchimp.h"
18#include "cmemory.h"
19
20U_NAMESPACE_BEGIN
21
22UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
23
24// public constructors and destructors -----------------------------------
25
26StringSearch::StringSearch(const UnicodeString &pattern,
27                           const UnicodeString &text,
28                           const Locale        &locale,
29                                 BreakIterator *breakiter,
30                                 UErrorCode    &status) :
31                           SearchIterator(text, breakiter),
32                           m_pattern_(pattern)
33{
34    if (U_FAILURE(status)) {
35        m_strsrch_ = NULL;
36        return;
37    }
38
39    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
40                              m_text_.getBuffer(), m_text_.length(),
41                              locale.getName(), (UBreakIterator *)breakiter,
42                              &status);
43    uprv_free(m_search_);
44    m_search_ = NULL;
45
46    if (U_SUCCESS(status)) {
47        // m_search_ has been created by the base SearchIterator class
48        m_search_        = m_strsrch_->search;
49    }
50}
51
52StringSearch::StringSearch(const UnicodeString     &pattern,
53                           const UnicodeString     &text,
54                                 RuleBasedCollator *coll,
55                                 BreakIterator     *breakiter,
56                                 UErrorCode        &status) :
57                           SearchIterator(text, breakiter),
58                           m_pattern_(pattern)
59{
60    if (U_FAILURE(status)) {
61        m_strsrch_ = NULL;
62        return;
63    }
64    if (coll == NULL) {
65        status     = U_ILLEGAL_ARGUMENT_ERROR;
66        m_strsrch_ = NULL;
67        return;
68    }
69    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
70                                          m_pattern_.length(),
71                                          m_text_.getBuffer(),
72                                          m_text_.length(), coll->toUCollator(),
73                                          (UBreakIterator *)breakiter,
74                                          &status);
75    uprv_free(m_search_);
76    m_search_ = NULL;
77
78    if (U_SUCCESS(status)) {
79        // m_search_ has been created by the base SearchIterator class
80        m_search_ = m_strsrch_->search;
81    }
82}
83
84StringSearch::StringSearch(const UnicodeString     &pattern,
85                                 CharacterIterator &text,
86                           const Locale            &locale,
87                                 BreakIterator     *breakiter,
88                                 UErrorCode        &status) :
89                           SearchIterator(text, breakiter),
90                           m_pattern_(pattern)
91{
92    if (U_FAILURE(status)) {
93        m_strsrch_ = NULL;
94        return;
95    }
96    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
97                              m_text_.getBuffer(), m_text_.length(),
98                              locale.getName(), (UBreakIterator *)breakiter,
99                              &status);
100    uprv_free(m_search_);
101    m_search_ = NULL;
102
103    if (U_SUCCESS(status)) {
104        // m_search_ has been created by the base SearchIterator class
105        m_search_ = m_strsrch_->search;
106    }
107}
108
109StringSearch::StringSearch(const UnicodeString     &pattern,
110                                 CharacterIterator &text,
111                                 RuleBasedCollator *coll,
112                                 BreakIterator     *breakiter,
113                                 UErrorCode        &status) :
114                           SearchIterator(text, breakiter),
115                           m_pattern_(pattern)
116{
117    if (U_FAILURE(status)) {
118        m_strsrch_ = NULL;
119        return;
120    }
121    if (coll == NULL) {
122        status     = U_ILLEGAL_ARGUMENT_ERROR;
123        m_strsrch_ = NULL;
124        return;
125    }
126    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
127                                          m_pattern_.length(),
128                                          m_text_.getBuffer(),
129                                          m_text_.length(), coll->toUCollator(),
130                                          (UBreakIterator *)breakiter,
131                                          &status);
132    uprv_free(m_search_);
133    m_search_ = NULL;
134
135    if (U_SUCCESS(status)) {
136        // m_search_ has been created by the base SearchIterator class
137        m_search_ = m_strsrch_->search;
138    }
139}
140
141StringSearch::StringSearch(const StringSearch &that) :
142                       SearchIterator(that.m_text_, that.m_breakiterator_),
143                       m_pattern_(that.m_pattern_)
144{
145    UErrorCode status = U_ZERO_ERROR;
146
147    // Free m_search_ from the superclass
148    uprv_free(m_search_);
149    m_search_ = NULL;
150
151    if (that.m_strsrch_ == NULL) {
152        // This was not a good copy
153        m_strsrch_ = NULL;
154    }
155    else {
156        // Make a deep copy
157        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
158                                              m_pattern_.length(),
159                                              m_text_.getBuffer(),
160                                              m_text_.length(),
161                                              that.m_strsrch_->collator,
162                                             (UBreakIterator *)that.m_breakiterator_,
163                                              &status);
164        if (U_SUCCESS(status)) {
165            // m_search_ has been created by the base SearchIterator class
166            m_search_        = m_strsrch_->search;
167        }
168    }
169}
170
171StringSearch::~StringSearch()
172{
173    if (m_strsrch_ != NULL) {
174        usearch_close(m_strsrch_);
175        m_search_ = NULL;
176    }
177}
178
179StringSearch *
180StringSearch::clone() const {
181    return new StringSearch(*this);
182}
183
184// operator overloading ---------------------------------------------
185StringSearch & StringSearch::operator=(const StringSearch &that)
186{
187    if (this != &that) {
188        UErrorCode status = U_ZERO_ERROR;
189        m_text_          = that.m_text_;
190        m_breakiterator_ = that.m_breakiterator_;
191        m_pattern_       = that.m_pattern_;
192        // all m_search_ in the parent class is linked up with m_strsrch_
193        usearch_close(m_strsrch_);
194        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
195                                              m_pattern_.length(),
196                                              m_text_.getBuffer(),
197                                              m_text_.length(),
198                                              that.m_strsrch_->collator,
199                                              NULL, &status);
200        // Check null pointer
201        if (m_strsrch_ != NULL) {
202            m_search_ = m_strsrch_->search;
203        }
204    }
205    return *this;
206}
207
208bool StringSearch::operator==(const SearchIterator &that) const
209{
210    if (this == &that) {
211        return true;
212    }
213    if (SearchIterator::operator ==(that)) {
214        StringSearch &thatsrch = (StringSearch &)that;
215        return (this->m_pattern_ == thatsrch.m_pattern_ &&
216                this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
217    }
218    return false;
219}
220
221// public get and set methods ----------------------------------------
222
223void StringSearch::setOffset(int32_t position, UErrorCode &status)
224{
225    // status checked in usearch_setOffset
226    usearch_setOffset(m_strsrch_, position, &status);
227}
228
229int32_t StringSearch::getOffset(void) const
230{
231    return usearch_getOffset(m_strsrch_);
232}
233
234void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
235{
236    if (U_SUCCESS(status)) {
237        m_text_ = text;
238        usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
239    }
240}
241
242void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
243{
244    if (U_SUCCESS(status)) {
245        text.getText(m_text_);
246        usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
247    }
248}
249
250RuleBasedCollator * StringSearch::getCollator() const
251{
252    // Note the const_cast. It would be cleaner if this const method returned a const collator.
253    return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
254}
255
256void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
257{
258    if (U_SUCCESS(status)) {
259        usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
260    }
261}
262
263void StringSearch::setPattern(const UnicodeString &pattern,
264                                    UErrorCode    &status)
265{
266    if (U_SUCCESS(status)) {
267        m_pattern_ = pattern;
268        usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
269                           &status);
270    }
271}
272
273const UnicodeString & StringSearch::getPattern() const
274{
275    return m_pattern_;
276}
277
278// public methods ----------------------------------------------------
279
280void StringSearch::reset()
281{
282    usearch_reset(m_strsrch_);
283}
284
285StringSearch * StringSearch::safeClone() const
286{
287    UErrorCode status = U_ZERO_ERROR;
288    StringSearch *result = new StringSearch(m_pattern_, m_text_,
289                                            getCollator(),
290                                            m_breakiterator_,
291                                            status);
292    /* test for NULL */
293    if (result == 0) {
294        status = U_MEMORY_ALLOCATION_ERROR;
295        return 0;
296    }
297    result->setOffset(getOffset(), status);
298    result->setMatchStart(m_strsrch_->search->matchedIndex);
299    result->setMatchLength(m_strsrch_->search->matchedLength);
300    if (U_FAILURE(status)) {
301        return NULL;
302    }
303    return result;
304}
305
306// protected method -------------------------------------------------
307
308int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
309{
310    // values passed here are already in the pre-shift position
311    if (U_SUCCESS(status)) {
312        if (m_strsrch_->pattern.cesLength == 0) {
313            m_search_->matchedIndex =
314                                    m_search_->matchedIndex == USEARCH_DONE ?
315                                    getOffset() : m_search_->matchedIndex + 1;
316            m_search_->matchedLength = 0;
317            ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
318                           &status);
319            if (m_search_->matchedIndex == m_search_->textLength) {
320                m_search_->matchedIndex = USEARCH_DONE;
321            }
322        }
323        else {
324            // looking at usearch.cpp, this part is shifted out to
325            // StringSearch instead of SearchIterator because m_strsrch_ is
326            // not accessible in SearchIterator
327#if 0
328            if (position + m_strsrch_->pattern.defaultShiftSize
329                > m_search_->textLength) {
330                setMatchNotFound();
331                return USEARCH_DONE;
332            }
333#endif
334            if (m_search_->matchedLength <= 0) {
335                // the flipping direction issue has already been handled
336                // in next()
337                // for boundary check purposes. this will ensure that the
338                // next match will not precede the current offset
339                // note search->matchedIndex will always be set to something
340                // in the code
341                m_search_->matchedIndex = position - 1;
342            }
343
344            ucol_setOffset(m_strsrch_->textIter, position, &status);
345
346#if 0
347            for (;;) {
348                if (m_search_->isCanonicalMatch) {
349                    // can't use exact here since extra accents are allowed.
350                    usearch_handleNextCanonical(m_strsrch_, &status);
351                }
352                else {
353                    usearch_handleNextExact(m_strsrch_, &status);
354                }
355                if (U_FAILURE(status)) {
356                    return USEARCH_DONE;
357                }
358                if (m_breakiterator_ == NULL
359#if !UCONFIG_NO_BREAK_ITERATION
360                    ||
361                    m_search_->matchedIndex == USEARCH_DONE ||
362                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
363                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
364                                                  m_search_->matchedLength))
365#endif
366                ) {
367                    if (m_search_->matchedIndex == USEARCH_DONE) {
368                        ucol_setOffset(m_strsrch_->textIter,
369                                       m_search_->textLength, &status);
370                    }
371                    else {
372                        ucol_setOffset(m_strsrch_->textIter,
373                                       m_search_->matchedIndex, &status);
374                    }
375                    return m_search_->matchedIndex;
376                }
377            }
378#else
379            // if m_strsrch_->breakIter is always the same as m_breakiterator_
380            // then we don't need to check the match boundaries here because
381            // usearch_handleNextXXX will already have done it.
382            if (m_search_->isCanonicalMatch) {
383            	// *could* actually use exact here 'cause no extra accents allowed...
384            	usearch_handleNextCanonical(m_strsrch_, &status);
385            } else {
386            	usearch_handleNextExact(m_strsrch_, &status);
387            }
388
389            if (U_FAILURE(status)) {
390            	return USEARCH_DONE;
391            }
392
393            if (m_search_->matchedIndex == USEARCH_DONE) {
394            	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
395            } else {
396            	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
397            }
398
399            return m_search_->matchedIndex;
400#endif
401        }
402    }
403    return USEARCH_DONE;
404}
405
406int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
407{
408    // values passed here are already in the pre-shift position
409    if (U_SUCCESS(status)) {
410        if (m_strsrch_->pattern.cesLength == 0) {
411            m_search_->matchedIndex =
412                  (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
413                   m_search_->matchedIndex);
414            if (m_search_->matchedIndex == 0) {
415                setMatchNotFound();
416            }
417            else {
418                m_search_->matchedIndex --;
419                ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
420                               &status);
421                m_search_->matchedLength = 0;
422            }
423        }
424        else {
425            // looking at usearch.cpp, this part is shifted out to
426            // StringSearch instead of SearchIterator because m_strsrch_ is
427            // not accessible in SearchIterator
428#if 0
429            if (!m_search_->isOverlap &&
430                position - m_strsrch_->pattern.defaultShiftSize < 0) {
431                setMatchNotFound();
432                return USEARCH_DONE;
433            }
434
435            for (;;) {
436                if (m_search_->isCanonicalMatch) {
437                    // can't use exact here since extra accents are allowed.
438                    usearch_handlePreviousCanonical(m_strsrch_, &status);
439                }
440                else {
441                    usearch_handlePreviousExact(m_strsrch_, &status);
442                }
443                if (U_FAILURE(status)) {
444                    return USEARCH_DONE;
445                }
446                if (m_breakiterator_ == NULL
447#if !UCONFIG_NO_BREAK_ITERATION
448                    ||
449                    m_search_->matchedIndex == USEARCH_DONE ||
450                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
451                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
452                                                  m_search_->matchedLength))
453#endif
454                ) {
455                    return m_search_->matchedIndex;
456                }
457            }
458#else
459            ucol_setOffset(m_strsrch_->textIter, position, &status);
460
461            if (m_search_->isCanonicalMatch) {
462            	// *could* use exact match here since extra accents *not* allowed!
463            	usearch_handlePreviousCanonical(m_strsrch_, &status);
464            } else {
465            	usearch_handlePreviousExact(m_strsrch_, &status);
466            }
467
468            if (U_FAILURE(status)) {
469            	return USEARCH_DONE;
470            }
471
472            return m_search_->matchedIndex;
473#endif
474        }
475
476        return m_search_->matchedIndex;
477    }
478    return USEARCH_DONE;
479}
480
481U_NAMESPACE_END
482
483#endif /* #if !UCONFIG_NO_COLLATION */
484