12e5b6d6dSopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
32e5b6d6dSopenharmony_ci//
42e5b6d6dSopenharmony_ci//  file:  rbbiscan.cpp
52e5b6d6dSopenharmony_ci//
62e5b6d6dSopenharmony_ci//  Copyright (C) 2002-2016, International Business Machines Corporation and others.
72e5b6d6dSopenharmony_ci//  All Rights Reserved.
82e5b6d6dSopenharmony_ci//
92e5b6d6dSopenharmony_ci//  This file contains the Rule Based Break Iterator Rule Builder functions for
102e5b6d6dSopenharmony_ci//   scanning the rules and assembling a parse tree.  This is the first phase
112e5b6d6dSopenharmony_ci//   of compiling the rules.
122e5b6d6dSopenharmony_ci//
//  The overall compilation of the rules is managed by class RBBIRuleBuilder, which will
142e5b6d6dSopenharmony_ci//  create and use an instance of this class as part of the process.
152e5b6d6dSopenharmony_ci//
162e5b6d6dSopenharmony_ci
172e5b6d6dSopenharmony_ci#include "unicode/utypes.h"
182e5b6d6dSopenharmony_ci
192e5b6d6dSopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION
202e5b6d6dSopenharmony_ci
212e5b6d6dSopenharmony_ci#include "unicode/unistr.h"
222e5b6d6dSopenharmony_ci#include "unicode/uniset.h"
232e5b6d6dSopenharmony_ci#include "unicode/uchar.h"
242e5b6d6dSopenharmony_ci#include "unicode/uchriter.h"
252e5b6d6dSopenharmony_ci#include "unicode/parsepos.h"
262e5b6d6dSopenharmony_ci#include "unicode/parseerr.h"
272e5b6d6dSopenharmony_ci#include "cmemory.h"
282e5b6d6dSopenharmony_ci#include "cstring.h"
292e5b6d6dSopenharmony_ci
302e5b6d6dSopenharmony_ci#include "rbbirpt.h"   // Contains state table for the rbbi rules parser.
312e5b6d6dSopenharmony_ci                       //   generated by a Perl script.
322e5b6d6dSopenharmony_ci#include "rbbirb.h"
332e5b6d6dSopenharmony_ci#include "rbbinode.h"
342e5b6d6dSopenharmony_ci#include "rbbiscan.h"
352e5b6d6dSopenharmony_ci#include "rbbitblb.h"
362e5b6d6dSopenharmony_ci
372e5b6d6dSopenharmony_ci#include "uassert.h"
382e5b6d6dSopenharmony_ci
392e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
402e5b6d6dSopenharmony_ci//
412e5b6d6dSopenharmony_ci// Unicode Set init strings for each of the character classes needed for parsing a rule file.
422e5b6d6dSopenharmony_ci//               (Initialized with hex values for portability to EBCDIC based machines.
432e5b6d6dSopenharmony_ci//                Really ugly, but there's no good way to avoid it.)
442e5b6d6dSopenharmony_ci//
452e5b6d6dSopenharmony_ci//              The sets are referred to by name in the rbbirpt.txt, which is the
462e5b6d6dSopenharmony_ci//              source form of the state transition table for the RBBI rule parser.
472e5b6d6dSopenharmony_ci//
482e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
492e5b6d6dSopenharmony_cistatic const UChar gRuleSet_rule_char_pattern[]       = {
502e5b6d6dSopenharmony_ci // Characters that may appear as literals in patterns without escaping or quoting.
512e5b6d6dSopenharmony_ci //   [    ^      [    \     p     {      Z     }     \     u    0      0    2      0
522e5b6d6dSopenharmony_ci    0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30,
532e5b6d6dSopenharmony_ci //   -    \      u    0     0     7      f     ]     -     [    \      p
542e5b6d6dSopenharmony_ci    0x2d, 0x5c, 0x75, 0x30, 0x30, 0x37, 0x66, 0x5d, 0x2d, 0x5b, 0x5c, 0x70,
552e5b6d6dSopenharmony_ci //   {     L     }    ]     -     [      \     p     {     N    }      ]     ]
562e5b6d6dSopenharmony_ci    0x7b, 0x4c, 0x7d, 0x5d, 0x2d, 0x5b, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0x5d, 0};
572e5b6d6dSopenharmony_ci
582e5b6d6dSopenharmony_cistatic const UChar gRuleSet_name_char_pattern[]       = {
592e5b6d6dSopenharmony_ci//    [    _      \    p     {     L      }     \     p     {    N      }     ]
602e5b6d6dSopenharmony_ci    0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0};
612e5b6d6dSopenharmony_ci
622e5b6d6dSopenharmony_cistatic const UChar gRuleSet_digit_char_pattern[] = {
632e5b6d6dSopenharmony_ci//    [    0      -    9     ]
642e5b6d6dSopenharmony_ci    0x5b, 0x30, 0x2d, 0x39, 0x5d, 0};
652e5b6d6dSopenharmony_ci
662e5b6d6dSopenharmony_cistatic const UChar gRuleSet_name_start_char_pattern[] = {
672e5b6d6dSopenharmony_ci//    [    _      \    p     {     L      }     ]
682e5b6d6dSopenharmony_ci    0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5d, 0 };
692e5b6d6dSopenharmony_ci
702e5b6d6dSopenharmony_cistatic const UChar kAny[] = {0x61, 0x6e, 0x79, 0x00};  // "any"
712e5b6d6dSopenharmony_ci
722e5b6d6dSopenharmony_ci
732e5b6d6dSopenharmony_ciU_CDECL_BEGIN
742e5b6d6dSopenharmony_cistatic void U_CALLCONV RBBISetTable_deleter(void *p) {
752e5b6d6dSopenharmony_ci    icu::RBBISetTableEl *px = (icu::RBBISetTableEl *)p;
762e5b6d6dSopenharmony_ci    delete px->key;
772e5b6d6dSopenharmony_ci    // Note:  px->val is owned by the linked list "fSetsListHead" in scanner.
782e5b6d6dSopenharmony_ci    //        Don't delete the value nodes here.
792e5b6d6dSopenharmony_ci    uprv_free(px);
802e5b6d6dSopenharmony_ci}
812e5b6d6dSopenharmony_ciU_CDECL_END
822e5b6d6dSopenharmony_ci
832e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN
842e5b6d6dSopenharmony_ci
852e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
862e5b6d6dSopenharmony_ci//
872e5b6d6dSopenharmony_ci//  Constructor.
882e5b6d6dSopenharmony_ci//
892e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
902e5b6d6dSopenharmony_ciRBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
912e5b6d6dSopenharmony_ci{
922e5b6d6dSopenharmony_ci    fRB                 = rb;
932e5b6d6dSopenharmony_ci    fScanIndex          = 0;
942e5b6d6dSopenharmony_ci    fNextIndex          = 0;
952e5b6d6dSopenharmony_ci    fQuoteMode          = false;
962e5b6d6dSopenharmony_ci    fLineNum            = 1;
972e5b6d6dSopenharmony_ci    fCharNum            = 0;
982e5b6d6dSopenharmony_ci    fLastChar           = 0;
992e5b6d6dSopenharmony_ci
1002e5b6d6dSopenharmony_ci    fStateTable         = NULL;
1012e5b6d6dSopenharmony_ci    fStack[0]           = 0;
1022e5b6d6dSopenharmony_ci    fStackPtr           = 0;
1032e5b6d6dSopenharmony_ci    fNodeStack[0]       = NULL;
1042e5b6d6dSopenharmony_ci    fNodeStackPtr       = 0;
1052e5b6d6dSopenharmony_ci
1062e5b6d6dSopenharmony_ci    fReverseRule        = false;
1072e5b6d6dSopenharmony_ci    fLookAheadRule      = false;
1082e5b6d6dSopenharmony_ci    fNoChainInRule      = false;
1092e5b6d6dSopenharmony_ci
1102e5b6d6dSopenharmony_ci    fSymbolTable        = NULL;
1112e5b6d6dSopenharmony_ci    fSetTable           = NULL;
1122e5b6d6dSopenharmony_ci    fRuleNum            = 0;
1132e5b6d6dSopenharmony_ci    fOptionStart        = 0;
1142e5b6d6dSopenharmony_ci
1152e5b6d6dSopenharmony_ci    // Do not check status until after all critical fields are sufficiently initialized
1162e5b6d6dSopenharmony_ci    //   that the destructor can run cleanly.
1172e5b6d6dSopenharmony_ci    if (U_FAILURE(*rb->fStatus)) {
1182e5b6d6dSopenharmony_ci        return;
1192e5b6d6dSopenharmony_ci    }
1202e5b6d6dSopenharmony_ci
1212e5b6d6dSopenharmony_ci    //
1222e5b6d6dSopenharmony_ci    //  Set up the constant Unicode Sets.
1232e5b6d6dSopenharmony_ci    //     Note:  These could be made static, lazily initialized, and shared among
1242e5b6d6dSopenharmony_ci    //            all instances of RBBIRuleScanners.  BUT this is quite a bit simpler,
1252e5b6d6dSopenharmony_ci    //            and the time to build these few sets should be small compared to a
1262e5b6d6dSopenharmony_ci    //            full break iterator build.
1272e5b6d6dSopenharmony_ci    fRuleSets[kRuleSet_rule_char-128]
1282e5b6d6dSopenharmony_ci        = UnicodeSet(UnicodeString(gRuleSet_rule_char_pattern),       *rb->fStatus);
1292e5b6d6dSopenharmony_ci    // fRuleSets[kRuleSet_white_space-128] = [:Pattern_White_Space:]
1302e5b6d6dSopenharmony_ci    fRuleSets[kRuleSet_white_space-128].
1312e5b6d6dSopenharmony_ci        add(9, 0xd).add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
1322e5b6d6dSopenharmony_ci    fRuleSets[kRuleSet_name_char-128]
1332e5b6d6dSopenharmony_ci        = UnicodeSet(UnicodeString(gRuleSet_name_char_pattern),       *rb->fStatus);
1342e5b6d6dSopenharmony_ci    fRuleSets[kRuleSet_name_start_char-128]
1352e5b6d6dSopenharmony_ci        = UnicodeSet(UnicodeString(gRuleSet_name_start_char_pattern), *rb->fStatus);
1362e5b6d6dSopenharmony_ci    fRuleSets[kRuleSet_digit_char-128]
1372e5b6d6dSopenharmony_ci        = UnicodeSet(UnicodeString(gRuleSet_digit_char_pattern),      *rb->fStatus);
1382e5b6d6dSopenharmony_ci    if (*rb->fStatus == U_ILLEGAL_ARGUMENT_ERROR) {
1392e5b6d6dSopenharmony_ci        // This case happens if ICU's data is missing.  UnicodeSet tries to look up property
1402e5b6d6dSopenharmony_ci        //   names from the init string, can't find them, and claims an illegal argument.
1412e5b6d6dSopenharmony_ci        //   Change the error so that the actual problem will be clearer to users.
1422e5b6d6dSopenharmony_ci        *rb->fStatus = U_BRK_INIT_ERROR;
1432e5b6d6dSopenharmony_ci    }
1442e5b6d6dSopenharmony_ci    if (U_FAILURE(*rb->fStatus)) {
1452e5b6d6dSopenharmony_ci        return;
1462e5b6d6dSopenharmony_ci    }
1472e5b6d6dSopenharmony_ci
1482e5b6d6dSopenharmony_ci    fSymbolTable = new RBBISymbolTable(this, rb->fRules, *rb->fStatus);
1492e5b6d6dSopenharmony_ci    if (fSymbolTable == NULL) {
1502e5b6d6dSopenharmony_ci        *rb->fStatus = U_MEMORY_ALLOCATION_ERROR;
1512e5b6d6dSopenharmony_ci        return;
1522e5b6d6dSopenharmony_ci    }
1532e5b6d6dSopenharmony_ci    fSetTable    = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, rb->fStatus);
1542e5b6d6dSopenharmony_ci    if (U_FAILURE(*rb->fStatus)) {
1552e5b6d6dSopenharmony_ci        return;
1562e5b6d6dSopenharmony_ci    }
1572e5b6d6dSopenharmony_ci    uhash_setValueDeleter(fSetTable, RBBISetTable_deleter);
1582e5b6d6dSopenharmony_ci}
1592e5b6d6dSopenharmony_ci
1602e5b6d6dSopenharmony_ci
1612e5b6d6dSopenharmony_ci
1622e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
1632e5b6d6dSopenharmony_ci//
1642e5b6d6dSopenharmony_ci//  Destructor
1652e5b6d6dSopenharmony_ci//
1662e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
1672e5b6d6dSopenharmony_ciRBBIRuleScanner::~RBBIRuleScanner() {
1682e5b6d6dSopenharmony_ci    delete fSymbolTable;
1692e5b6d6dSopenharmony_ci    if (fSetTable != NULL) {
1702e5b6d6dSopenharmony_ci         uhash_close(fSetTable);
1712e5b6d6dSopenharmony_ci         fSetTable = NULL;
1722e5b6d6dSopenharmony_ci
1732e5b6d6dSopenharmony_ci    }
1742e5b6d6dSopenharmony_ci
1752e5b6d6dSopenharmony_ci
1762e5b6d6dSopenharmony_ci    // Node Stack.
1772e5b6d6dSopenharmony_ci    //   Normally has one entry, which is the entire parse tree for the rules.
1782e5b6d6dSopenharmony_ci    //   If errors occurred, there may be additional subtrees left on the stack.
1792e5b6d6dSopenharmony_ci    while (fNodeStackPtr > 0) {
1802e5b6d6dSopenharmony_ci        delete fNodeStack[fNodeStackPtr];
1812e5b6d6dSopenharmony_ci        fNodeStackPtr--;
1822e5b6d6dSopenharmony_ci    }
1832e5b6d6dSopenharmony_ci
1842e5b6d6dSopenharmony_ci}
1852e5b6d6dSopenharmony_ci
1862e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
1872e5b6d6dSopenharmony_ci//
//  doParseActions       Do some action during rule parsing.
1892e5b6d6dSopenharmony_ci//                       Called by the parse state machine.
1902e5b6d6dSopenharmony_ci//                       Actions build the parse tree and Unicode Sets,
1912e5b6d6dSopenharmony_ci//                       and maintain the parse stack for nested expressions.
1922e5b6d6dSopenharmony_ci//
1932e5b6d6dSopenharmony_ci//                       TODO:  unify EParseAction and RBBI_RuleParseAction enum types.
1942e5b6d6dSopenharmony_ci//                              They represent exactly the same thing.  They're separate
1952e5b6d6dSopenharmony_ci//                              only to work around enum forward declaration restrictions
1962e5b6d6dSopenharmony_ci//                              in some compilers, while at the same time avoiding multiple
1972e5b6d6dSopenharmony_ci//                              definitions problems.  I'm sure that there's a better way.
1982e5b6d6dSopenharmony_ci//
1992e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
2002e5b6d6dSopenharmony_ciUBool RBBIRuleScanner::doParseActions(int32_t action)
2012e5b6d6dSopenharmony_ci{
2022e5b6d6dSopenharmony_ci    RBBINode *n       = NULL;
2032e5b6d6dSopenharmony_ci
2042e5b6d6dSopenharmony_ci    UBool   returnVal = true;
2052e5b6d6dSopenharmony_ci
2062e5b6d6dSopenharmony_ci    switch (action) {
2072e5b6d6dSopenharmony_ci
2082e5b6d6dSopenharmony_ci    case doExprStart:
2092e5b6d6dSopenharmony_ci        pushNewNode(RBBINode::opStart);
2102e5b6d6dSopenharmony_ci        fRuleNum++;
2112e5b6d6dSopenharmony_ci        break;
2122e5b6d6dSopenharmony_ci
2132e5b6d6dSopenharmony_ci
2142e5b6d6dSopenharmony_ci    case doNoChain:
2152e5b6d6dSopenharmony_ci        // Scanned a '^' while on the rule start state.
2162e5b6d6dSopenharmony_ci        fNoChainInRule = true;
2172e5b6d6dSopenharmony_ci        break;
2182e5b6d6dSopenharmony_ci
2192e5b6d6dSopenharmony_ci
2202e5b6d6dSopenharmony_ci    case doExprOrOperator:
2212e5b6d6dSopenharmony_ci        {
2222e5b6d6dSopenharmony_ci            fixOpStack(RBBINode::precOpCat);
2232e5b6d6dSopenharmony_ci            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
2242e5b6d6dSopenharmony_ci            RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
2252e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
2262e5b6d6dSopenharmony_ci                break;
2272e5b6d6dSopenharmony_ci            }
2282e5b6d6dSopenharmony_ci            orNode->fLeftChild     = operandNode;
2292e5b6d6dSopenharmony_ci            operandNode->fParent   = orNode;
2302e5b6d6dSopenharmony_ci        }
2312e5b6d6dSopenharmony_ci        break;
2322e5b6d6dSopenharmony_ci
2332e5b6d6dSopenharmony_ci    case doExprCatOperator:
2342e5b6d6dSopenharmony_ci        // concatenation operator.
2352e5b6d6dSopenharmony_ci        // For the implicit concatenation of adjacent terms in an expression that are
2362e5b6d6dSopenharmony_ci        //   not separated by any other operator.  Action is invoked between the
2372e5b6d6dSopenharmony_ci        //   actions for the two terms.
2382e5b6d6dSopenharmony_ci        {
2392e5b6d6dSopenharmony_ci            fixOpStack(RBBINode::precOpCat);
2402e5b6d6dSopenharmony_ci            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
2412e5b6d6dSopenharmony_ci            RBBINode  *catNode     = pushNewNode(RBBINode::opCat);
2422e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
2432e5b6d6dSopenharmony_ci                break;
2442e5b6d6dSopenharmony_ci            }
2452e5b6d6dSopenharmony_ci            catNode->fLeftChild    = operandNode;
2462e5b6d6dSopenharmony_ci            operandNode->fParent   = catNode;
2472e5b6d6dSopenharmony_ci        }
2482e5b6d6dSopenharmony_ci        break;
2492e5b6d6dSopenharmony_ci
2502e5b6d6dSopenharmony_ci    case doLParen:
2512e5b6d6dSopenharmony_ci        // Open Paren.
2522e5b6d6dSopenharmony_ci        //   The openParen node is a dummy operation type with a low precedence,
        //     which has the effect of ensuring that any real binary op that
2542e5b6d6dSopenharmony_ci        //     follows within the parens binds more tightly to the operands than
2552e5b6d6dSopenharmony_ci        //     stuff outside of the parens.
2562e5b6d6dSopenharmony_ci        pushNewNode(RBBINode::opLParen);
2572e5b6d6dSopenharmony_ci        break;
2582e5b6d6dSopenharmony_ci
2592e5b6d6dSopenharmony_ci    case doExprRParen:
2602e5b6d6dSopenharmony_ci        fixOpStack(RBBINode::precLParen);
2612e5b6d6dSopenharmony_ci        break;
2622e5b6d6dSopenharmony_ci
2632e5b6d6dSopenharmony_ci    case doNOP:
2642e5b6d6dSopenharmony_ci        break;
2652e5b6d6dSopenharmony_ci
2662e5b6d6dSopenharmony_ci    case doStartAssign:
2672e5b6d6dSopenharmony_ci        // We've just scanned "$variable = "
2682e5b6d6dSopenharmony_ci        // The top of the node stack has the $variable ref node.
2692e5b6d6dSopenharmony_ci
2702e5b6d6dSopenharmony_ci        // Save the start position of the RHS text in the StartExpression node
2712e5b6d6dSopenharmony_ci        //   that precedes the $variableReference node on the stack.
2722e5b6d6dSopenharmony_ci        //   This will eventually be used when saving the full $variable replacement
2732e5b6d6dSopenharmony_ci        //   text as a string.
2742e5b6d6dSopenharmony_ci        n = fNodeStack[fNodeStackPtr-1];
2752e5b6d6dSopenharmony_ci        n->fFirstPos = fNextIndex;              // move past the '='
2762e5b6d6dSopenharmony_ci
2772e5b6d6dSopenharmony_ci        // Push a new start-of-expression node; needed to keep parse of the
2782e5b6d6dSopenharmony_ci        //   RHS expression happy.
2792e5b6d6dSopenharmony_ci        pushNewNode(RBBINode::opStart);
2802e5b6d6dSopenharmony_ci        break;
2812e5b6d6dSopenharmony_ci
2822e5b6d6dSopenharmony_ci
2832e5b6d6dSopenharmony_ci
2842e5b6d6dSopenharmony_ci
2852e5b6d6dSopenharmony_ci    case doEndAssign:
2862e5b6d6dSopenharmony_ci        {
2872e5b6d6dSopenharmony_ci            // We have reached the end of an assignment statement.
2882e5b6d6dSopenharmony_ci            //   Current scan char is the ';' that terminates the assignment.
2892e5b6d6dSopenharmony_ci
2902e5b6d6dSopenharmony_ci            // Terminate expression, leaves expression parse tree rooted in TOS node.
2912e5b6d6dSopenharmony_ci            fixOpStack(RBBINode::precStart);
2922e5b6d6dSopenharmony_ci
2932e5b6d6dSopenharmony_ci            RBBINode *startExprNode  = fNodeStack[fNodeStackPtr-2];
2942e5b6d6dSopenharmony_ci            RBBINode *varRefNode     = fNodeStack[fNodeStackPtr-1];
2952e5b6d6dSopenharmony_ci            RBBINode *RHSExprNode    = fNodeStack[fNodeStackPtr];
2962e5b6d6dSopenharmony_ci
2972e5b6d6dSopenharmony_ci            // Save original text of right side of assignment, excluding the terminating ';'
2982e5b6d6dSopenharmony_ci            //  in the root of the node for the right-hand-side expression.
2992e5b6d6dSopenharmony_ci            RHSExprNode->fFirstPos = startExprNode->fFirstPos;
3002e5b6d6dSopenharmony_ci            RHSExprNode->fLastPos  = fScanIndex;
3012e5b6d6dSopenharmony_ci            fRB->fRules.extractBetween(RHSExprNode->fFirstPos, RHSExprNode->fLastPos, RHSExprNode->fText);
3022e5b6d6dSopenharmony_ci
3032e5b6d6dSopenharmony_ci            // Expression parse tree becomes l. child of the $variable reference node.
3042e5b6d6dSopenharmony_ci            varRefNode->fLeftChild = RHSExprNode;
3052e5b6d6dSopenharmony_ci            RHSExprNode->fParent   = varRefNode;
3062e5b6d6dSopenharmony_ci
3072e5b6d6dSopenharmony_ci            // Make a symbol table entry for the $variableRef node.
3082e5b6d6dSopenharmony_ci            fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus);
3092e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
3102e5b6d6dSopenharmony_ci                // This is a round-about way to get the parse position set
3112e5b6d6dSopenharmony_ci                //  so that duplicate symbols error messages include a line number.
3122e5b6d6dSopenharmony_ci                UErrorCode t = *fRB->fStatus;
3132e5b6d6dSopenharmony_ci                *fRB->fStatus = U_ZERO_ERROR;
3142e5b6d6dSopenharmony_ci                error(t);
3152e5b6d6dSopenharmony_ci            }
3162e5b6d6dSopenharmony_ci
3172e5b6d6dSopenharmony_ci            // Clean up the stack.
3182e5b6d6dSopenharmony_ci            delete startExprNode;
3192e5b6d6dSopenharmony_ci            fNodeStackPtr-=3;
3202e5b6d6dSopenharmony_ci            break;
3212e5b6d6dSopenharmony_ci        }
3222e5b6d6dSopenharmony_ci
3232e5b6d6dSopenharmony_ci    case doEndOfRule:
3242e5b6d6dSopenharmony_ci        {
3252e5b6d6dSopenharmony_ci        fixOpStack(RBBINode::precStart);      // Terminate expression, leaves expression
3262e5b6d6dSopenharmony_ci        if (U_FAILURE(*fRB->fStatus)) {       //   parse tree rooted in TOS node.
3272e5b6d6dSopenharmony_ci            break;
3282e5b6d6dSopenharmony_ci        }
3292e5b6d6dSopenharmony_ci#ifdef RBBI_DEBUG
3302e5b6d6dSopenharmony_ci        if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");}
3312e5b6d6dSopenharmony_ci#endif
3322e5b6d6dSopenharmony_ci        U_ASSERT(fNodeStackPtr == 1);
3332e5b6d6dSopenharmony_ci        RBBINode *thisRule = fNodeStack[fNodeStackPtr];
3342e5b6d6dSopenharmony_ci
3352e5b6d6dSopenharmony_ci        // If this rule includes a look-ahead '/', add a endMark node to the
3362e5b6d6dSopenharmony_ci        //   expression tree.
3372e5b6d6dSopenharmony_ci        if (fLookAheadRule) {
3382e5b6d6dSopenharmony_ci            RBBINode  *endNode        = pushNewNode(RBBINode::endMark);
3392e5b6d6dSopenharmony_ci            RBBINode  *catNode        = pushNewNode(RBBINode::opCat);
3402e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
3412e5b6d6dSopenharmony_ci                break;
3422e5b6d6dSopenharmony_ci            }
3432e5b6d6dSopenharmony_ci            fNodeStackPtr -= 2;
3442e5b6d6dSopenharmony_ci            catNode->fLeftChild       = thisRule;
3452e5b6d6dSopenharmony_ci            catNode->fRightChild      = endNode;
3462e5b6d6dSopenharmony_ci            fNodeStack[fNodeStackPtr] = catNode;
3472e5b6d6dSopenharmony_ci            endNode->fVal             = fRuleNum;
3482e5b6d6dSopenharmony_ci            endNode->fLookAheadEnd    = true;
3492e5b6d6dSopenharmony_ci            thisRule                  = catNode;
3502e5b6d6dSopenharmony_ci
3512e5b6d6dSopenharmony_ci            // TODO: Disable chaining out of look-ahead (hard break) rules.
3522e5b6d6dSopenharmony_ci            //   The break on rule match is forced, so there is no point in building up
3532e5b6d6dSopenharmony_ci            //   the state table to chain into another rule for a longer match.
3542e5b6d6dSopenharmony_ci        }
3552e5b6d6dSopenharmony_ci
3562e5b6d6dSopenharmony_ci        // Mark this node as being the root of a rule.
3572e5b6d6dSopenharmony_ci        thisRule->fRuleRoot = true;
3582e5b6d6dSopenharmony_ci
3592e5b6d6dSopenharmony_ci        // Flag if chaining into this rule is wanted.
3602e5b6d6dSopenharmony_ci        //
3612e5b6d6dSopenharmony_ci        if (fRB->fChainRules &&         // If rule chaining is enabled globally via !!chain
3622e5b6d6dSopenharmony_ci                !fNoChainInRule) {      //     and no '^' chain-in inhibit was on this rule
3632e5b6d6dSopenharmony_ci            thisRule->fChainIn = true;
3642e5b6d6dSopenharmony_ci        }
3652e5b6d6dSopenharmony_ci
3662e5b6d6dSopenharmony_ci
3672e5b6d6dSopenharmony_ci        // All rule expressions are ORed together.
3682e5b6d6dSopenharmony_ci        // The ';' that terminates an expression really just functions as a '|' with
        //   a low operator precedence.
3702e5b6d6dSopenharmony_ci        //
3712e5b6d6dSopenharmony_ci        // Each of the four sets of rules are collected separately.
3722e5b6d6dSopenharmony_ci        //  (forward, reverse, safe_forward, safe_reverse)
3732e5b6d6dSopenharmony_ci        //  OR this rule into the appropriate group of them.
3742e5b6d6dSopenharmony_ci        //
3752e5b6d6dSopenharmony_ci        RBBINode **destRules = (fReverseRule? &fRB->fSafeRevTree : fRB->fDefaultTree);
3762e5b6d6dSopenharmony_ci
3772e5b6d6dSopenharmony_ci        if (*destRules != NULL) {
3782e5b6d6dSopenharmony_ci            // This is not the first rule encountered.
3792e5b6d6dSopenharmony_ci            // OR previous stuff  (from *destRules)
3802e5b6d6dSopenharmony_ci            // with the current rule expression (on the Node Stack)
3812e5b6d6dSopenharmony_ci            //  with the resulting OR expression going to *destRules
3822e5b6d6dSopenharmony_ci            //
3832e5b6d6dSopenharmony_ci                       thisRule    = fNodeStack[fNodeStackPtr];
3842e5b6d6dSopenharmony_ci            RBBINode  *prevRules   = *destRules;
3852e5b6d6dSopenharmony_ci            RBBINode  *orNode      = pushNewNode(RBBINode::opOr);
3862e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
3872e5b6d6dSopenharmony_ci                break;
3882e5b6d6dSopenharmony_ci            }
3892e5b6d6dSopenharmony_ci            orNode->fLeftChild     = prevRules;
3902e5b6d6dSopenharmony_ci            prevRules->fParent     = orNode;
3912e5b6d6dSopenharmony_ci            orNode->fRightChild    = thisRule;
3922e5b6d6dSopenharmony_ci            thisRule->fParent      = orNode;
3932e5b6d6dSopenharmony_ci            *destRules             = orNode;
3942e5b6d6dSopenharmony_ci        }
3952e5b6d6dSopenharmony_ci        else
3962e5b6d6dSopenharmony_ci        {
3972e5b6d6dSopenharmony_ci            // This is the first rule encountered (for this direction).
3982e5b6d6dSopenharmony_ci            // Just move its parse tree from the stack to *destRules.
3992e5b6d6dSopenharmony_ci            *destRules = fNodeStack[fNodeStackPtr];
4002e5b6d6dSopenharmony_ci        }
4012e5b6d6dSopenharmony_ci        fReverseRule   = false;   // in preparation for the next rule.
4022e5b6d6dSopenharmony_ci        fLookAheadRule = false;
4032e5b6d6dSopenharmony_ci        fNoChainInRule = false;
4042e5b6d6dSopenharmony_ci        fNodeStackPtr  = 0;
4052e5b6d6dSopenharmony_ci        }
4062e5b6d6dSopenharmony_ci        break;
4072e5b6d6dSopenharmony_ci
4082e5b6d6dSopenharmony_ci
4092e5b6d6dSopenharmony_ci    case doRuleError:
4102e5b6d6dSopenharmony_ci        error(U_BRK_RULE_SYNTAX);
4112e5b6d6dSopenharmony_ci        returnVal = false;
4122e5b6d6dSopenharmony_ci        break;
4132e5b6d6dSopenharmony_ci
4142e5b6d6dSopenharmony_ci
4152e5b6d6dSopenharmony_ci    case doVariableNameExpectedErr:
4162e5b6d6dSopenharmony_ci        error(U_BRK_RULE_SYNTAX);
4172e5b6d6dSopenharmony_ci        break;
4182e5b6d6dSopenharmony_ci
4192e5b6d6dSopenharmony_ci
4202e5b6d6dSopenharmony_ci    //
4212e5b6d6dSopenharmony_ci    //  Unary operands  + ? *
4222e5b6d6dSopenharmony_ci    //    These all appear after the operand to which they apply.
4232e5b6d6dSopenharmony_ci    //    When we hit one, the operand (may be a whole sub expression)
4242e5b6d6dSopenharmony_ci    //    will be on the top of the stack.
4252e5b6d6dSopenharmony_ci    //    Unary Operator becomes TOS, with the old TOS as its one child.
4262e5b6d6dSopenharmony_ci    case doUnaryOpPlus:
4272e5b6d6dSopenharmony_ci        {
4282e5b6d6dSopenharmony_ci            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
4292e5b6d6dSopenharmony_ci            RBBINode  *plusNode    = pushNewNode(RBBINode::opPlus);
4302e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
4312e5b6d6dSopenharmony_ci                break;
4322e5b6d6dSopenharmony_ci            }
4332e5b6d6dSopenharmony_ci            plusNode->fLeftChild   = operandNode;
4342e5b6d6dSopenharmony_ci            operandNode->fParent   = plusNode;
4352e5b6d6dSopenharmony_ci        }
4362e5b6d6dSopenharmony_ci        break;
4372e5b6d6dSopenharmony_ci
4382e5b6d6dSopenharmony_ci    case doUnaryOpQuestion:
4392e5b6d6dSopenharmony_ci        {
4402e5b6d6dSopenharmony_ci            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
4412e5b6d6dSopenharmony_ci            RBBINode  *qNode       = pushNewNode(RBBINode::opQuestion);
4422e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
4432e5b6d6dSopenharmony_ci                break;
4442e5b6d6dSopenharmony_ci            }
4452e5b6d6dSopenharmony_ci            qNode->fLeftChild      = operandNode;
4462e5b6d6dSopenharmony_ci            operandNode->fParent   = qNode;
4472e5b6d6dSopenharmony_ci        }
4482e5b6d6dSopenharmony_ci        break;
4492e5b6d6dSopenharmony_ci
4502e5b6d6dSopenharmony_ci    case doUnaryOpStar:
4512e5b6d6dSopenharmony_ci        {
4522e5b6d6dSopenharmony_ci            RBBINode  *operandNode = fNodeStack[fNodeStackPtr--];
4532e5b6d6dSopenharmony_ci            RBBINode  *starNode    = pushNewNode(RBBINode::opStar);
4542e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
4552e5b6d6dSopenharmony_ci                break;
4562e5b6d6dSopenharmony_ci            }
4572e5b6d6dSopenharmony_ci            starNode->fLeftChild   = operandNode;
4582e5b6d6dSopenharmony_ci            operandNode->fParent   = starNode;
4592e5b6d6dSopenharmony_ci        }
4602e5b6d6dSopenharmony_ci        break;
4612e5b6d6dSopenharmony_ci
4622e5b6d6dSopenharmony_ci    case doRuleChar:
4632e5b6d6dSopenharmony_ci        // A "Rule Character" is any single character that is a literal part
4642e5b6d6dSopenharmony_ci        // of the regular expression.  Like a, b and c in the expression "(abc*) | [:L:]"
4652e5b6d6dSopenharmony_ci        // These are pretty uncommon in break rules; the terms are more commonly
        //  sets.  To keep things uniform, treat these characters as
        // sets that just happen to contain only one character.
4682e5b6d6dSopenharmony_ci        {
4692e5b6d6dSopenharmony_ci            n = pushNewNode(RBBINode::setRef);
4702e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
4712e5b6d6dSopenharmony_ci                break;
4722e5b6d6dSopenharmony_ci            }
4732e5b6d6dSopenharmony_ci            findSetFor(UnicodeString(fC.fChar), n);
4742e5b6d6dSopenharmony_ci            n->fFirstPos = fScanIndex;
4752e5b6d6dSopenharmony_ci            n->fLastPos  = fNextIndex;
4762e5b6d6dSopenharmony_ci            fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
4772e5b6d6dSopenharmony_ci            break;
4782e5b6d6dSopenharmony_ci        }
4792e5b6d6dSopenharmony_ci
4802e5b6d6dSopenharmony_ci    case doDotAny:
4812e5b6d6dSopenharmony_ci        // scanned a ".", meaning match any single character.
4822e5b6d6dSopenharmony_ci        {
4832e5b6d6dSopenharmony_ci            n = pushNewNode(RBBINode::setRef);
4842e5b6d6dSopenharmony_ci            if (U_FAILURE(*fRB->fStatus)) {
4852e5b6d6dSopenharmony_ci                break;
4862e5b6d6dSopenharmony_ci            }
4872e5b6d6dSopenharmony_ci            findSetFor(UnicodeString(true, kAny, 3), n);
4882e5b6d6dSopenharmony_ci            n->fFirstPos = fScanIndex;
4892e5b6d6dSopenharmony_ci            n->fLastPos  = fNextIndex;
4902e5b6d6dSopenharmony_ci            fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
4912e5b6d6dSopenharmony_ci            break;
4922e5b6d6dSopenharmony_ci        }
4932e5b6d6dSopenharmony_ci
4942e5b6d6dSopenharmony_ci    case doSlash:
4952e5b6d6dSopenharmony_ci        // Scanned a '/', which identifies a look-ahead break position in a rule.
4962e5b6d6dSopenharmony_ci        n = pushNewNode(RBBINode::lookAhead);
4972e5b6d6dSopenharmony_ci        if (U_FAILURE(*fRB->fStatus)) {
4982e5b6d6dSopenharmony_ci            break;
4992e5b6d6dSopenharmony_ci        }
5002e5b6d6dSopenharmony_ci        n->fVal      = fRuleNum;
5012e5b6d6dSopenharmony_ci        n->fFirstPos = fScanIndex;
5022e5b6d6dSopenharmony_ci        n->fLastPos  = fNextIndex;
5032e5b6d6dSopenharmony_ci        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
5042e5b6d6dSopenharmony_ci        fLookAheadRule = true;
5052e5b6d6dSopenharmony_ci        break;
5062e5b6d6dSopenharmony_ci
5072e5b6d6dSopenharmony_ci
5082e5b6d6dSopenharmony_ci    case doStartTagValue:
5092e5b6d6dSopenharmony_ci        // Scanned a '{', the opening delimiter for a tag value within a rule.
5102e5b6d6dSopenharmony_ci        n = pushNewNode(RBBINode::tag);
5112e5b6d6dSopenharmony_ci        if (U_FAILURE(*fRB->fStatus)) {
5122e5b6d6dSopenharmony_ci            break;
5132e5b6d6dSopenharmony_ci        }
5142e5b6d6dSopenharmony_ci        n->fVal      = 0;
5152e5b6d6dSopenharmony_ci        n->fFirstPos = fScanIndex;
5162e5b6d6dSopenharmony_ci        n->fLastPos  = fNextIndex;
5172e5b6d6dSopenharmony_ci        break;
5182e5b6d6dSopenharmony_ci
5192e5b6d6dSopenharmony_ci    case doTagDigit:
5202e5b6d6dSopenharmony_ci        // Just scanned a decimal digit that's part of a tag value
5212e5b6d6dSopenharmony_ci        {
5222e5b6d6dSopenharmony_ci            n = fNodeStack[fNodeStackPtr];
5232e5b6d6dSopenharmony_ci            uint32_t v = u_charDigitValue(fC.fChar);
5242e5b6d6dSopenharmony_ci            U_ASSERT(v < 10);
5252e5b6d6dSopenharmony_ci            n->fVal = n->fVal*10 + v;
5262e5b6d6dSopenharmony_ci            break;
5272e5b6d6dSopenharmony_ci        }
5282e5b6d6dSopenharmony_ci
5292e5b6d6dSopenharmony_ci    case doTagValue:
5302e5b6d6dSopenharmony_ci        n = fNodeStack[fNodeStackPtr];
5312e5b6d6dSopenharmony_ci        n->fLastPos = fNextIndex;
5322e5b6d6dSopenharmony_ci        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
5332e5b6d6dSopenharmony_ci        break;
5342e5b6d6dSopenharmony_ci
5352e5b6d6dSopenharmony_ci    case doTagExpectedError:
5362e5b6d6dSopenharmony_ci        error(U_BRK_MALFORMED_RULE_TAG);
5372e5b6d6dSopenharmony_ci        returnVal = false;
5382e5b6d6dSopenharmony_ci        break;
5392e5b6d6dSopenharmony_ci
5402e5b6d6dSopenharmony_ci    case doOptionStart:
5412e5b6d6dSopenharmony_ci        // Scanning a !!option.   At the start of string.
5422e5b6d6dSopenharmony_ci        fOptionStart = fScanIndex;
5432e5b6d6dSopenharmony_ci        break;
5442e5b6d6dSopenharmony_ci
5452e5b6d6dSopenharmony_ci    case doOptionEnd:
5462e5b6d6dSopenharmony_ci        {
5472e5b6d6dSopenharmony_ci            UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart);
5482e5b6d6dSopenharmony_ci            if (opt == UNICODE_STRING("chain", 5)) {
5492e5b6d6dSopenharmony_ci                fRB->fChainRules = true;
5502e5b6d6dSopenharmony_ci            } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) {
5512e5b6d6dSopenharmony_ci                fRB->fLBCMNoChain = true;
5522e5b6d6dSopenharmony_ci            } else if (opt == UNICODE_STRING("forward", 7)) {
5532e5b6d6dSopenharmony_ci                fRB->fDefaultTree   = &fRB->fForwardTree;
5542e5b6d6dSopenharmony_ci            } else if (opt == UNICODE_STRING("reverse", 7)) {
5552e5b6d6dSopenharmony_ci                fRB->fDefaultTree   = &fRB->fReverseTree;
5562e5b6d6dSopenharmony_ci            } else if (opt == UNICODE_STRING("safe_forward", 12)) {
5572e5b6d6dSopenharmony_ci                fRB->fDefaultTree   = &fRB->fSafeFwdTree;
5582e5b6d6dSopenharmony_ci            } else if (opt == UNICODE_STRING("safe_reverse", 12)) {
5592e5b6d6dSopenharmony_ci                fRB->fDefaultTree   = &fRB->fSafeRevTree;
5602e5b6d6dSopenharmony_ci            } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
5612e5b6d6dSopenharmony_ci                fRB->fLookAheadHardBreak = true;
5622e5b6d6dSopenharmony_ci            } else if (opt == UNICODE_STRING("quoted_literals_only", 20)) {
5632e5b6d6dSopenharmony_ci                fRuleSets[kRuleSet_rule_char-128].clear();
5642e5b6d6dSopenharmony_ci            } else if (opt == UNICODE_STRING("unquoted_literals",  17)) {
5652e5b6d6dSopenharmony_ci                fRuleSets[kRuleSet_rule_char-128].applyPattern(UnicodeString(gRuleSet_rule_char_pattern), *fRB->fStatus);
5662e5b6d6dSopenharmony_ci            } else {
5672e5b6d6dSopenharmony_ci                error(U_BRK_UNRECOGNIZED_OPTION);
5682e5b6d6dSopenharmony_ci            }
5692e5b6d6dSopenharmony_ci        }
5702e5b6d6dSopenharmony_ci        break;
5712e5b6d6dSopenharmony_ci
5722e5b6d6dSopenharmony_ci    case doReverseDir:
5732e5b6d6dSopenharmony_ci        fReverseRule = true;
5742e5b6d6dSopenharmony_ci        break;
5752e5b6d6dSopenharmony_ci
5762e5b6d6dSopenharmony_ci    case doStartVariableName:
5772e5b6d6dSopenharmony_ci        n = pushNewNode(RBBINode::varRef);
5782e5b6d6dSopenharmony_ci        if (U_FAILURE(*fRB->fStatus)) {
5792e5b6d6dSopenharmony_ci            break;
5802e5b6d6dSopenharmony_ci        }
5812e5b6d6dSopenharmony_ci        n->fFirstPos = fScanIndex;
5822e5b6d6dSopenharmony_ci        break;
5832e5b6d6dSopenharmony_ci
5842e5b6d6dSopenharmony_ci    case doEndVariableName:
5852e5b6d6dSopenharmony_ci        n = fNodeStack[fNodeStackPtr];
5862e5b6d6dSopenharmony_ci        if (n==NULL || n->fType != RBBINode::varRef) {
5872e5b6d6dSopenharmony_ci            error(U_BRK_INTERNAL_ERROR);
5882e5b6d6dSopenharmony_ci            break;
5892e5b6d6dSopenharmony_ci        }
5902e5b6d6dSopenharmony_ci        n->fLastPos = fScanIndex;
5912e5b6d6dSopenharmony_ci        fRB->fRules.extractBetween(n->fFirstPos+1, n->fLastPos, n->fText);
5922e5b6d6dSopenharmony_ci        // Look the newly scanned name up in the symbol table
5932e5b6d6dSopenharmony_ci        //   If there's an entry, set the l. child of the var ref to the replacement expression.
5942e5b6d6dSopenharmony_ci        //   (We also pass through here when scanning assignments, but no harm is done, other
5952e5b6d6dSopenharmony_ci        //    than a slight wasted effort that seems hard to avoid.  Lookup will be null)
5962e5b6d6dSopenharmony_ci        n->fLeftChild = fSymbolTable->lookupNode(n->fText);
5972e5b6d6dSopenharmony_ci        break;
5982e5b6d6dSopenharmony_ci
5992e5b6d6dSopenharmony_ci    case doCheckVarDef:
6002e5b6d6dSopenharmony_ci        n = fNodeStack[fNodeStackPtr];
6012e5b6d6dSopenharmony_ci        if (n->fLeftChild == NULL) {
6022e5b6d6dSopenharmony_ci            error(U_BRK_UNDEFINED_VARIABLE);
6032e5b6d6dSopenharmony_ci            returnVal = false;
6042e5b6d6dSopenharmony_ci        }
6052e5b6d6dSopenharmony_ci        break;
6062e5b6d6dSopenharmony_ci
6072e5b6d6dSopenharmony_ci    case doExprFinished:
6082e5b6d6dSopenharmony_ci        break;
6092e5b6d6dSopenharmony_ci
6102e5b6d6dSopenharmony_ci    case doRuleErrorAssignExpr:
6112e5b6d6dSopenharmony_ci        error(U_BRK_ASSIGN_ERROR);
6122e5b6d6dSopenharmony_ci        returnVal = false;
6132e5b6d6dSopenharmony_ci        break;
6142e5b6d6dSopenharmony_ci
6152e5b6d6dSopenharmony_ci    case doExit:
6162e5b6d6dSopenharmony_ci        returnVal = false;
6172e5b6d6dSopenharmony_ci        break;
6182e5b6d6dSopenharmony_ci
6192e5b6d6dSopenharmony_ci    case doScanUnicodeSet:
6202e5b6d6dSopenharmony_ci        scanSet();
6212e5b6d6dSopenharmony_ci        break;
6222e5b6d6dSopenharmony_ci
6232e5b6d6dSopenharmony_ci    default:
6242e5b6d6dSopenharmony_ci        error(U_BRK_INTERNAL_ERROR);
6252e5b6d6dSopenharmony_ci        returnVal = false;
6262e5b6d6dSopenharmony_ci        break;
6272e5b6d6dSopenharmony_ci    }
6282e5b6d6dSopenharmony_ci    return returnVal && U_SUCCESS(*fRB->fStatus);
6292e5b6d6dSopenharmony_ci}
6302e5b6d6dSopenharmony_ci
6312e5b6d6dSopenharmony_ci
6322e5b6d6dSopenharmony_ci
6332e5b6d6dSopenharmony_ci
6342e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
6352e5b6d6dSopenharmony_ci//
6362e5b6d6dSopenharmony_ci//  Error         Report a rule parse error.
6372e5b6d6dSopenharmony_ci//                Only report it if no previous error has been recorded.
6382e5b6d6dSopenharmony_ci//
6392e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
6402e5b6d6dSopenharmony_civoid RBBIRuleScanner::error(UErrorCode e) {
6412e5b6d6dSopenharmony_ci    if (U_SUCCESS(*fRB->fStatus)) {
6422e5b6d6dSopenharmony_ci        *fRB->fStatus = e;
6432e5b6d6dSopenharmony_ci        if (fRB->fParseError) {
6442e5b6d6dSopenharmony_ci            fRB->fParseError->line  = fLineNum;
6452e5b6d6dSopenharmony_ci            fRB->fParseError->offset = fCharNum;
6462e5b6d6dSopenharmony_ci            fRB->fParseError->preContext[0] = 0;
6472e5b6d6dSopenharmony_ci            fRB->fParseError->postContext[0] = 0;
6482e5b6d6dSopenharmony_ci        }
6492e5b6d6dSopenharmony_ci    }
6502e5b6d6dSopenharmony_ci}
6512e5b6d6dSopenharmony_ci
6522e5b6d6dSopenharmony_ci
6532e5b6d6dSopenharmony_ci
6542e5b6d6dSopenharmony_ci
6552e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
6562e5b6d6dSopenharmony_ci//
6572e5b6d6dSopenharmony_ci//  fixOpStack   The parse stack holds partially assembled chunks of the parse tree.
6582e5b6d6dSopenharmony_ci//               An entry on the stack may be as small as a single setRef node,
6592e5b6d6dSopenharmony_ci//               or as large as the parse tree
6602e5b6d6dSopenharmony_ci//               for an entire expression (this will be the one item left on the stack
6612e5b6d6dSopenharmony_ci//               when the parsing of an RBBI rule completes.
6622e5b6d6dSopenharmony_ci//
6632e5b6d6dSopenharmony_ci//               This function is called when a binary operator is encountered.
6642e5b6d6dSopenharmony_ci//               It looks back up the stack for operators that are not yet associated
6652e5b6d6dSopenharmony_ci//               with a right operand, and if the precedence of the stacked operator >=
6662e5b6d6dSopenharmony_ci//               the precedence of the current operator, binds the operand left,
6672e5b6d6dSopenharmony_ci//               to the previously encountered operator.
6682e5b6d6dSopenharmony_ci//
6692e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
6702e5b6d6dSopenharmony_civoid RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) {
6712e5b6d6dSopenharmony_ci    RBBINode *n;
6722e5b6d6dSopenharmony_ci    // printNodeStack("entering fixOpStack()");
6732e5b6d6dSopenharmony_ci    for (;;) {
6742e5b6d6dSopenharmony_ci        n = fNodeStack[fNodeStackPtr-1];   // an operator node
6752e5b6d6dSopenharmony_ci        if (n->fPrecedence == 0) {
6762e5b6d6dSopenharmony_ci            RBBIDebugPuts("RBBIRuleScanner::fixOpStack, bad operator node");
6772e5b6d6dSopenharmony_ci            error(U_BRK_INTERNAL_ERROR);
6782e5b6d6dSopenharmony_ci            return;
6792e5b6d6dSopenharmony_ci        }
6802e5b6d6dSopenharmony_ci
6812e5b6d6dSopenharmony_ci        if (n->fPrecedence < p || n->fPrecedence <= RBBINode::precLParen) {
6822e5b6d6dSopenharmony_ci            // The most recent operand goes with the current operator,
6832e5b6d6dSopenharmony_ci            //   not with the previously stacked one.
6842e5b6d6dSopenharmony_ci            break;
6852e5b6d6dSopenharmony_ci        }
6862e5b6d6dSopenharmony_ci            // Stack operator is a binary op  ( '|' or concatenation)
6872e5b6d6dSopenharmony_ci            //   TOS operand becomes right child of this operator.
6882e5b6d6dSopenharmony_ci            //   Resulting subexpression becomes the TOS operand.
6892e5b6d6dSopenharmony_ci            n->fRightChild = fNodeStack[fNodeStackPtr];
6902e5b6d6dSopenharmony_ci            fNodeStack[fNodeStackPtr]->fParent = n;
6912e5b6d6dSopenharmony_ci            fNodeStackPtr--;
6922e5b6d6dSopenharmony_ci        // printNodeStack("looping in fixOpStack()   ");
6932e5b6d6dSopenharmony_ci    }
6942e5b6d6dSopenharmony_ci
6952e5b6d6dSopenharmony_ci    if (p <= RBBINode::precLParen) {
6962e5b6d6dSopenharmony_ci        // Scan is at a right paren or end of expression.
6972e5b6d6dSopenharmony_ci        //  The scanned item must match the stack, or else there was an error.
6982e5b6d6dSopenharmony_ci        //  Discard the left paren (or start expr) node from the stack,
6992e5b6d6dSopenharmony_ci            //  leaving the completed (sub)expression as TOS.
7002e5b6d6dSopenharmony_ci            if (n->fPrecedence != p) {
7012e5b6d6dSopenharmony_ci                // Right paren encountered matched start of expression node, or
7022e5b6d6dSopenharmony_ci                // end of expression matched with a left paren node.
7032e5b6d6dSopenharmony_ci                error(U_BRK_MISMATCHED_PAREN);
7042e5b6d6dSopenharmony_ci            }
7052e5b6d6dSopenharmony_ci            fNodeStack[fNodeStackPtr-1] = fNodeStack[fNodeStackPtr];
7062e5b6d6dSopenharmony_ci            fNodeStackPtr--;
7072e5b6d6dSopenharmony_ci            // Delete the now-discarded LParen or Start node.
7082e5b6d6dSopenharmony_ci            delete n;
7092e5b6d6dSopenharmony_ci    }
7102e5b6d6dSopenharmony_ci    // printNodeStack("leaving fixOpStack()");
7112e5b6d6dSopenharmony_ci}
7122e5b6d6dSopenharmony_ci
7132e5b6d6dSopenharmony_ci
7142e5b6d6dSopenharmony_ci
7152e5b6d6dSopenharmony_ci
7162e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
7172e5b6d6dSopenharmony_ci//
7182e5b6d6dSopenharmony_ci//   findSetFor    given a UnicodeString,
7192e5b6d6dSopenharmony_ci//                  - find the corresponding Unicode Set  (uset node)
7202e5b6d6dSopenharmony_ci//                         (create one if necessary)
7212e5b6d6dSopenharmony_ci//                  - Set fLeftChild of the caller's node (should be a setRef node)
7222e5b6d6dSopenharmony_ci//                         to the uset node
7232e5b6d6dSopenharmony_ci//                 Maintain a hash table of uset nodes, so the same one is always used
7242e5b6d6dSopenharmony_ci//                    for the same string.
7252e5b6d6dSopenharmony_ci//                 If a "to adopt" set is provided and we haven't seen this key before,
7262e5b6d6dSopenharmony_ci//                    add the provided set to the hash table.
7272e5b6d6dSopenharmony_ci//                 If the string is one (32 bit) char in length, the set contains
7282e5b6d6dSopenharmony_ci//                    just one element which is the char in question.
7292e5b6d6dSopenharmony_ci//                 If the string is "any", return a set containing all chars.
7302e5b6d6dSopenharmony_ci//
7312e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
7322e5b6d6dSopenharmony_civoid RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) {
7332e5b6d6dSopenharmony_ci
7342e5b6d6dSopenharmony_ci    RBBISetTableEl   *el;
7352e5b6d6dSopenharmony_ci
7362e5b6d6dSopenharmony_ci    // First check whether we've already cached a set for this string.
7372e5b6d6dSopenharmony_ci    // If so, just use the cached set in the new node.
7382e5b6d6dSopenharmony_ci    //   delete any set provided by the caller, since we own it.
7392e5b6d6dSopenharmony_ci    el = (RBBISetTableEl *)uhash_get(fSetTable, &s);
7402e5b6d6dSopenharmony_ci    if (el != NULL) {
7412e5b6d6dSopenharmony_ci        delete setToAdopt;
7422e5b6d6dSopenharmony_ci        node->fLeftChild = el->val;
7432e5b6d6dSopenharmony_ci        U_ASSERT(node->fLeftChild->fType == RBBINode::uset);
7442e5b6d6dSopenharmony_ci        return;
7452e5b6d6dSopenharmony_ci    }
7462e5b6d6dSopenharmony_ci
7472e5b6d6dSopenharmony_ci    // Haven't seen this set before.
7482e5b6d6dSopenharmony_ci    // If the caller didn't provide us with a prebuilt set,
7492e5b6d6dSopenharmony_ci    //   create a new UnicodeSet now.
7502e5b6d6dSopenharmony_ci    if (setToAdopt == NULL) {
7512e5b6d6dSopenharmony_ci        if (s.compare(kAny, -1) == 0) {
7522e5b6d6dSopenharmony_ci            setToAdopt = new UnicodeSet(0x000000, 0x10ffff);
7532e5b6d6dSopenharmony_ci        } else {
7542e5b6d6dSopenharmony_ci            UChar32 c;
7552e5b6d6dSopenharmony_ci            c = s.char32At(0);
7562e5b6d6dSopenharmony_ci            setToAdopt = new UnicodeSet(c, c);
7572e5b6d6dSopenharmony_ci        }
7582e5b6d6dSopenharmony_ci    }
7592e5b6d6dSopenharmony_ci
7602e5b6d6dSopenharmony_ci    //
7612e5b6d6dSopenharmony_ci    // Make a new uset node to refer to this UnicodeSet
7622e5b6d6dSopenharmony_ci    // This new uset node becomes the child of the caller's setReference node.
7632e5b6d6dSopenharmony_ci    //
7642e5b6d6dSopenharmony_ci    RBBINode *usetNode    = new RBBINode(RBBINode::uset);
7652e5b6d6dSopenharmony_ci    if (usetNode == NULL) {
7662e5b6d6dSopenharmony_ci        error(U_MEMORY_ALLOCATION_ERROR);
7672e5b6d6dSopenharmony_ci        return;
7682e5b6d6dSopenharmony_ci    }
7692e5b6d6dSopenharmony_ci    usetNode->fInputSet   = setToAdopt;
7702e5b6d6dSopenharmony_ci    usetNode->fParent     = node;
7712e5b6d6dSopenharmony_ci    node->fLeftChild      = usetNode;
7722e5b6d6dSopenharmony_ci    usetNode->fText = s;
7732e5b6d6dSopenharmony_ci
7742e5b6d6dSopenharmony_ci
7752e5b6d6dSopenharmony_ci    //
7762e5b6d6dSopenharmony_ci    // Add the new uset node to the list of all uset nodes.
7772e5b6d6dSopenharmony_ci    //
7782e5b6d6dSopenharmony_ci    fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus);
7792e5b6d6dSopenharmony_ci
7802e5b6d6dSopenharmony_ci
7812e5b6d6dSopenharmony_ci    //
7822e5b6d6dSopenharmony_ci    // Add the new set to the set hash table.
7832e5b6d6dSopenharmony_ci    //
7842e5b6d6dSopenharmony_ci    el      = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl));
7852e5b6d6dSopenharmony_ci    UnicodeString *tkey = new UnicodeString(s);
7862e5b6d6dSopenharmony_ci    if (tkey == NULL || el == NULL || setToAdopt == NULL) {
7872e5b6d6dSopenharmony_ci        // Delete to avoid memory leak
7882e5b6d6dSopenharmony_ci        delete tkey;
7892e5b6d6dSopenharmony_ci        tkey = NULL;
7902e5b6d6dSopenharmony_ci        uprv_free(el);
7912e5b6d6dSopenharmony_ci        el = NULL;
7922e5b6d6dSopenharmony_ci        delete setToAdopt;
7932e5b6d6dSopenharmony_ci        setToAdopt = NULL;
7942e5b6d6dSopenharmony_ci
7952e5b6d6dSopenharmony_ci        error(U_MEMORY_ALLOCATION_ERROR);
7962e5b6d6dSopenharmony_ci        return;
7972e5b6d6dSopenharmony_ci    }
7982e5b6d6dSopenharmony_ci    el->key = tkey;
7992e5b6d6dSopenharmony_ci    el->val = usetNode;
8002e5b6d6dSopenharmony_ci    uhash_put(fSetTable, el->key, el, fRB->fStatus);
8012e5b6d6dSopenharmony_ci
8022e5b6d6dSopenharmony_ci    return;
8032e5b6d6dSopenharmony_ci}
8042e5b6d6dSopenharmony_ci
8052e5b6d6dSopenharmony_ci
8062e5b6d6dSopenharmony_ci
8072e5b6d6dSopenharmony_ci//
8082e5b6d6dSopenharmony_ci//  Assorted Unicode character constants.
8092e5b6d6dSopenharmony_ci//     Numeric because there is no portable way to enter them as literals.
8102e5b6d6dSopenharmony_ci//     (Think EBCDIC).
8112e5b6d6dSopenharmony_ci//
8122e5b6d6dSopenharmony_cistatic const UChar      chCR        = 0x0d;      // New lines, for terminating comments.
8132e5b6d6dSopenharmony_cistatic const UChar      chLF        = 0x0a;
8142e5b6d6dSopenharmony_cistatic const UChar      chNEL       = 0x85;      //    NEL newline variant
8152e5b6d6dSopenharmony_cistatic const UChar      chLS        = 0x2028;    //    Unicode Line Separator
8162e5b6d6dSopenharmony_cistatic const UChar      chApos      = 0x27;      //  single quote, for quoted chars.
8172e5b6d6dSopenharmony_cistatic const UChar      chPound     = 0x23;      // '#', introduces a comment.
8182e5b6d6dSopenharmony_cistatic const UChar      chBackSlash = 0x5c;      // '\'  introduces a char escape
8192e5b6d6dSopenharmony_cistatic const UChar      chLParen    = 0x28;
8202e5b6d6dSopenharmony_cistatic const UChar      chRParen    = 0x29;
8212e5b6d6dSopenharmony_ci
8222e5b6d6dSopenharmony_ci
8232e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
8242e5b6d6dSopenharmony_ci//
8252e5b6d6dSopenharmony_ci//  stripRules    Return a rules string without extra spaces.
8262e5b6d6dSopenharmony_ci//                (Comments are removed separately, during rule parsing.)
8272e5b6d6dSopenharmony_ci//
8282e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
8292e5b6d6dSopenharmony_ciUnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
8302e5b6d6dSopenharmony_ci    UnicodeString strippedRules;
8312e5b6d6dSopenharmony_ci    int32_t rulesLength = rules.length();
8322e5b6d6dSopenharmony_ci
8332e5b6d6dSopenharmony_ci    for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) {
8342e5b6d6dSopenharmony_ci        UChar32 cp = rules.char32At(idx);
8352e5b6d6dSopenharmony_ci        bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE);
8362e5b6d6dSopenharmony_ci        if (whiteSpace) {
8372e5b6d6dSopenharmony_ci            continue;
8382e5b6d6dSopenharmony_ci        }
8392e5b6d6dSopenharmony_ci        strippedRules.append(cp);
8402e5b6d6dSopenharmony_ci    }
8412e5b6d6dSopenharmony_ci    return strippedRules;
8422e5b6d6dSopenharmony_ci}
8432e5b6d6dSopenharmony_ci
8442e5b6d6dSopenharmony_ci
8452e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
8462e5b6d6dSopenharmony_ci//
8472e5b6d6dSopenharmony_ci//  nextCharLL    Low Level Next Char from rule input source.
8482e5b6d6dSopenharmony_ci//                Get a char from the input character iterator,
8492e5b6d6dSopenharmony_ci//                keep track of input position for error reporting.
8502e5b6d6dSopenharmony_ci//
8512e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
8522e5b6d6dSopenharmony_ciUChar32  RBBIRuleScanner::nextCharLL() {
8532e5b6d6dSopenharmony_ci    UChar32  ch;
8542e5b6d6dSopenharmony_ci
8552e5b6d6dSopenharmony_ci    if (fNextIndex >= fRB->fRules.length()) {
8562e5b6d6dSopenharmony_ci        return (UChar32)-1;
8572e5b6d6dSopenharmony_ci    }
8582e5b6d6dSopenharmony_ci    ch         = fRB->fRules.char32At(fNextIndex);
8592e5b6d6dSopenharmony_ci    if (U_IS_SURROGATE(ch)) {
8602e5b6d6dSopenharmony_ci        error(U_ILLEGAL_CHAR_FOUND);
8612e5b6d6dSopenharmony_ci        return U_SENTINEL;
8622e5b6d6dSopenharmony_ci    }
8632e5b6d6dSopenharmony_ci    fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1);
8642e5b6d6dSopenharmony_ci
8652e5b6d6dSopenharmony_ci    if (ch == chCR ||
8662e5b6d6dSopenharmony_ci        ch == chNEL ||
8672e5b6d6dSopenharmony_ci        ch == chLS   ||
8682e5b6d6dSopenharmony_ci        (ch == chLF && fLastChar != chCR)) {
8692e5b6d6dSopenharmony_ci        // Character is starting a new line.  Bump up the line number, and
8702e5b6d6dSopenharmony_ci        //  reset the column to 0.
8712e5b6d6dSopenharmony_ci        fLineNum++;
8722e5b6d6dSopenharmony_ci        fCharNum=0;
8732e5b6d6dSopenharmony_ci        if (fQuoteMode) {
8742e5b6d6dSopenharmony_ci            error(U_BRK_NEW_LINE_IN_QUOTED_STRING);
8752e5b6d6dSopenharmony_ci            fQuoteMode = false;
8762e5b6d6dSopenharmony_ci        }
8772e5b6d6dSopenharmony_ci    }
8782e5b6d6dSopenharmony_ci    else {
8792e5b6d6dSopenharmony_ci        // Character is not starting a new line.  Except in the case of a
8802e5b6d6dSopenharmony_ci        //   LF following a CR, increment the column position.
8812e5b6d6dSopenharmony_ci        if (ch != chLF) {
8822e5b6d6dSopenharmony_ci            fCharNum++;
8832e5b6d6dSopenharmony_ci        }
8842e5b6d6dSopenharmony_ci    }
8852e5b6d6dSopenharmony_ci    fLastChar = ch;
8862e5b6d6dSopenharmony_ci    return ch;
8872e5b6d6dSopenharmony_ci}
8882e5b6d6dSopenharmony_ci
8892e5b6d6dSopenharmony_ci
8902e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
8912e5b6d6dSopenharmony_ci//
8922e5b6d6dSopenharmony_ci//   nextChar     for rules scanning.  At this level, we handle stripping
8932e5b6d6dSopenharmony_ci//                out comments and processing backslash character escapes.
8942e5b6d6dSopenharmony_ci//                The rest of the rules grammar is handled at the next level up.
8952e5b6d6dSopenharmony_ci//
8962e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
8972e5b6d6dSopenharmony_civoid RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
8982e5b6d6dSopenharmony_ci
8992e5b6d6dSopenharmony_ci    // Unicode Character constants needed for the processing done by nextChar(),
9002e5b6d6dSopenharmony_ci    //   in hex because literals wont work on EBCDIC machines.
9012e5b6d6dSopenharmony_ci
9022e5b6d6dSopenharmony_ci    fScanIndex = fNextIndex;
9032e5b6d6dSopenharmony_ci    c.fChar    = nextCharLL();
9042e5b6d6dSopenharmony_ci    c.fEscaped = false;
9052e5b6d6dSopenharmony_ci
9062e5b6d6dSopenharmony_ci    //
9072e5b6d6dSopenharmony_ci    //  check for '' sequence.
9082e5b6d6dSopenharmony_ci    //  These are recognized in all contexts, whether in quoted text or not.
9092e5b6d6dSopenharmony_ci    //
9102e5b6d6dSopenharmony_ci    if (c.fChar == chApos) {
9112e5b6d6dSopenharmony_ci        if (fRB->fRules.char32At(fNextIndex) == chApos) {
9122e5b6d6dSopenharmony_ci            c.fChar    = nextCharLL();        // get nextChar officially so character counts
9132e5b6d6dSopenharmony_ci            c.fEscaped = true;                //   stay correct.
9142e5b6d6dSopenharmony_ci        }
9152e5b6d6dSopenharmony_ci        else
9162e5b6d6dSopenharmony_ci        {
9172e5b6d6dSopenharmony_ci            // Single quote, by itself.
9182e5b6d6dSopenharmony_ci            //   Toggle quoting mode.
9192e5b6d6dSopenharmony_ci            //   Return either '('  or ')', because quotes cause a grouping of the quoted text.
9202e5b6d6dSopenharmony_ci            fQuoteMode = !fQuoteMode;
9212e5b6d6dSopenharmony_ci            if (fQuoteMode == true) {
9222e5b6d6dSopenharmony_ci                c.fChar = chLParen;
9232e5b6d6dSopenharmony_ci            } else {
9242e5b6d6dSopenharmony_ci                c.fChar = chRParen;
9252e5b6d6dSopenharmony_ci            }
9262e5b6d6dSopenharmony_ci            c.fEscaped = false;      // The paren that we return is not escaped.
9272e5b6d6dSopenharmony_ci            return;
9282e5b6d6dSopenharmony_ci        }
9292e5b6d6dSopenharmony_ci    }
9302e5b6d6dSopenharmony_ci
9312e5b6d6dSopenharmony_ci    if (fQuoteMode) {
9322e5b6d6dSopenharmony_ci        c.fEscaped = true;
9332e5b6d6dSopenharmony_ci    }
9342e5b6d6dSopenharmony_ci    else
9352e5b6d6dSopenharmony_ci    {
9362e5b6d6dSopenharmony_ci        // We are not in a 'quoted region' of the source.
9372e5b6d6dSopenharmony_ci        //
9382e5b6d6dSopenharmony_ci        if (c.fChar == chPound) {
9392e5b6d6dSopenharmony_ci            // Start of a comment.  Consume the rest of it.
9402e5b6d6dSopenharmony_ci            //  The new-line char that terminates the comment is always returned.
9412e5b6d6dSopenharmony_ci            //  It will be treated as white-space, and serves to break up anything
9422e5b6d6dSopenharmony_ci            //    that might otherwise incorrectly clump together with a comment in
9432e5b6d6dSopenharmony_ci            //    the middle (a variable name, for example.)
9442e5b6d6dSopenharmony_ci            int32_t commentStart = fScanIndex;
9452e5b6d6dSopenharmony_ci            for (;;) {
9462e5b6d6dSopenharmony_ci                c.fChar = nextCharLL();
9472e5b6d6dSopenharmony_ci                if (c.fChar == (UChar32)-1 ||  // EOF
9482e5b6d6dSopenharmony_ci                    c.fChar == chCR     ||
9492e5b6d6dSopenharmony_ci                    c.fChar == chLF     ||
9502e5b6d6dSopenharmony_ci                    c.fChar == chNEL    ||
9512e5b6d6dSopenharmony_ci                    c.fChar == chLS)       {break;}
9522e5b6d6dSopenharmony_ci            }
9532e5b6d6dSopenharmony_ci            for (int32_t i=commentStart; i<fNextIndex-1; ++i) {
9542e5b6d6dSopenharmony_ci                fRB->fStrippedRules.setCharAt(i, u' ');
9552e5b6d6dSopenharmony_ci            }
9562e5b6d6dSopenharmony_ci        }
9572e5b6d6dSopenharmony_ci        if (c.fChar == (UChar32)-1) {
9582e5b6d6dSopenharmony_ci            return;
9592e5b6d6dSopenharmony_ci        }
9602e5b6d6dSopenharmony_ci
9612e5b6d6dSopenharmony_ci        //
9622e5b6d6dSopenharmony_ci        //  check for backslash escaped characters.
9632e5b6d6dSopenharmony_ci        //  Use UnicodeString::unescapeAt() to handle them.
9642e5b6d6dSopenharmony_ci        //
9652e5b6d6dSopenharmony_ci        if (c.fChar == chBackSlash) {
9662e5b6d6dSopenharmony_ci            c.fEscaped = true;
9672e5b6d6dSopenharmony_ci            int32_t startX = fNextIndex;
9682e5b6d6dSopenharmony_ci            c.fChar = fRB->fRules.unescapeAt(fNextIndex);
9692e5b6d6dSopenharmony_ci            if (fNextIndex == startX) {
9702e5b6d6dSopenharmony_ci                error(U_BRK_HEX_DIGITS_EXPECTED);
9712e5b6d6dSopenharmony_ci            }
9722e5b6d6dSopenharmony_ci            fCharNum += fNextIndex-startX;
9732e5b6d6dSopenharmony_ci        }
9742e5b6d6dSopenharmony_ci    }
9752e5b6d6dSopenharmony_ci    // putc(c.fChar, stdout);
9762e5b6d6dSopenharmony_ci}
9772e5b6d6dSopenharmony_ci
9782e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
9792e5b6d6dSopenharmony_ci//
9802e5b6d6dSopenharmony_ci//  Parse RBBI rules.   The state machine for rules parsing is here.
9812e5b6d6dSopenharmony_ci//                      The state tables are hand-written in the file rbbirpt.txt,
9822e5b6d6dSopenharmony_ci//                      and converted to the form used here by a perl
9832e5b6d6dSopenharmony_ci//                      script rbbicst.pl
9842e5b6d6dSopenharmony_ci//
9852e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
9862e5b6d6dSopenharmony_civoid RBBIRuleScanner::parse() {
9872e5b6d6dSopenharmony_ci    uint16_t                state;
9882e5b6d6dSopenharmony_ci    const RBBIRuleTableEl  *tableEl;
9892e5b6d6dSopenharmony_ci
9902e5b6d6dSopenharmony_ci    if (U_FAILURE(*fRB->fStatus)) {
9912e5b6d6dSopenharmony_ci        return;
9922e5b6d6dSopenharmony_ci    }
9932e5b6d6dSopenharmony_ci
9942e5b6d6dSopenharmony_ci    state = 1;
9952e5b6d6dSopenharmony_ci    nextChar(fC);
9962e5b6d6dSopenharmony_ci    //
9972e5b6d6dSopenharmony_ci    // Main loop for the rule parsing state machine.
9982e5b6d6dSopenharmony_ci    //   Runs once per state transition.
9992e5b6d6dSopenharmony_ci    //   Each time through optionally performs, depending on the state table,
10002e5b6d6dSopenharmony_ci    //      - an advance to the the next input char
10012e5b6d6dSopenharmony_ci    //      - an action to be performed.
10022e5b6d6dSopenharmony_ci    //      - pushing or popping a state to/from the local state return stack.
10032e5b6d6dSopenharmony_ci    //
10042e5b6d6dSopenharmony_ci    for (;;) {
10052e5b6d6dSopenharmony_ci        //  Bail out if anything has gone wrong.
10062e5b6d6dSopenharmony_ci        //  RBBI rule file parsing stops on the first error encountered.
10072e5b6d6dSopenharmony_ci        if (U_FAILURE(*fRB->fStatus)) {
10082e5b6d6dSopenharmony_ci            break;
10092e5b6d6dSopenharmony_ci        }
10102e5b6d6dSopenharmony_ci
10112e5b6d6dSopenharmony_ci        // Quit if state == 0.  This is the normal way to exit the state machine.
10122e5b6d6dSopenharmony_ci        //
10132e5b6d6dSopenharmony_ci        if (state == 0) {
10142e5b6d6dSopenharmony_ci            break;
10152e5b6d6dSopenharmony_ci        }
10162e5b6d6dSopenharmony_ci
10172e5b6d6dSopenharmony_ci        // Find the state table element that matches the input char from the rule, or the
10182e5b6d6dSopenharmony_ci        //    class of the input character.  Start with the first table row for this
10192e5b6d6dSopenharmony_ci        //    state, then linearly scan forward until we find a row that matches the
10202e5b6d6dSopenharmony_ci        //    character.  The last row for each state always matches all characters, so
10212e5b6d6dSopenharmony_ci        //    the search will stop there, if not before.
10222e5b6d6dSopenharmony_ci        //
10232e5b6d6dSopenharmony_ci        tableEl = &gRuleParseStateTable[state];
10242e5b6d6dSopenharmony_ci        #ifdef RBBI_DEBUG
10252e5b6d6dSopenharmony_ci            if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) {
10262e5b6d6dSopenharmony_ci                RBBIDebugPrintf("char, line, col = (\'%c\', %d, %d)    state=%s ",
10272e5b6d6dSopenharmony_ci                    fC.fChar, fLineNum, fCharNum, RBBIRuleStateNames[state]);
10282e5b6d6dSopenharmony_ci            }
10292e5b6d6dSopenharmony_ci        #endif
10302e5b6d6dSopenharmony_ci
10312e5b6d6dSopenharmony_ci        for (;;) {
10322e5b6d6dSopenharmony_ci            #ifdef RBBI_DEBUG
10332e5b6d6dSopenharmony_ci                if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf("."); fflush(stdout);}
10342e5b6d6dSopenharmony_ci            #endif
10352e5b6d6dSopenharmony_ci            if (tableEl->fCharClass < 127 && fC.fEscaped == false &&   tableEl->fCharClass == fC.fChar) {
10362e5b6d6dSopenharmony_ci                // Table row specified an individual character, not a set, and
10372e5b6d6dSopenharmony_ci                //   the input character is not escaped, and
10382e5b6d6dSopenharmony_ci                //   the input character matched it.
10392e5b6d6dSopenharmony_ci                break;
10402e5b6d6dSopenharmony_ci            }
10412e5b6d6dSopenharmony_ci            if (tableEl->fCharClass == 255) {
10422e5b6d6dSopenharmony_ci                // Table row specified default, match anything character class.
10432e5b6d6dSopenharmony_ci                break;
10442e5b6d6dSopenharmony_ci            }
10452e5b6d6dSopenharmony_ci            if (tableEl->fCharClass == 254 && fC.fEscaped)  {
10462e5b6d6dSopenharmony_ci                // Table row specified "escaped" and the char was escaped.
10472e5b6d6dSopenharmony_ci                break;
10482e5b6d6dSopenharmony_ci            }
10492e5b6d6dSopenharmony_ci            if (tableEl->fCharClass == 253 && fC.fEscaped &&
10502e5b6d6dSopenharmony_ci                (fC.fChar == 0x50 || fC.fChar == 0x70 ))  {
10512e5b6d6dSopenharmony_ci                // Table row specified "escaped P" and the char is either 'p' or 'P'.
10522e5b6d6dSopenharmony_ci                break;
10532e5b6d6dSopenharmony_ci            }
10542e5b6d6dSopenharmony_ci            if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1)  {
10552e5b6d6dSopenharmony_ci                // Table row specified eof and we hit eof on the input.
10562e5b6d6dSopenharmony_ci                break;
10572e5b6d6dSopenharmony_ci            }
10582e5b6d6dSopenharmony_ci
10592e5b6d6dSopenharmony_ci            if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 &&   // Table specs a char class &&
10602e5b6d6dSopenharmony_ci                fC.fEscaped == false &&                                      //   char is not escaped &&
10612e5b6d6dSopenharmony_ci                fC.fChar != (UChar32)-1) {                                   //   char is not EOF
10622e5b6d6dSopenharmony_ci                U_ASSERT((tableEl->fCharClass-128) < UPRV_LENGTHOF(fRuleSets));
10632e5b6d6dSopenharmony_ci                if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) {
10642e5b6d6dSopenharmony_ci                    // Table row specified a character class, or set of characters,
10652e5b6d6dSopenharmony_ci                    //   and the current char matches it.
10662e5b6d6dSopenharmony_ci                    break;
10672e5b6d6dSopenharmony_ci                }
10682e5b6d6dSopenharmony_ci            }
10692e5b6d6dSopenharmony_ci
10702e5b6d6dSopenharmony_ci            // No match on this row, advance to the next  row for this state,
10712e5b6d6dSopenharmony_ci            tableEl++;
10722e5b6d6dSopenharmony_ci        }
10732e5b6d6dSopenharmony_ci        if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPuts("");}
10742e5b6d6dSopenharmony_ci
10752e5b6d6dSopenharmony_ci        //
10762e5b6d6dSopenharmony_ci        // We've found the row of the state table that matches the current input
10772e5b6d6dSopenharmony_ci        //   character from the rules string.
10782e5b6d6dSopenharmony_ci        // Perform any action specified  by this row in the state table.
10792e5b6d6dSopenharmony_ci        if (doParseActions((int32_t)tableEl->fAction) == false) {
10802e5b6d6dSopenharmony_ci            // Break out of the state machine loop if the
10812e5b6d6dSopenharmony_ci            //   the action signalled some kind of error, or
10822e5b6d6dSopenharmony_ci            //   the action was to exit, occurs on normal end-of-rules-input.
10832e5b6d6dSopenharmony_ci            break;
10842e5b6d6dSopenharmony_ci        }
10852e5b6d6dSopenharmony_ci
10862e5b6d6dSopenharmony_ci        if (tableEl->fPushState != 0) {
10872e5b6d6dSopenharmony_ci            fStackPtr++;
10882e5b6d6dSopenharmony_ci            if (fStackPtr >= kStackSize) {
10892e5b6d6dSopenharmony_ci                error(U_BRK_INTERNAL_ERROR);
10902e5b6d6dSopenharmony_ci                RBBIDebugPuts("RBBIRuleScanner::parse() - state stack overflow.");
10912e5b6d6dSopenharmony_ci                fStackPtr--;
10922e5b6d6dSopenharmony_ci            }
10932e5b6d6dSopenharmony_ci            fStack[fStackPtr] = tableEl->fPushState;
10942e5b6d6dSopenharmony_ci        }
10952e5b6d6dSopenharmony_ci
10962e5b6d6dSopenharmony_ci        if (tableEl->fNextChar) {
10972e5b6d6dSopenharmony_ci            nextChar(fC);
10982e5b6d6dSopenharmony_ci        }
10992e5b6d6dSopenharmony_ci
11002e5b6d6dSopenharmony_ci        // Get the next state from the table entry, or from the
11012e5b6d6dSopenharmony_ci        //   state stack if the next state was specified as "pop".
11022e5b6d6dSopenharmony_ci        if (tableEl->fNextState != 255) {
11032e5b6d6dSopenharmony_ci            state = tableEl->fNextState;
11042e5b6d6dSopenharmony_ci        } else {
11052e5b6d6dSopenharmony_ci            state = fStack[fStackPtr];
11062e5b6d6dSopenharmony_ci            fStackPtr--;
11072e5b6d6dSopenharmony_ci            if (fStackPtr < 0) {
11082e5b6d6dSopenharmony_ci                error(U_BRK_INTERNAL_ERROR);
11092e5b6d6dSopenharmony_ci                RBBIDebugPuts("RBBIRuleScanner::parse() - state stack underflow.");
11102e5b6d6dSopenharmony_ci                fStackPtr++;
11112e5b6d6dSopenharmony_ci            }
11122e5b6d6dSopenharmony_ci        }
11132e5b6d6dSopenharmony_ci
11142e5b6d6dSopenharmony_ci    }
11152e5b6d6dSopenharmony_ci
11162e5b6d6dSopenharmony_ci    if (U_FAILURE(*fRB->fStatus)) {
11172e5b6d6dSopenharmony_ci        return;
11182e5b6d6dSopenharmony_ci    }
11192e5b6d6dSopenharmony_ci
11202e5b6d6dSopenharmony_ci    // If there are no forward rules set an error.
11212e5b6d6dSopenharmony_ci    //
11222e5b6d6dSopenharmony_ci    if (fRB->fForwardTree == NULL) {
11232e5b6d6dSopenharmony_ci        error(U_BRK_RULE_SYNTAX);
11242e5b6d6dSopenharmony_ci        return;
11252e5b6d6dSopenharmony_ci    }
11262e5b6d6dSopenharmony_ci
11272e5b6d6dSopenharmony_ci    //
11282e5b6d6dSopenharmony_ci    // Parsing of the input RBBI rules is complete.
11292e5b6d6dSopenharmony_ci    // We now have a parse tree for the rule expressions
11302e5b6d6dSopenharmony_ci    // and a list of all UnicodeSets that are referenced.
11312e5b6d6dSopenharmony_ci    //
11322e5b6d6dSopenharmony_ci#ifdef RBBI_DEBUG
11332e5b6d6dSopenharmony_ci    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "symbols")) {fSymbolTable->rbbiSymtablePrint();}
11342e5b6d6dSopenharmony_ci    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) {
11352e5b6d6dSopenharmony_ci        RBBIDebugPrintf("Completed Forward Rules Parse Tree...\n");
11362e5b6d6dSopenharmony_ci        RBBINode::printTree(fRB->fForwardTree, true);
11372e5b6d6dSopenharmony_ci        RBBIDebugPrintf("\nCompleted Reverse Rules Parse Tree...\n");
11382e5b6d6dSopenharmony_ci        RBBINode::printTree(fRB->fReverseTree, true);
11392e5b6d6dSopenharmony_ci        RBBIDebugPrintf("\nCompleted Safe Point Forward Rules Parse Tree...\n");
11402e5b6d6dSopenharmony_ci        RBBINode::printTree(fRB->fSafeFwdTree, true);
11412e5b6d6dSopenharmony_ci        RBBIDebugPrintf("\nCompleted Safe Point Reverse Rules Parse Tree...\n");
11422e5b6d6dSopenharmony_ci        RBBINode::printTree(fRB->fSafeRevTree, true);
11432e5b6d6dSopenharmony_ci    }
11442e5b6d6dSopenharmony_ci#endif
11452e5b6d6dSopenharmony_ci}
11462e5b6d6dSopenharmony_ci
11472e5b6d6dSopenharmony_ci
//------------------------------------------------------------------------------
//
//  printNodeStack     Debugging aid.  Prints the supplied title, then dumps
//                     the parse tree rooted at each entry of the node stack,
//                     starting with the top entry and working downwards.
//                     Only compiled when RBBI_DEBUG is defined.
//
//------------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBIRuleScanner::printNodeStack(const char *title) {
    RBBIDebugPrintf("%s.  Dumping node stack...\n", title);

    // Walk from the current top of the stack down to slot 1.
    // Slot 0 is deliberately not visited (the loop stops once the index hits 0).
    int slot = fNodeStackPtr;
    while (slot > 0) {
        RBBINode::printTree(fNodeStack[slot], true);
        --slot;
    }
}
#endif
11602e5b6d6dSopenharmony_ci
11612e5b6d6dSopenharmony_ci
11622e5b6d6dSopenharmony_ci
11632e5b6d6dSopenharmony_ci
11642e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
11652e5b6d6dSopenharmony_ci//
11662e5b6d6dSopenharmony_ci//  pushNewNode   create a new RBBINode of the specified type and push it
11672e5b6d6dSopenharmony_ci//                onto the stack of nodes.
11682e5b6d6dSopenharmony_ci//
11692e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
11702e5b6d6dSopenharmony_ciRBBINode  *RBBIRuleScanner::pushNewNode(RBBINode::NodeType  t) {
11712e5b6d6dSopenharmony_ci    if (U_FAILURE(*fRB->fStatus)) {
11722e5b6d6dSopenharmony_ci        return NULL;
11732e5b6d6dSopenharmony_ci    }
11742e5b6d6dSopenharmony_ci    if (fNodeStackPtr >= kStackSize - 1) {
11752e5b6d6dSopenharmony_ci        error(U_BRK_RULE_SYNTAX);
11762e5b6d6dSopenharmony_ci        RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow.");
11772e5b6d6dSopenharmony_ci        return NULL;
11782e5b6d6dSopenharmony_ci    }
11792e5b6d6dSopenharmony_ci    fNodeStackPtr++;
11802e5b6d6dSopenharmony_ci    fNodeStack[fNodeStackPtr] = new RBBINode(t);
11812e5b6d6dSopenharmony_ci    if (fNodeStack[fNodeStackPtr] == NULL) {
11822e5b6d6dSopenharmony_ci        *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR;
11832e5b6d6dSopenharmony_ci    }
11842e5b6d6dSopenharmony_ci    return fNodeStack[fNodeStackPtr];
11852e5b6d6dSopenharmony_ci}
11862e5b6d6dSopenharmony_ci
11872e5b6d6dSopenharmony_ci
11882e5b6d6dSopenharmony_ci
11892e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
11902e5b6d6dSopenharmony_ci//
11912e5b6d6dSopenharmony_ci//  scanSet    Construct a UnicodeSet from the text at the current scan
11922e5b6d6dSopenharmony_ci//             position.  Advance the scan position to the first character
11932e5b6d6dSopenharmony_ci//             after the set.
11942e5b6d6dSopenharmony_ci//
11952e5b6d6dSopenharmony_ci//             A new RBBI setref node referring to the set is pushed onto the node
11962e5b6d6dSopenharmony_ci//             stack.
11972e5b6d6dSopenharmony_ci//
11982e5b6d6dSopenharmony_ci//             The scan position is normally under the control of the state machine
11992e5b6d6dSopenharmony_ci//             that controls rule parsing.  UnicodeSets, however, are parsed by
12002e5b6d6dSopenharmony_ci//             the UnicodeSet constructor, not by the RBBI rule parser.
12012e5b6d6dSopenharmony_ci//
12022e5b6d6dSopenharmony_ci//------------------------------------------------------------------------------
12032e5b6d6dSopenharmony_civoid RBBIRuleScanner::scanSet() {
12042e5b6d6dSopenharmony_ci    UnicodeSet    *uset;
12052e5b6d6dSopenharmony_ci    ParsePosition  pos;
12062e5b6d6dSopenharmony_ci    int            startPos;
12072e5b6d6dSopenharmony_ci    int            i;
12082e5b6d6dSopenharmony_ci
12092e5b6d6dSopenharmony_ci    if (U_FAILURE(*fRB->fStatus)) {
12102e5b6d6dSopenharmony_ci        return;
12112e5b6d6dSopenharmony_ci    }
12122e5b6d6dSopenharmony_ci
12132e5b6d6dSopenharmony_ci    pos.setIndex(fScanIndex);
12142e5b6d6dSopenharmony_ci    startPos = fScanIndex;
12152e5b6d6dSopenharmony_ci    UErrorCode localStatus = U_ZERO_ERROR;
12162e5b6d6dSopenharmony_ci    uset = new UnicodeSet();
12172e5b6d6dSopenharmony_ci    if (uset == NULL) {
12182e5b6d6dSopenharmony_ci        localStatus = U_MEMORY_ALLOCATION_ERROR;
12192e5b6d6dSopenharmony_ci    } else {
12202e5b6d6dSopenharmony_ci        uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
12212e5b6d6dSopenharmony_ci    }
12222e5b6d6dSopenharmony_ci    if (U_FAILURE(localStatus)) {
12232e5b6d6dSopenharmony_ci        //  TODO:  Get more accurate position of the error from UnicodeSet's return info.
12242e5b6d6dSopenharmony_ci        //         UnicodeSet appears to not be reporting correctly at this time.
12252e5b6d6dSopenharmony_ci        #ifdef RBBI_DEBUG
12262e5b6d6dSopenharmony_ci            RBBIDebugPrintf("UnicodeSet parse position.ErrorIndex = %d\n", pos.getIndex());
12272e5b6d6dSopenharmony_ci        #endif
12282e5b6d6dSopenharmony_ci        error(localStatus);
12292e5b6d6dSopenharmony_ci        delete uset;
12302e5b6d6dSopenharmony_ci        return;
12312e5b6d6dSopenharmony_ci    }
12322e5b6d6dSopenharmony_ci
12332e5b6d6dSopenharmony_ci    // Verify that the set contains at least one code point.
12342e5b6d6dSopenharmony_ci    //
12352e5b6d6dSopenharmony_ci    U_ASSERT(uset!=NULL);
12362e5b6d6dSopenharmony_ci    if (uset->isEmpty()) {
12372e5b6d6dSopenharmony_ci        // This set is empty.
12382e5b6d6dSopenharmony_ci        //  Make it an error, because it almost certainly is not what the user wanted.
12392e5b6d6dSopenharmony_ci        //  Also, avoids having to think about corner cases in the tree manipulation code
12402e5b6d6dSopenharmony_ci        //   that occurs later on.
12412e5b6d6dSopenharmony_ci        error(U_BRK_RULE_EMPTY_SET);
12422e5b6d6dSopenharmony_ci        delete uset;
12432e5b6d6dSopenharmony_ci        return;
12442e5b6d6dSopenharmony_ci    }
12452e5b6d6dSopenharmony_ci
12462e5b6d6dSopenharmony_ci
12472e5b6d6dSopenharmony_ci    // Advance the RBBI parse position over the UnicodeSet pattern.
12482e5b6d6dSopenharmony_ci    //   Don't just set fScanIndex because the line/char positions maintained
12492e5b6d6dSopenharmony_ci    //   for error reporting would be thrown off.
12502e5b6d6dSopenharmony_ci    i = pos.getIndex();
12512e5b6d6dSopenharmony_ci    for (;;) {
12522e5b6d6dSopenharmony_ci        if (fNextIndex >= i) {
12532e5b6d6dSopenharmony_ci            break;
12542e5b6d6dSopenharmony_ci        }
12552e5b6d6dSopenharmony_ci        nextCharLL();
12562e5b6d6dSopenharmony_ci    }
12572e5b6d6dSopenharmony_ci
12582e5b6d6dSopenharmony_ci    if (U_SUCCESS(*fRB->fStatus)) {
12592e5b6d6dSopenharmony_ci        RBBINode         *n;
12602e5b6d6dSopenharmony_ci
12612e5b6d6dSopenharmony_ci        n = pushNewNode(RBBINode::setRef);
12622e5b6d6dSopenharmony_ci        if (U_FAILURE(*fRB->fStatus)) {
12632e5b6d6dSopenharmony_ci            return;
12642e5b6d6dSopenharmony_ci        }
12652e5b6d6dSopenharmony_ci        n->fFirstPos = startPos;
12662e5b6d6dSopenharmony_ci        n->fLastPos  = fNextIndex;
12672e5b6d6dSopenharmony_ci        fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
12682e5b6d6dSopenharmony_ci        //  findSetFor() serves several purposes here:
12692e5b6d6dSopenharmony_ci        //     - Adopts storage for the UnicodeSet, will be responsible for deleting.
12702e5b6d6dSopenharmony_ci        //     - Maintains collection of all sets in use, needed later for establishing
12712e5b6d6dSopenharmony_ci        //          character categories for run time engine.
12722e5b6d6dSopenharmony_ci        //     - Eliminates mulitiple instances of the same set.
12732e5b6d6dSopenharmony_ci        //     - Creates a new uset node if necessary (if this isn't a duplicate.)
12742e5b6d6dSopenharmony_ci        findSetFor(n->fText, n, uset);
12752e5b6d6dSopenharmony_ci    }
12762e5b6d6dSopenharmony_ci
12772e5b6d6dSopenharmony_ci}
12782e5b6d6dSopenharmony_ci
12792e5b6d6dSopenharmony_ciint32_t RBBIRuleScanner::numRules() {
12802e5b6d6dSopenharmony_ci    return fRuleNum;
12812e5b6d6dSopenharmony_ci}
12822e5b6d6dSopenharmony_ci
12832e5b6d6dSopenharmony_ciU_NAMESPACE_END
12842e5b6d6dSopenharmony_ci
12852e5b6d6dSopenharmony_ci#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1286