
#*****************************************************************************
#
#   Copyright (C) 2016 and later: Unicode, Inc. and others.
#   License & terms of use: http://www.unicode.org/copyright.html
#
#*****************************************************************************
#*****************************************************************************
#
#   Copyright (C) 2002-2016, International Business Machines Corporation and others.
#   All Rights Reserved.
#
#*****************************************************************************
#
#   file:  rbbirpt.txt
#   ICU Break Iterator Rule Parser State Table
#
#     This state table is used when reading and parsing a set of RBBI rules.
#     The rule parser uses a state machine; the data in this file define the
#     state transitions that occur for each input character.
#
#     *** This file defines the RBBI rule grammar.   This is it.
#     *** The determination of what is accepted is here.
#
#     This file is processed by a perl script "rbbicst.pl" to produce initialized C arrays
#     that are then built with the rule parser.
#
#    perl rbbicst.pl    < rbbirpt.txt > rbbirpt.h

#
# Here is the syntax of the state definitions in this file:
#
#
#StateName:
#   input-char           n next-state           ^push-state     action
#   input-char           n next-state           ^push-state     action
#       |                |   |                      |             |
#       |                |   |                      |             |--- action to be performed by state machine
#       |                |   |                      |                  See function RBBIRuleScanner::doParseActions()
#       |                |   |                      |
#       |                |   |                      |--- Push this named state onto the state stack.
#       |                |   |                           Later, when next state is specified as "pop",
#       |                |   |                           the pushed state will become the current state.
#       |                |   |
#       |                |   |--- Transition to this state if the current input character matches the input
#       |                |        character or char class in the left hand column.  "pop" causes the next
#       |                |        state to be popped from the state stack.
#       |                |
#       |                |--- When making the state transition specified on this line, advance to the next
#       |                     character from the input only if 'n' appears here.
#       |
#       |--- Character or named character classes to test for.  If the current character being scanned
#            matches, perform the actions and go to the state specified on this line.
#            The input character is tested sequentially, in the order written.  The characters and
#            character classes tested for do not need to be mutually exclusive.  The first match wins.
#




#
#  start:  the scan position is at the very beginning of the rules file,
#          or in the gap between two rules.
#
start:
    escaped          term                 ^break-rule-end   doExprStart
    white_space    n start
    '^'            n start-after-caret    ^break-rule-end   doNoChain
    '$'              scan-var-name        ^assign-or-rule   doExprStart
    '!'            n rev-option
    ';'            n start                                                  # an empty rule is skipped.
    eof              exit
    default          term                 ^break-rule-end   doExprStart

#
#  break-rule-end:  we arrive here by a pop once a break-rule expression is done.
#                   White space is skipped; a ';' ends the rule; anything else
#                   is reported as a rule error.
#
break-rule-end:
    ';'            n start                                  doEndOfRule
    white_space    n break-rule-end
    default          errorDeath                             doRuleError

#
#  start of a rule, after having seen a '^' (inhibits rule chain in).
#     Similar to the main 'start' state in most respects, except
#          - empty rule is an error.
#          - A second '^' is an error.
#
start-after-caret:
    escaped          term                                   doExprStart
    white_space    n start-after-caret
    '^'              errorDeath                             doRuleError     # a second '^'
    '$'              scan-var-name        ^term-var-ref     doExprStart
    ';'              errorDeath                             doRuleError     # '^' directly followed by ';'
    eof              errorDeath                             doRuleError
    default          term                                   doExprStart

#
#  rev-option:  a '!' has just been scanned.  A second '!' starts a !!keyword
#               option; any other character means this is a !Reverse rule.
#
rev-option:
    '!'            n option-scan1
    default          reverse-rule         ^break-rule-end   doReverseDir

#  option-scan1:  the first character of the option name must be a name_start_char.
option-scan1:
    name_start_char  n option-scan2                         doOptionStart
    default            errorDeath                           doRuleError

#  option-scan2:  consume name_char's; the first other character (not consumed
#                 here) ends the option name.
option-scan2:
    name_char      n option-scan2
    default          option-scan3                           doOptionEnd

#  option-scan3:  after the option name; skip white space and require a ';'.
option-scan3:
    ';'            n start
    white_space    n option-scan3
    default          errorDeath                             doRuleError


#  reverse-rule:  unconditionally begin parsing the expression of the rule.
reverse-rule:
    default          term                 ^break-rule-end   doExprStart


#
#  term.  Eat through a single rule character, or a composite thing, which
#         could be a parenthesized expression, a variable name, or a Unicode Set.
#
term:
    escaped        n expr-mod                               doRuleChar
    white_space    n term
    rule_char      n expr-mod                               doRuleChar
    '['              scan-unicode-set     ^expr-mod
    '('            n term                 ^expr-mod         doLParen
    '$'              scan-var-name        ^term-var-ref
    '.'            n expr-mod                               doDotAny
    default          errorDeath                             doRuleError



#
#  term-var-ref:  a reference to a $variable has just been scanned; verify that
#                 the variable has been defined.  The name-scanning states are
#                 shared with assignment statements, so the check cannot be
#                 made while scanning the name itself.
#
term-var-ref:
    default          expr-mod                               doCheckVarDef


#
#  expr-mod:  a term has just been completed; accept an optional trailing
#             '*', '+' or '?' operator, then continue with expr-cont.
#
expr-mod:
    white_space    n expr-mod
    '*'            n expr-cont                              doUnaryOpStar
    '+'            n expr-cont                              doUnaryOpPlus
    '?'            n expr-cont                              doUnaryOpQuestion
    default          expr-cont


#
#  expr-cont      Expression, continuation.  At a point where additional terms are
#                                            allowed, but not required.
#
#  Rows that go to 'term' without an 'n' leave the current character in place,
#  so that 'term' sees it as the first character of the next term.
#
expr-cont:
    escaped          term                                   doExprCatOperator
    white_space    n expr-cont
    rule_char        term                                   doExprCatOperator
    '['              term                                   doExprCatOperator
    '('              term                                   doExprCatOperator
    '$'              term                                   doExprCatOperator
    '.'              term                                   doExprCatOperator
    '/'              look-ahead                             doExprCatOperator
    '{'            n tag-open                               doExprCatOperator
    '|'            n term                                   doExprOrOperator
    ')'            n pop                                    doExprRParen
    default          pop                                    doExprFinished


#
#  look-ahead:  positioned at a '/', which marks a break point that applies
#               provided the rest of the expression matches.
#
#               The parse tree is built as though the '/' were a special input
#               symbol inside an otherwise ordinary concatenation expression.
#
look-ahead:
    '/'            n expr-cont-no-slash                     doSlash
    default          errorDeath


#
#  expr-cont-no-slash    Expression, continuation.  At a point where additional terms are
#                                            allowed, but not required.  Just like
#                                            expr-cont, above, except that no '/'
#                                            look-ahead symbol is permitted.
#
#  White space must loop back to this same state.  Going to plain expr-cont
#  instead would let a second '/' (and a '{' tag) be accepted after a blank.
#
expr-cont-no-slash:
    escaped                 term                                    doExprCatOperator
    white_space          n  expr-cont-no-slash
    rule_char               term                                    doExprCatOperator
    '['                     term                                    doExprCatOperator
    '('                     term                                    doExprCatOperator
    '$'                     term                                    doExprCatOperator
    '.'                     term                                    doExprCatOperator
    '|'                  n  term                                    doExprOrOperator
    ')'                  n  pop                                     doExprRParen
    default                 pop                                     doExprFinished


#
#   tags             scanning a '{', the opening delimiter for a tag that identifies
#                    the kind of match.  Scan the whole {dddd} tag, where d=digit
#
#   tag-open:   the '{' has already been consumed.  Skip white space; the first
#               digit is left in place for tag-value to consume.
tag-open:
    white_space          n  tag-open
    digit_char              tag-value                               doStartTagValue
    default                 errorDeath                              doTagExpectedError

#   tag-value:  consume digits one at a time.  White space or '}' ends the digits;
#               the '}' itself is left for tag-close to consume.
tag-value:
    white_space          n  tag-close
    '}'                     tag-close
    digit_char           n  tag-value                               doTagDigit
    default                 errorDeath                              doTagExpectedError

#   tag-close:  skip trailing white space and require the closing '}'.
tag-close:
    white_space          n  tag-close
    '}'                  n  expr-cont-no-tag                        doTagValue
    default                 errorDeath                              doTagExpectedError



#
#  expr-cont-no-tag    Expression, continuation.  At a point where additional terms are
#                                            allowed, but not required.
#                                            Just like
#                                            expr-cont, above, except that no "{ddd}"
#                                            tagging is permitted.
#
#  Compared with expr-cont, the only row missing from this table is the '{' row.
#
expr-cont-no-tag:
    escaped          term                                   doExprCatOperator
    white_space    n expr-cont-no-tag
    rule_char        term                                   doExprCatOperator
    '['              term                                   doExprCatOperator
    '('              term                                   doExprCatOperator
    '$'              term                                   doExprCatOperator
    '.'              term                                   doExprCatOperator
    '/'              look-ahead                             doExprCatOperator
    '|'            n term                                   doExprOrOperator
    ')'            n pop                                    doExprRParen
    default          pop                                    doExprFinished




#
#   Variable Name Scanning.
#
#                    The state that branched to here must have pushed a return state
#                    to go to after completion of the variable name scanning.
#
#                    The current input character must be the $ that introduces the name.
#                    The $ is consumed here rather than in the state that first detected it
#                    so that the doStartVariableName action only needs to happen in one
#                    place (here), and the other states don't need to worry about it.
#
scan-var-name:
    '$'              n scan-var-start                       doStartVariableName
    default            errorDeath


#  scan-var-start:  the character right after the '$' must be a name_start_char.
scan-var-start:
    name_start_char  n scan-var-body
    default            errorDeath                           doVariableNameExpectedErr

#  scan-var-body:  consume name_char's; the first other character (not consumed
#                  here) ends the name and pops back to the pushed return state.
scan-var-body:
    name_char        n scan-var-body
    default            pop                                  doEndVariableName



#
#  scan-unicode-set   Unicode Sets are parsed by the UnicodeSet class.
#                     Within the RBBI parser, after finding the first character
#                     of a Unicode Set, we just hand the rule input at that
#                     point off to the Unicode Set constructor, then pick
#                     up parsing after the close of the set.
#
#                     The action for this state invokes the UnicodeSet parser.
#
scan-unicode-set:
    '['              n pop                                  doScanUnicodeSet
    'p'              n pop                                  doScanUnicodeSet
    'P'              n pop                                  doScanUnicodeSet
    default            errorDeath







#
#  assign-or-rule.   A $variable was encountered at the start of something, could be
#                    either an assignment statement or a rule, depending on whether an '='
#                    follows the variable name.
#                    We get to this state when the variable name
#                    scanning does a return.
#
assign-or-rule:
    white_space    n assign-or-rule
    '='            n term                 ^assign-end       doStartAssign   # the variable is the target of an assignment
    default          term-var-ref         ^break-rule-end                   # the variable is a term of a rule



#
#  assign-end:  entered by a pop when the expression on the right hand side of
#               an assignment ends; this state was pushed when the '=' was seen.
#
#               A ';' is the only input accepted here.  The right hand side
#               must look like a rule expression, and we get here as soon as
#               the input being scanned stops looking like one.
#
assign-end:
    ';'            n start                                  doEndAssign
    default          errorDeath                             doRuleErrorAssignExpr



#
#  errorDeath:  named as the next state wherever a syntax error in the source
#               rules is detected.  Barring bugs, the state machine never
#               actually arrives here, because the action tied to the error
#               stops it first.  Just in case, this state asks the state
#               machine to exit.
errorDeath:
    default        n errorDeath                             doExit


