11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci// 41cb0ef41Sopenharmony_ci// file: rbbirb.cpp 51cb0ef41Sopenharmony_ci// 61cb0ef41Sopenharmony_ci// Copyright (C) 2002-2011, International Business Machines Corporation and others. 71cb0ef41Sopenharmony_ci// All Rights Reserved. 81cb0ef41Sopenharmony_ci// 91cb0ef41Sopenharmony_ci// This file contains the RBBIRuleBuilder class implementation. This is the main class for 101cb0ef41Sopenharmony_ci// building (compiling) break rules into the tables required by the runtime 111cb0ef41Sopenharmony_ci// RBBI engine. 121cb0ef41Sopenharmony_ci// 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 151cb0ef41Sopenharmony_ci 161cb0ef41Sopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION 171cb0ef41Sopenharmony_ci 181cb0ef41Sopenharmony_ci#include "unicode/brkiter.h" 191cb0ef41Sopenharmony_ci#include "unicode/rbbi.h" 201cb0ef41Sopenharmony_ci#include "unicode/ubrk.h" 211cb0ef41Sopenharmony_ci#include "unicode/unistr.h" 221cb0ef41Sopenharmony_ci#include "unicode/uniset.h" 231cb0ef41Sopenharmony_ci#include "unicode/uchar.h" 241cb0ef41Sopenharmony_ci#include "unicode/uchriter.h" 251cb0ef41Sopenharmony_ci#include "unicode/ustring.h" 261cb0ef41Sopenharmony_ci#include "unicode/parsepos.h" 271cb0ef41Sopenharmony_ci#include "unicode/parseerr.h" 281cb0ef41Sopenharmony_ci 291cb0ef41Sopenharmony_ci#include "cmemory.h" 301cb0ef41Sopenharmony_ci#include "cstring.h" 311cb0ef41Sopenharmony_ci#include "rbbirb.h" 321cb0ef41Sopenharmony_ci#include "rbbinode.h" 331cb0ef41Sopenharmony_ci#include "rbbiscan.h" 341cb0ef41Sopenharmony_ci#include "rbbisetb.h" 351cb0ef41Sopenharmony_ci#include "rbbitblb.h" 361cb0ef41Sopenharmony_ci#include "rbbidata.h" 371cb0ef41Sopenharmony_ci#include "uassert.h" 381cb0ef41Sopenharmony_ci 391cb0ef41Sopenharmony_ci 401cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN 411cb0ef41Sopenharmony_ci 421cb0ef41Sopenharmony_ci 431cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------------------- 441cb0ef41Sopenharmony_ci// 451cb0ef41Sopenharmony_ci// Constructor. 461cb0ef41Sopenharmony_ci// 471cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------------------- 481cb0ef41Sopenharmony_ciRBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, 491cb0ef41Sopenharmony_ci UParseError *parseErr, 501cb0ef41Sopenharmony_ci UErrorCode &status) 511cb0ef41Sopenharmony_ci : fRules(rules), fStrippedRules(rules) 521cb0ef41Sopenharmony_ci{ 531cb0ef41Sopenharmony_ci fStatus = &status; // status is checked below 541cb0ef41Sopenharmony_ci fParseError = parseErr; 551cb0ef41Sopenharmony_ci fDebugEnv = nullptr; 561cb0ef41Sopenharmony_ci#ifdef RBBI_DEBUG 571cb0ef41Sopenharmony_ci fDebugEnv = getenv("U_RBBIDEBUG"); 581cb0ef41Sopenharmony_ci#endif 591cb0ef41Sopenharmony_ci 601cb0ef41Sopenharmony_ci 611cb0ef41Sopenharmony_ci fForwardTree = nullptr; 621cb0ef41Sopenharmony_ci fReverseTree = nullptr; 631cb0ef41Sopenharmony_ci fSafeFwdTree = nullptr; 641cb0ef41Sopenharmony_ci fSafeRevTree = nullptr; 651cb0ef41Sopenharmony_ci fDefaultTree = &fForwardTree; 661cb0ef41Sopenharmony_ci fForwardTable = nullptr; 671cb0ef41Sopenharmony_ci fRuleStatusVals = nullptr; 681cb0ef41Sopenharmony_ci fChainRules = false; 691cb0ef41Sopenharmony_ci fLookAheadHardBreak = false; 701cb0ef41Sopenharmony_ci fUSetNodes = nullptr; 711cb0ef41Sopenharmony_ci fRuleStatusVals = nullptr; 721cb0ef41Sopenharmony_ci fScanner = nullptr; 731cb0ef41Sopenharmony_ci fSetBuilder = nullptr; 741cb0ef41Sopenharmony_ci if (parseErr) { 751cb0ef41Sopenharmony_ci uprv_memset(parseErr, 0, sizeof(UParseError)); 761cb0ef41Sopenharmony_ci } 771cb0ef41Sopenharmony_ci 781cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 791cb0ef41Sopenharmony_ci return; 801cb0ef41Sopenharmony_ci } 811cb0ef41Sopenharmony_ci 821cb0ef41Sopenharmony_ci fUSetNodes = new UVector(status); // bcos status gets overwritten here 831cb0ef41Sopenharmony_ci fRuleStatusVals = new UVector(status); 841cb0ef41Sopenharmony_ci fScanner = new RBBIRuleScanner(this); 851cb0ef41Sopenharmony_ci fSetBuilder = new RBBISetBuilder(this); 861cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 871cb0ef41Sopenharmony_ci return; 881cb0ef41Sopenharmony_ci } 891cb0ef41Sopenharmony_ci if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) { 901cb0ef41Sopenharmony_ci status = U_MEMORY_ALLOCATION_ERROR; 911cb0ef41Sopenharmony_ci } 921cb0ef41Sopenharmony_ci} 931cb0ef41Sopenharmony_ci 941cb0ef41Sopenharmony_ci 951cb0ef41Sopenharmony_ci 961cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------------------- 971cb0ef41Sopenharmony_ci// 981cb0ef41Sopenharmony_ci// Destructor 991cb0ef41Sopenharmony_ci// 1001cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------------------- 1011cb0ef41Sopenharmony_ciRBBIRuleBuilder::~RBBIRuleBuilder() { 1021cb0ef41Sopenharmony_ci 1031cb0ef41Sopenharmony_ci int i; 1041cb0ef41Sopenharmony_ci for (i=0; ; i++) { 1051cb0ef41Sopenharmony_ci RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i); 1061cb0ef41Sopenharmony_ci if (n==nullptr) { 1071cb0ef41Sopenharmony_ci break; 1081cb0ef41Sopenharmony_ci } 1091cb0ef41Sopenharmony_ci delete n; 1101cb0ef41Sopenharmony_ci } 1111cb0ef41Sopenharmony_ci 1121cb0ef41Sopenharmony_ci delete fUSetNodes; 1131cb0ef41Sopenharmony_ci delete fSetBuilder; 1141cb0ef41Sopenharmony_ci delete fForwardTable; 1151cb0ef41Sopenharmony_ci delete fForwardTree; 1161cb0ef41Sopenharmony_ci delete fReverseTree; 1171cb0ef41Sopenharmony_ci delete fSafeFwdTree; 1181cb0ef41Sopenharmony_ci delete fSafeRevTree; 1191cb0ef41Sopenharmony_ci delete fScanner; 1201cb0ef41Sopenharmony_ci delete fRuleStatusVals; 1211cb0ef41Sopenharmony_ci} 1221cb0ef41Sopenharmony_ci 1231cb0ef41Sopenharmony_ci 1241cb0ef41Sopenharmony_ci 1251cb0ef41Sopenharmony_ci 1261cb0ef41Sopenharmony_ci 1271cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------------------- 1281cb0ef41Sopenharmony_ci// 1291cb0ef41Sopenharmony_ci// flattenData() - Collect up the compiled RBBI rule data and put it into 1301cb0ef41Sopenharmony_ci// the format for saving in ICU data files, 1311cb0ef41Sopenharmony_ci// which is also the format needed by the RBBI runtime engine. 1321cb0ef41Sopenharmony_ci// 1331cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------------------- 1341cb0ef41Sopenharmony_cistatic int32_t align8(int32_t i) {return (i+7) & 0xfffffff8;} 1351cb0ef41Sopenharmony_ci 1361cb0ef41Sopenharmony_ciRBBIDataHeader *RBBIRuleBuilder::flattenData() { 1371cb0ef41Sopenharmony_ci int32_t i; 1381cb0ef41Sopenharmony_ci 1391cb0ef41Sopenharmony_ci if (U_FAILURE(*fStatus)) { 1401cb0ef41Sopenharmony_ci return nullptr; 1411cb0ef41Sopenharmony_ci } 1421cb0ef41Sopenharmony_ci 1431cb0ef41Sopenharmony_ci // Remove whitespace from the rules to make it smaller. 1441cb0ef41Sopenharmony_ci // The rule parser has already removed comments. 1451cb0ef41Sopenharmony_ci fStrippedRules = fScanner->stripRules(fStrippedRules); 1461cb0ef41Sopenharmony_ci 1471cb0ef41Sopenharmony_ci // Calculate the size of each section in the data. 1481cb0ef41Sopenharmony_ci // Sizes here are padded up to a multiple of 8 for better memory alignment. 1491cb0ef41Sopenharmony_ci // Sections sizes actually stored in the header are for the actual data 1501cb0ef41Sopenharmony_ci // without the padding. 1511cb0ef41Sopenharmony_ci // 1521cb0ef41Sopenharmony_ci int32_t headerSize = align8(sizeof(RBBIDataHeader)); 1531cb0ef41Sopenharmony_ci int32_t forwardTableSize = align8(fForwardTable->getTableSize()); 1541cb0ef41Sopenharmony_ci int32_t reverseTableSize = align8(fForwardTable->getSafeTableSize()); 1551cb0ef41Sopenharmony_ci int32_t trieSize = align8(fSetBuilder->getTrieSize()); 1561cb0ef41Sopenharmony_ci int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); 1571cb0ef41Sopenharmony_ci 1581cb0ef41Sopenharmony_ci int32_t rulesLengthInUTF8 = 0; 1591cb0ef41Sopenharmony_ci u_strToUTF8WithSub(0, 0, &rulesLengthInUTF8, 1601cb0ef41Sopenharmony_ci fStrippedRules.getBuffer(), fStrippedRules.length(), 1611cb0ef41Sopenharmony_ci 0xfffd, nullptr, fStatus); 1621cb0ef41Sopenharmony_ci *fStatus = U_ZERO_ERROR; 1631cb0ef41Sopenharmony_ci 1641cb0ef41Sopenharmony_ci int32_t rulesSize = align8((rulesLengthInUTF8+1)); 1651cb0ef41Sopenharmony_ci 1661cb0ef41Sopenharmony_ci int32_t totalSize = headerSize 1671cb0ef41Sopenharmony_ci + forwardTableSize 1681cb0ef41Sopenharmony_ci + reverseTableSize 1691cb0ef41Sopenharmony_ci + statusTableSize + trieSize + rulesSize; 1701cb0ef41Sopenharmony_ci 1711cb0ef41Sopenharmony_ci#ifdef RBBI_DEBUG 1721cb0ef41Sopenharmony_ci if (fDebugEnv && uprv_strstr(fDebugEnv, "size")) { 1731cb0ef41Sopenharmony_ci RBBIDebugPrintf("Header Size: %8d\n", headerSize); 1741cb0ef41Sopenharmony_ci RBBIDebugPrintf("Forward Table Size: %8d\n", forwardTableSize); 1751cb0ef41Sopenharmony_ci RBBIDebugPrintf("Reverse Table Size: %8d\n", reverseTableSize); 1761cb0ef41Sopenharmony_ci RBBIDebugPrintf("Trie Size: %8d\n", trieSize); 1771cb0ef41Sopenharmony_ci RBBIDebugPrintf("Status Table Size: %8d\n", statusTableSize); 1781cb0ef41Sopenharmony_ci RBBIDebugPrintf("Rules Size: %8d\n", rulesSize); 1791cb0ef41Sopenharmony_ci RBBIDebugPrintf("-----------------------------\n"); 1801cb0ef41Sopenharmony_ci RBBIDebugPrintf("Total Size: %8d\n", totalSize); 1811cb0ef41Sopenharmony_ci } 1821cb0ef41Sopenharmony_ci#endif 1831cb0ef41Sopenharmony_ci 1841cb0ef41Sopenharmony_ci RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize); 1851cb0ef41Sopenharmony_ci if (data == nullptr) { 1861cb0ef41Sopenharmony_ci *fStatus = U_MEMORY_ALLOCATION_ERROR; 1871cb0ef41Sopenharmony_ci return nullptr; 1881cb0ef41Sopenharmony_ci } 1891cb0ef41Sopenharmony_ci uprv_memset(data, 0, totalSize); 1901cb0ef41Sopenharmony_ci 1911cb0ef41Sopenharmony_ci 1921cb0ef41Sopenharmony_ci data->fMagic = 0xb1a0; 1931cb0ef41Sopenharmony_ci data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0]; 1941cb0ef41Sopenharmony_ci data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1]; 1951cb0ef41Sopenharmony_ci data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2]; 1961cb0ef41Sopenharmony_ci data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3]; 1971cb0ef41Sopenharmony_ci data->fLength = totalSize; 1981cb0ef41Sopenharmony_ci data->fCatCount = fSetBuilder->getNumCharCategories(); 1991cb0ef41Sopenharmony_ci 2001cb0ef41Sopenharmony_ci data->fFTable = headerSize; 2011cb0ef41Sopenharmony_ci data->fFTableLen = forwardTableSize; 2021cb0ef41Sopenharmony_ci 2031cb0ef41Sopenharmony_ci data->fRTable = data->fFTable + data->fFTableLen; 2041cb0ef41Sopenharmony_ci data->fRTableLen = reverseTableSize; 2051cb0ef41Sopenharmony_ci 2061cb0ef41Sopenharmony_ci data->fTrie = data->fRTable + data->fRTableLen; 2071cb0ef41Sopenharmony_ci data->fTrieLen = trieSize; 2081cb0ef41Sopenharmony_ci data->fStatusTable = data->fTrie + data->fTrieLen; 2091cb0ef41Sopenharmony_ci data->fStatusTableLen= statusTableSize; 2101cb0ef41Sopenharmony_ci data->fRuleSource = data->fStatusTable + statusTableSize; 2111cb0ef41Sopenharmony_ci data->fRuleSourceLen = rulesLengthInUTF8; 2121cb0ef41Sopenharmony_ci 2131cb0ef41Sopenharmony_ci uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); 2141cb0ef41Sopenharmony_ci 2151cb0ef41Sopenharmony_ci fForwardTable->exportTable((uint8_t *)data + data->fFTable); 2161cb0ef41Sopenharmony_ci fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable); 2171cb0ef41Sopenharmony_ci fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie); 2181cb0ef41Sopenharmony_ci 2191cb0ef41Sopenharmony_ci int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable); 2201cb0ef41Sopenharmony_ci for (i=0; i<fRuleStatusVals->size(); i++) { 2211cb0ef41Sopenharmony_ci ruleStatusTable[i] = fRuleStatusVals->elementAti(i); 2221cb0ef41Sopenharmony_ci } 2231cb0ef41Sopenharmony_ci 2241cb0ef41Sopenharmony_ci u_strToUTF8WithSub((char *)data+data->fRuleSource, rulesSize, &rulesLengthInUTF8, 2251cb0ef41Sopenharmony_ci fStrippedRules.getBuffer(), fStrippedRules.length(), 2261cb0ef41Sopenharmony_ci 0xfffd, nullptr, fStatus); 2271cb0ef41Sopenharmony_ci if (U_FAILURE(*fStatus)) { 2281cb0ef41Sopenharmony_ci return nullptr; 2291cb0ef41Sopenharmony_ci } 2301cb0ef41Sopenharmony_ci 2311cb0ef41Sopenharmony_ci return data; 2321cb0ef41Sopenharmony_ci} 2331cb0ef41Sopenharmony_ci 2341cb0ef41Sopenharmony_ci 2351cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------------------- 2361cb0ef41Sopenharmony_ci// 2371cb0ef41Sopenharmony_ci// createRuleBasedBreakIterator construct from source rules that are passed in 2381cb0ef41Sopenharmony_ci// in a UnicodeString 2391cb0ef41Sopenharmony_ci// 2401cb0ef41Sopenharmony_ci//---------------------------------------------------------------------------------------- 2411cb0ef41Sopenharmony_ciBreakIterator * 2421cb0ef41Sopenharmony_ciRBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, 2431cb0ef41Sopenharmony_ci UParseError *parseError, 2441cb0ef41Sopenharmony_ci UErrorCode &status) 2451cb0ef41Sopenharmony_ci{ 2461cb0ef41Sopenharmony_ci // 2471cb0ef41Sopenharmony_ci // Read the input rules, generate a parse tree, symbol table, 2481cb0ef41Sopenharmony_ci // and list of all Unicode Sets referenced by the rules. 2491cb0ef41Sopenharmony_ci // 2501cb0ef41Sopenharmony_ci RBBIRuleBuilder builder(rules, parseError, status); 2511cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { // status checked here bcos build below doesn't 2521cb0ef41Sopenharmony_ci return nullptr; 2531cb0ef41Sopenharmony_ci } 2541cb0ef41Sopenharmony_ci 2551cb0ef41Sopenharmony_ci RBBIDataHeader *data = builder.build(status); 2561cb0ef41Sopenharmony_ci 2571cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 2581cb0ef41Sopenharmony_ci return nullptr; 2591cb0ef41Sopenharmony_ci } 2601cb0ef41Sopenharmony_ci 2611cb0ef41Sopenharmony_ci // 2621cb0ef41Sopenharmony_ci // Create a break iterator from the compiled rules. 2631cb0ef41Sopenharmony_ci // (Identical to creation from stored pre-compiled rules) 2641cb0ef41Sopenharmony_ci // 2651cb0ef41Sopenharmony_ci // status is checked after init in construction. 2661cb0ef41Sopenharmony_ci RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status); 2671cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 2681cb0ef41Sopenharmony_ci delete This; 2691cb0ef41Sopenharmony_ci This = nullptr; 2701cb0ef41Sopenharmony_ci } 2711cb0ef41Sopenharmony_ci else if(This == nullptr) { // test for nullptr 2721cb0ef41Sopenharmony_ci status = U_MEMORY_ALLOCATION_ERROR; 2731cb0ef41Sopenharmony_ci } 2741cb0ef41Sopenharmony_ci return This; 2751cb0ef41Sopenharmony_ci} 2761cb0ef41Sopenharmony_ci 2771cb0ef41Sopenharmony_ciRBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) { 2781cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 2791cb0ef41Sopenharmony_ci return nullptr; 2801cb0ef41Sopenharmony_ci } 2811cb0ef41Sopenharmony_ci 2821cb0ef41Sopenharmony_ci fScanner->parse(); 2831cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 2841cb0ef41Sopenharmony_ci return nullptr; 2851cb0ef41Sopenharmony_ci } 2861cb0ef41Sopenharmony_ci 2871cb0ef41Sopenharmony_ci // 2881cb0ef41Sopenharmony_ci // UnicodeSet processing. 2891cb0ef41Sopenharmony_ci // Munge the Unicode Sets to create an initial set of character categories. 2901cb0ef41Sopenharmony_ci // 2911cb0ef41Sopenharmony_ci fSetBuilder->buildRanges(); 2921cb0ef41Sopenharmony_ci 2931cb0ef41Sopenharmony_ci // 2941cb0ef41Sopenharmony_ci // Generate the DFA state transition table. 2951cb0ef41Sopenharmony_ci // 2961cb0ef41Sopenharmony_ci fForwardTable = new RBBITableBuilder(this, &fForwardTree, status); 2971cb0ef41Sopenharmony_ci if (fForwardTable == nullptr) { 2981cb0ef41Sopenharmony_ci status = U_MEMORY_ALLOCATION_ERROR; 2991cb0ef41Sopenharmony_ci return nullptr; 3001cb0ef41Sopenharmony_ci } 3011cb0ef41Sopenharmony_ci 3021cb0ef41Sopenharmony_ci fForwardTable->buildForwardTable(); 3031cb0ef41Sopenharmony_ci 3041cb0ef41Sopenharmony_ci // State table and character category optimization. 3051cb0ef41Sopenharmony_ci // Merge equivalent rows and columns. 3061cb0ef41Sopenharmony_ci // Note that this process alters the initial set of character categories, 3071cb0ef41Sopenharmony_ci // causing the representation of UnicodeSets in the parse tree to become invalid. 3081cb0ef41Sopenharmony_ci 3091cb0ef41Sopenharmony_ci optimizeTables(); 3101cb0ef41Sopenharmony_ci fForwardTable->buildSafeReverseTable(status); 3111cb0ef41Sopenharmony_ci 3121cb0ef41Sopenharmony_ci 3131cb0ef41Sopenharmony_ci#ifdef RBBI_DEBUG 3141cb0ef41Sopenharmony_ci if (fDebugEnv && uprv_strstr(fDebugEnv, "states")) { 3151cb0ef41Sopenharmony_ci fForwardTable->printStates(); 3161cb0ef41Sopenharmony_ci fForwardTable->printRuleStatusTable(); 3171cb0ef41Sopenharmony_ci fForwardTable->printReverseTable(); 3181cb0ef41Sopenharmony_ci } 3191cb0ef41Sopenharmony_ci#endif 3201cb0ef41Sopenharmony_ci 3211cb0ef41Sopenharmony_ci // Generate the mapping tables (TRIE) from input code points to 3221cb0ef41Sopenharmony_ci // the character categories. 3231cb0ef41Sopenharmony_ci // 3241cb0ef41Sopenharmony_ci fSetBuilder->buildTrie(); 3251cb0ef41Sopenharmony_ci 3261cb0ef41Sopenharmony_ci // 3271cb0ef41Sopenharmony_ci // Package up the compiled data into a memory image 3281cb0ef41Sopenharmony_ci // in the run-time format. 3291cb0ef41Sopenharmony_ci // 3301cb0ef41Sopenharmony_ci RBBIDataHeader *data = flattenData(); // returns nullptr if error 3311cb0ef41Sopenharmony_ci if (U_FAILURE(status)) { 3321cb0ef41Sopenharmony_ci return nullptr; 3331cb0ef41Sopenharmony_ci } 3341cb0ef41Sopenharmony_ci return data; 3351cb0ef41Sopenharmony_ci} 3361cb0ef41Sopenharmony_ci 3371cb0ef41Sopenharmony_civoid RBBIRuleBuilder::optimizeTables() { 3381cb0ef41Sopenharmony_ci bool didSomething; 3391cb0ef41Sopenharmony_ci do { 3401cb0ef41Sopenharmony_ci didSomething = false; 3411cb0ef41Sopenharmony_ci 3421cb0ef41Sopenharmony_ci // Begin looking for duplicates with char class 3. 3431cb0ef41Sopenharmony_ci // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively, 3441cb0ef41Sopenharmony_ci // and should not have other categories merged into them. 3451cb0ef41Sopenharmony_ci IntPair duplPair = {3, 0}; 3461cb0ef41Sopenharmony_ci while (fForwardTable->findDuplCharClassFrom(&duplPair)) { 3471cb0ef41Sopenharmony_ci fSetBuilder->mergeCategories(duplPair); 3481cb0ef41Sopenharmony_ci fForwardTable->removeColumn(duplPair.second); 3491cb0ef41Sopenharmony_ci didSomething = true; 3501cb0ef41Sopenharmony_ci } 3511cb0ef41Sopenharmony_ci 3521cb0ef41Sopenharmony_ci while (fForwardTable->removeDuplicateStates() > 0) { 3531cb0ef41Sopenharmony_ci didSomething = true; 3541cb0ef41Sopenharmony_ci } 3551cb0ef41Sopenharmony_ci } while (didSomething); 3561cb0ef41Sopenharmony_ci} 3571cb0ef41Sopenharmony_ci 3581cb0ef41Sopenharmony_ciU_NAMESPACE_END 3591cb0ef41Sopenharmony_ci 3601cb0ef41Sopenharmony_ci#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 361