11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
31cb0ef41Sopenharmony_ci//
41cb0ef41Sopenharmony_ci//  file:  rbbirb.cpp
51cb0ef41Sopenharmony_ci//
61cb0ef41Sopenharmony_ci//  Copyright (C) 2002-2011, International Business Machines Corporation and others.
71cb0ef41Sopenharmony_ci//  All Rights Reserved.
81cb0ef41Sopenharmony_ci//
91cb0ef41Sopenharmony_ci//  This file contains the RBBIRuleBuilder class implementation.  This is the main class for
101cb0ef41Sopenharmony_ci//    building (compiling) break rules into the tables required by the runtime
111cb0ef41Sopenharmony_ci//    RBBI engine.
121cb0ef41Sopenharmony_ci//
131cb0ef41Sopenharmony_ci
141cb0ef41Sopenharmony_ci#include "unicode/utypes.h"
151cb0ef41Sopenharmony_ci
161cb0ef41Sopenharmony_ci#if !UCONFIG_NO_BREAK_ITERATION
171cb0ef41Sopenharmony_ci
181cb0ef41Sopenharmony_ci#include "unicode/brkiter.h"
191cb0ef41Sopenharmony_ci#include "unicode/rbbi.h"
201cb0ef41Sopenharmony_ci#include "unicode/ubrk.h"
211cb0ef41Sopenharmony_ci#include "unicode/unistr.h"
221cb0ef41Sopenharmony_ci#include "unicode/uniset.h"
231cb0ef41Sopenharmony_ci#include "unicode/uchar.h"
241cb0ef41Sopenharmony_ci#include "unicode/uchriter.h"
251cb0ef41Sopenharmony_ci#include "unicode/ustring.h"
261cb0ef41Sopenharmony_ci#include "unicode/parsepos.h"
271cb0ef41Sopenharmony_ci#include "unicode/parseerr.h"
281cb0ef41Sopenharmony_ci
291cb0ef41Sopenharmony_ci#include "cmemory.h"
301cb0ef41Sopenharmony_ci#include "cstring.h"
311cb0ef41Sopenharmony_ci#include "rbbirb.h"
321cb0ef41Sopenharmony_ci#include "rbbinode.h"
331cb0ef41Sopenharmony_ci#include "rbbiscan.h"
341cb0ef41Sopenharmony_ci#include "rbbisetb.h"
351cb0ef41Sopenharmony_ci#include "rbbitblb.h"
361cb0ef41Sopenharmony_ci#include "rbbidata.h"
371cb0ef41Sopenharmony_ci#include "uassert.h"
381cb0ef41Sopenharmony_ci
391cb0ef41Sopenharmony_ci
401cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN
411cb0ef41Sopenharmony_ci
421cb0ef41Sopenharmony_ci
431cb0ef41Sopenharmony_ci//----------------------------------------------------------------------------------------
441cb0ef41Sopenharmony_ci//
451cb0ef41Sopenharmony_ci//  Constructor.
461cb0ef41Sopenharmony_ci//
471cb0ef41Sopenharmony_ci//----------------------------------------------------------------------------------------
481cb0ef41Sopenharmony_ciRBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString   &rules,
491cb0ef41Sopenharmony_ci                                       UParseError     *parseErr,
501cb0ef41Sopenharmony_ci                                       UErrorCode      &status)
511cb0ef41Sopenharmony_ci : fRules(rules), fStrippedRules(rules)
521cb0ef41Sopenharmony_ci{
531cb0ef41Sopenharmony_ci    fStatus = &status; // status is checked below
541cb0ef41Sopenharmony_ci    fParseError = parseErr;
551cb0ef41Sopenharmony_ci    fDebugEnv   = nullptr;
561cb0ef41Sopenharmony_ci#ifdef RBBI_DEBUG
571cb0ef41Sopenharmony_ci    fDebugEnv   = getenv("U_RBBIDEBUG");
581cb0ef41Sopenharmony_ci#endif
591cb0ef41Sopenharmony_ci
601cb0ef41Sopenharmony_ci
611cb0ef41Sopenharmony_ci    fForwardTree        = nullptr;
621cb0ef41Sopenharmony_ci    fReverseTree        = nullptr;
631cb0ef41Sopenharmony_ci    fSafeFwdTree        = nullptr;
641cb0ef41Sopenharmony_ci    fSafeRevTree        = nullptr;
651cb0ef41Sopenharmony_ci    fDefaultTree        = &fForwardTree;
661cb0ef41Sopenharmony_ci    fForwardTable       = nullptr;
671cb0ef41Sopenharmony_ci    fRuleStatusVals     = nullptr;
681cb0ef41Sopenharmony_ci    fChainRules         = false;
691cb0ef41Sopenharmony_ci    fLookAheadHardBreak = false;
701cb0ef41Sopenharmony_ci    fUSetNodes          = nullptr;
711cb0ef41Sopenharmony_ci    fRuleStatusVals     = nullptr;
721cb0ef41Sopenharmony_ci    fScanner            = nullptr;
731cb0ef41Sopenharmony_ci    fSetBuilder         = nullptr;
741cb0ef41Sopenharmony_ci    if (parseErr) {
751cb0ef41Sopenharmony_ci        uprv_memset(parseErr, 0, sizeof(UParseError));
761cb0ef41Sopenharmony_ci    }
771cb0ef41Sopenharmony_ci
781cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
791cb0ef41Sopenharmony_ci        return;
801cb0ef41Sopenharmony_ci    }
811cb0ef41Sopenharmony_ci
821cb0ef41Sopenharmony_ci    fUSetNodes          = new UVector(status); // bcos status gets overwritten here
831cb0ef41Sopenharmony_ci    fRuleStatusVals     = new UVector(status);
841cb0ef41Sopenharmony_ci    fScanner            = new RBBIRuleScanner(this);
851cb0ef41Sopenharmony_ci    fSetBuilder         = new RBBISetBuilder(this);
861cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
871cb0ef41Sopenharmony_ci        return;
881cb0ef41Sopenharmony_ci    }
891cb0ef41Sopenharmony_ci    if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
901cb0ef41Sopenharmony_ci        status = U_MEMORY_ALLOCATION_ERROR;
911cb0ef41Sopenharmony_ci    }
921cb0ef41Sopenharmony_ci}
931cb0ef41Sopenharmony_ci
941cb0ef41Sopenharmony_ci
951cb0ef41Sopenharmony_ci
961cb0ef41Sopenharmony_ci//----------------------------------------------------------------------------------------
971cb0ef41Sopenharmony_ci//
981cb0ef41Sopenharmony_ci//  Destructor
991cb0ef41Sopenharmony_ci//
1001cb0ef41Sopenharmony_ci//----------------------------------------------------------------------------------------
1011cb0ef41Sopenharmony_ciRBBIRuleBuilder::~RBBIRuleBuilder() {
1021cb0ef41Sopenharmony_ci
1031cb0ef41Sopenharmony_ci    int        i;
1041cb0ef41Sopenharmony_ci    for (i=0; ; i++) {
1051cb0ef41Sopenharmony_ci        RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
1061cb0ef41Sopenharmony_ci        if (n==nullptr) {
1071cb0ef41Sopenharmony_ci            break;
1081cb0ef41Sopenharmony_ci        }
1091cb0ef41Sopenharmony_ci        delete n;
1101cb0ef41Sopenharmony_ci    }
1111cb0ef41Sopenharmony_ci
1121cb0ef41Sopenharmony_ci    delete fUSetNodes;
1131cb0ef41Sopenharmony_ci    delete fSetBuilder;
1141cb0ef41Sopenharmony_ci    delete fForwardTable;
1151cb0ef41Sopenharmony_ci    delete fForwardTree;
1161cb0ef41Sopenharmony_ci    delete fReverseTree;
1171cb0ef41Sopenharmony_ci    delete fSafeFwdTree;
1181cb0ef41Sopenharmony_ci    delete fSafeRevTree;
1191cb0ef41Sopenharmony_ci    delete fScanner;
1201cb0ef41Sopenharmony_ci    delete fRuleStatusVals;
1211cb0ef41Sopenharmony_ci}
1221cb0ef41Sopenharmony_ci
1231cb0ef41Sopenharmony_ci
1241cb0ef41Sopenharmony_ci
1251cb0ef41Sopenharmony_ci
1261cb0ef41Sopenharmony_ci
1271cb0ef41Sopenharmony_ci//----------------------------------------------------------------------------------------
1281cb0ef41Sopenharmony_ci//
1291cb0ef41Sopenharmony_ci//   flattenData() -  Collect up the compiled RBBI rule data and put it into
1301cb0ef41Sopenharmony_ci//                    the format for saving in ICU data files,
1311cb0ef41Sopenharmony_ci//                    which is also the format needed by the RBBI runtime engine.
1321cb0ef41Sopenharmony_ci//
1331cb0ef41Sopenharmony_ci//----------------------------------------------------------------------------------------
// Round i up to the next multiple of 8 (values already on an 8-byte boundary are unchanged).
static int32_t align8(int32_t i) {
    return (i + 7) & ~7;
}
1351cb0ef41Sopenharmony_ci
1361cb0ef41Sopenharmony_ciRBBIDataHeader *RBBIRuleBuilder::flattenData() {
1371cb0ef41Sopenharmony_ci    int32_t    i;
1381cb0ef41Sopenharmony_ci
1391cb0ef41Sopenharmony_ci    if (U_FAILURE(*fStatus)) {
1401cb0ef41Sopenharmony_ci        return nullptr;
1411cb0ef41Sopenharmony_ci    }
1421cb0ef41Sopenharmony_ci
1431cb0ef41Sopenharmony_ci    // Remove whitespace from the rules to make it smaller.
1441cb0ef41Sopenharmony_ci    // The rule parser has already removed comments.
1451cb0ef41Sopenharmony_ci    fStrippedRules = fScanner->stripRules(fStrippedRules);
1461cb0ef41Sopenharmony_ci
1471cb0ef41Sopenharmony_ci    // Calculate the size of each section in the data.
1481cb0ef41Sopenharmony_ci    //   Sizes here are padded up to a multiple of 8 for better memory alignment.
1491cb0ef41Sopenharmony_ci    //   Sections sizes actually stored in the header are for the actual data
1501cb0ef41Sopenharmony_ci    //     without the padding.
1511cb0ef41Sopenharmony_ci    //
1521cb0ef41Sopenharmony_ci    int32_t headerSize        = align8(sizeof(RBBIDataHeader));
1531cb0ef41Sopenharmony_ci    int32_t forwardTableSize  = align8(fForwardTable->getTableSize());
1541cb0ef41Sopenharmony_ci    int32_t reverseTableSize  = align8(fForwardTable->getSafeTableSize());
1551cb0ef41Sopenharmony_ci    int32_t trieSize          = align8(fSetBuilder->getTrieSize());
1561cb0ef41Sopenharmony_ci    int32_t statusTableSize   = align8(fRuleStatusVals->size() * sizeof(int32_t));
1571cb0ef41Sopenharmony_ci
1581cb0ef41Sopenharmony_ci    int32_t rulesLengthInUTF8 = 0;
1591cb0ef41Sopenharmony_ci    u_strToUTF8WithSub(0, 0, &rulesLengthInUTF8,
1601cb0ef41Sopenharmony_ci                       fStrippedRules.getBuffer(), fStrippedRules.length(),
1611cb0ef41Sopenharmony_ci                       0xfffd, nullptr, fStatus);
1621cb0ef41Sopenharmony_ci    *fStatus = U_ZERO_ERROR;
1631cb0ef41Sopenharmony_ci
1641cb0ef41Sopenharmony_ci    int32_t rulesSize         = align8((rulesLengthInUTF8+1));
1651cb0ef41Sopenharmony_ci
1661cb0ef41Sopenharmony_ci    int32_t         totalSize = headerSize
1671cb0ef41Sopenharmony_ci                                + forwardTableSize
1681cb0ef41Sopenharmony_ci                                + reverseTableSize
1691cb0ef41Sopenharmony_ci                                + statusTableSize + trieSize + rulesSize;
1701cb0ef41Sopenharmony_ci
1711cb0ef41Sopenharmony_ci#ifdef RBBI_DEBUG
1721cb0ef41Sopenharmony_ci    if (fDebugEnv && uprv_strstr(fDebugEnv, "size")) {
1731cb0ef41Sopenharmony_ci        RBBIDebugPrintf("Header Size:        %8d\n", headerSize);
1741cb0ef41Sopenharmony_ci        RBBIDebugPrintf("Forward Table Size: %8d\n", forwardTableSize);
1751cb0ef41Sopenharmony_ci        RBBIDebugPrintf("Reverse Table Size: %8d\n", reverseTableSize);
1761cb0ef41Sopenharmony_ci        RBBIDebugPrintf("Trie Size:          %8d\n", trieSize);
1771cb0ef41Sopenharmony_ci        RBBIDebugPrintf("Status Table Size:  %8d\n", statusTableSize);
1781cb0ef41Sopenharmony_ci        RBBIDebugPrintf("Rules Size:         %8d\n", rulesSize);
1791cb0ef41Sopenharmony_ci        RBBIDebugPrintf("-----------------------------\n");
1801cb0ef41Sopenharmony_ci        RBBIDebugPrintf("Total Size:         %8d\n", totalSize);
1811cb0ef41Sopenharmony_ci    }
1821cb0ef41Sopenharmony_ci#endif
1831cb0ef41Sopenharmony_ci
1841cb0ef41Sopenharmony_ci    RBBIDataHeader  *data     = (RBBIDataHeader *)uprv_malloc(totalSize);
1851cb0ef41Sopenharmony_ci    if (data == nullptr) {
1861cb0ef41Sopenharmony_ci        *fStatus = U_MEMORY_ALLOCATION_ERROR;
1871cb0ef41Sopenharmony_ci        return nullptr;
1881cb0ef41Sopenharmony_ci    }
1891cb0ef41Sopenharmony_ci    uprv_memset(data, 0, totalSize);
1901cb0ef41Sopenharmony_ci
1911cb0ef41Sopenharmony_ci
1921cb0ef41Sopenharmony_ci    data->fMagic            = 0xb1a0;
1931cb0ef41Sopenharmony_ci    data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
1941cb0ef41Sopenharmony_ci    data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
1951cb0ef41Sopenharmony_ci    data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
1961cb0ef41Sopenharmony_ci    data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
1971cb0ef41Sopenharmony_ci    data->fLength           = totalSize;
1981cb0ef41Sopenharmony_ci    data->fCatCount         = fSetBuilder->getNumCharCategories();
1991cb0ef41Sopenharmony_ci
2001cb0ef41Sopenharmony_ci    data->fFTable        = headerSize;
2011cb0ef41Sopenharmony_ci    data->fFTableLen     = forwardTableSize;
2021cb0ef41Sopenharmony_ci
2031cb0ef41Sopenharmony_ci    data->fRTable        = data->fFTable  + data->fFTableLen;
2041cb0ef41Sopenharmony_ci    data->fRTableLen     = reverseTableSize;
2051cb0ef41Sopenharmony_ci
2061cb0ef41Sopenharmony_ci    data->fTrie          = data->fRTable + data->fRTableLen;
2071cb0ef41Sopenharmony_ci    data->fTrieLen       = trieSize;
2081cb0ef41Sopenharmony_ci    data->fStatusTable   = data->fTrie    + data->fTrieLen;
2091cb0ef41Sopenharmony_ci    data->fStatusTableLen= statusTableSize;
2101cb0ef41Sopenharmony_ci    data->fRuleSource    = data->fStatusTable + statusTableSize;
2111cb0ef41Sopenharmony_ci    data->fRuleSourceLen = rulesLengthInUTF8;
2121cb0ef41Sopenharmony_ci
2131cb0ef41Sopenharmony_ci    uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
2141cb0ef41Sopenharmony_ci
2151cb0ef41Sopenharmony_ci    fForwardTable->exportTable((uint8_t *)data + data->fFTable);
2161cb0ef41Sopenharmony_ci    fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable);
2171cb0ef41Sopenharmony_ci    fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
2181cb0ef41Sopenharmony_ci
2191cb0ef41Sopenharmony_ci    int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
2201cb0ef41Sopenharmony_ci    for (i=0; i<fRuleStatusVals->size(); i++) {
2211cb0ef41Sopenharmony_ci        ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
2221cb0ef41Sopenharmony_ci    }
2231cb0ef41Sopenharmony_ci
2241cb0ef41Sopenharmony_ci    u_strToUTF8WithSub((char *)data+data->fRuleSource, rulesSize, &rulesLengthInUTF8,
2251cb0ef41Sopenharmony_ci                       fStrippedRules.getBuffer(), fStrippedRules.length(),
2261cb0ef41Sopenharmony_ci                       0xfffd, nullptr, fStatus);
2271cb0ef41Sopenharmony_ci    if (U_FAILURE(*fStatus)) {
2281cb0ef41Sopenharmony_ci        return nullptr;
2291cb0ef41Sopenharmony_ci    }
2301cb0ef41Sopenharmony_ci
2311cb0ef41Sopenharmony_ci    return data;
2321cb0ef41Sopenharmony_ci}
2331cb0ef41Sopenharmony_ci
2341cb0ef41Sopenharmony_ci
2351cb0ef41Sopenharmony_ci//----------------------------------------------------------------------------------------
2361cb0ef41Sopenharmony_ci//
2371cb0ef41Sopenharmony_ci//  createRuleBasedBreakIterator    construct from source rules that are passed in
2381cb0ef41Sopenharmony_ci//                                  in a UnicodeString
2391cb0ef41Sopenharmony_ci//
2401cb0ef41Sopenharmony_ci//----------------------------------------------------------------------------------------
2411cb0ef41Sopenharmony_ciBreakIterator *
2421cb0ef41Sopenharmony_ciRBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString    &rules,
2431cb0ef41Sopenharmony_ci                                    UParseError      *parseError,
2441cb0ef41Sopenharmony_ci                                    UErrorCode       &status)
2451cb0ef41Sopenharmony_ci{
2461cb0ef41Sopenharmony_ci    //
2471cb0ef41Sopenharmony_ci    // Read the input rules, generate a parse tree, symbol table,
2481cb0ef41Sopenharmony_ci    // and list of all Unicode Sets referenced by the rules.
2491cb0ef41Sopenharmony_ci    //
2501cb0ef41Sopenharmony_ci    RBBIRuleBuilder  builder(rules, parseError, status);
2511cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) { // status checked here bcos build below doesn't
2521cb0ef41Sopenharmony_ci        return nullptr;
2531cb0ef41Sopenharmony_ci    }
2541cb0ef41Sopenharmony_ci
2551cb0ef41Sopenharmony_ci    RBBIDataHeader *data = builder.build(status);
2561cb0ef41Sopenharmony_ci
2571cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
2581cb0ef41Sopenharmony_ci        return nullptr;
2591cb0ef41Sopenharmony_ci    }
2601cb0ef41Sopenharmony_ci
2611cb0ef41Sopenharmony_ci    //
2621cb0ef41Sopenharmony_ci    //  Create a break iterator from the compiled rules.
2631cb0ef41Sopenharmony_ci    //     (Identical to creation from stored pre-compiled rules)
2641cb0ef41Sopenharmony_ci    //
2651cb0ef41Sopenharmony_ci    // status is checked after init in construction.
2661cb0ef41Sopenharmony_ci    RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
2671cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
2681cb0ef41Sopenharmony_ci        delete This;
2691cb0ef41Sopenharmony_ci        This = nullptr;
2701cb0ef41Sopenharmony_ci    }
2711cb0ef41Sopenharmony_ci    else if(This == nullptr) { // test for nullptr
2721cb0ef41Sopenharmony_ci        status = U_MEMORY_ALLOCATION_ERROR;
2731cb0ef41Sopenharmony_ci    }
2741cb0ef41Sopenharmony_ci    return This;
2751cb0ef41Sopenharmony_ci}
2761cb0ef41Sopenharmony_ci
2771cb0ef41Sopenharmony_ciRBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
2781cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
2791cb0ef41Sopenharmony_ci        return nullptr;
2801cb0ef41Sopenharmony_ci    }
2811cb0ef41Sopenharmony_ci
2821cb0ef41Sopenharmony_ci    fScanner->parse();
2831cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
2841cb0ef41Sopenharmony_ci        return nullptr;
2851cb0ef41Sopenharmony_ci    }
2861cb0ef41Sopenharmony_ci
2871cb0ef41Sopenharmony_ci    //
2881cb0ef41Sopenharmony_ci    // UnicodeSet processing.
2891cb0ef41Sopenharmony_ci    //    Munge the Unicode Sets to create an initial set of character categories.
2901cb0ef41Sopenharmony_ci    //
2911cb0ef41Sopenharmony_ci    fSetBuilder->buildRanges();
2921cb0ef41Sopenharmony_ci
2931cb0ef41Sopenharmony_ci    //
2941cb0ef41Sopenharmony_ci    //   Generate the DFA state transition table.
2951cb0ef41Sopenharmony_ci    //
2961cb0ef41Sopenharmony_ci    fForwardTable = new RBBITableBuilder(this, &fForwardTree, status);
2971cb0ef41Sopenharmony_ci    if (fForwardTable == nullptr) {
2981cb0ef41Sopenharmony_ci        status = U_MEMORY_ALLOCATION_ERROR;
2991cb0ef41Sopenharmony_ci        return nullptr;
3001cb0ef41Sopenharmony_ci    }
3011cb0ef41Sopenharmony_ci
3021cb0ef41Sopenharmony_ci    fForwardTable->buildForwardTable();
3031cb0ef41Sopenharmony_ci
3041cb0ef41Sopenharmony_ci    // State table and character category optimization.
3051cb0ef41Sopenharmony_ci    // Merge equivalent rows and columns.
3061cb0ef41Sopenharmony_ci    // Note that this process alters the initial set of character categories,
3071cb0ef41Sopenharmony_ci    // causing the representation of UnicodeSets in the parse tree to become invalid.
3081cb0ef41Sopenharmony_ci
3091cb0ef41Sopenharmony_ci    optimizeTables();
3101cb0ef41Sopenharmony_ci    fForwardTable->buildSafeReverseTable(status);
3111cb0ef41Sopenharmony_ci
3121cb0ef41Sopenharmony_ci
3131cb0ef41Sopenharmony_ci#ifdef RBBI_DEBUG
3141cb0ef41Sopenharmony_ci    if (fDebugEnv && uprv_strstr(fDebugEnv, "states")) {
3151cb0ef41Sopenharmony_ci        fForwardTable->printStates();
3161cb0ef41Sopenharmony_ci        fForwardTable->printRuleStatusTable();
3171cb0ef41Sopenharmony_ci        fForwardTable->printReverseTable();
3181cb0ef41Sopenharmony_ci    }
3191cb0ef41Sopenharmony_ci#endif
3201cb0ef41Sopenharmony_ci
3211cb0ef41Sopenharmony_ci    //    Generate the mapping tables (TRIE) from input code points to
3221cb0ef41Sopenharmony_ci    //    the character categories.
3231cb0ef41Sopenharmony_ci    //
3241cb0ef41Sopenharmony_ci    fSetBuilder->buildTrie();
3251cb0ef41Sopenharmony_ci
3261cb0ef41Sopenharmony_ci    //
3271cb0ef41Sopenharmony_ci    //   Package up the compiled data into a memory image
3281cb0ef41Sopenharmony_ci    //      in the run-time format.
3291cb0ef41Sopenharmony_ci    //
3301cb0ef41Sopenharmony_ci    RBBIDataHeader *data = flattenData(); // returns nullptr if error
3311cb0ef41Sopenharmony_ci    if (U_FAILURE(status)) {
3321cb0ef41Sopenharmony_ci        return nullptr;
3331cb0ef41Sopenharmony_ci    }
3341cb0ef41Sopenharmony_ci    return data;
3351cb0ef41Sopenharmony_ci}
3361cb0ef41Sopenharmony_ci
3371cb0ef41Sopenharmony_civoid RBBIRuleBuilder::optimizeTables() {
3381cb0ef41Sopenharmony_ci    bool didSomething;
3391cb0ef41Sopenharmony_ci    do {
3401cb0ef41Sopenharmony_ci        didSomething = false;
3411cb0ef41Sopenharmony_ci
3421cb0ef41Sopenharmony_ci        // Begin looking for duplicates with char class 3.
3431cb0ef41Sopenharmony_ci        // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
3441cb0ef41Sopenharmony_ci        // and should not have other categories merged into them.
3451cb0ef41Sopenharmony_ci        IntPair duplPair = {3, 0};
3461cb0ef41Sopenharmony_ci        while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
3471cb0ef41Sopenharmony_ci            fSetBuilder->mergeCategories(duplPair);
3481cb0ef41Sopenharmony_ci            fForwardTable->removeColumn(duplPair.second);
3491cb0ef41Sopenharmony_ci            didSomething = true;
3501cb0ef41Sopenharmony_ci        }
3511cb0ef41Sopenharmony_ci
3521cb0ef41Sopenharmony_ci        while (fForwardTable->removeDuplicateStates() > 0) {
3531cb0ef41Sopenharmony_ci            didSomething = true;
3541cb0ef41Sopenharmony_ci        }
3551cb0ef41Sopenharmony_ci    } while (didSomething);
3561cb0ef41Sopenharmony_ci}
3571cb0ef41Sopenharmony_ci
3581cb0ef41Sopenharmony_ciU_NAMESPACE_END
3591cb0ef41Sopenharmony_ci
3601cb0ef41Sopenharmony_ci#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
361