12e5b6d6dSopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
32e5b6d6dSopenharmony_ci/*
42e5b6d6dSopenharmony_ci*******************************************************************************
52e5b6d6dSopenharmony_ci*
62e5b6d6dSopenharmony_ci*   Copyright (C) 1999-2014 International Business Machines
72e5b6d6dSopenharmony_ci*   Corporation and others.  All Rights Reserved.
82e5b6d6dSopenharmony_ci*
92e5b6d6dSopenharmony_ci*******************************************************************************
102e5b6d6dSopenharmony_ci*   file name:  rbbidata.h
112e5b6d6dSopenharmony_ci*   encoding:   UTF-8
122e5b6d6dSopenharmony_ci*   tab size:   8 (not used)
132e5b6d6dSopenharmony_ci*   indentation:4
142e5b6d6dSopenharmony_ci*
*   RBBI data formats  Includes
*
*                          Structs that describe the format of the binary RBBI data,
*                          as it is stored in ICU's data file.
192e5b6d6dSopenharmony_ci*
202e5b6d6dSopenharmony_ci*      RBBIDataWrapper  -  Instances of this class sit between the
212e5b6d6dSopenharmony_ci*                          raw data structs and the RulesBasedBreakIterator objects
222e5b6d6dSopenharmony_ci*                          that are created by applications.  The wrapper class
232e5b6d6dSopenharmony_ci*                          provides reference counting for the underlying data,
242e5b6d6dSopenharmony_ci*                          and direct pointers to data that would not otherwise
252e5b6d6dSopenharmony_ci*                          be accessible without ugly pointer arithmetic.  The
262e5b6d6dSopenharmony_ci*                          wrapper does not attempt to provide any higher level
272e5b6d6dSopenharmony_ci*                          abstractions for the data itself.
282e5b6d6dSopenharmony_ci*
292e5b6d6dSopenharmony_ci*                          There will be only one instance of RBBIDataWrapper for any
302e5b6d6dSopenharmony_ci*                          set of RBBI run time data being shared by instances
312e5b6d6dSopenharmony_ci*                          (clones) of RulesBasedBreakIterator.
322e5b6d6dSopenharmony_ci*/
332e5b6d6dSopenharmony_ci
342e5b6d6dSopenharmony_ci#ifndef __RBBIDATA_H__
352e5b6d6dSopenharmony_ci#define __RBBIDATA_H__
362e5b6d6dSopenharmony_ci
372e5b6d6dSopenharmony_ci#include "unicode/utypes.h"
382e5b6d6dSopenharmony_ci#include "unicode/udata.h"
392e5b6d6dSopenharmony_ci#include "udataswp.h"
402e5b6d6dSopenharmony_ci
412e5b6d6dSopenharmony_ci/**
422e5b6d6dSopenharmony_ci * Swap RBBI data. See udataswp.h.
432e5b6d6dSopenharmony_ci * @internal
442e5b6d6dSopenharmony_ci */
452e5b6d6dSopenharmony_ciU_CAPI int32_t U_EXPORT2
462e5b6d6dSopenharmony_ciubrk_swap(const UDataSwapper *ds,
472e5b6d6dSopenharmony_ci          const void *inData, int32_t length, void *outData,
482e5b6d6dSopenharmony_ci          UErrorCode *pErrorCode);
492e5b6d6dSopenharmony_ci
502e5b6d6dSopenharmony_ci#ifdef __cplusplus
512e5b6d6dSopenharmony_ci
522e5b6d6dSopenharmony_ci#include "unicode/ucptrie.h"
532e5b6d6dSopenharmony_ci#include "unicode/uobject.h"
542e5b6d6dSopenharmony_ci#include "unicode/unistr.h"
552e5b6d6dSopenharmony_ci#include "unicode/uversion.h"
562e5b6d6dSopenharmony_ci#include "umutex.h"
572e5b6d6dSopenharmony_ci
582e5b6d6dSopenharmony_ci
592e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN
602e5b6d6dSopenharmony_ci
// The current RBBI data format version, as four bytes: {6, 0, 0, 0}.
// File-local (static const), so every translation unit that includes this
// header gets its own copy.
static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {6, 0, 0, 0};
632e5b6d6dSopenharmony_ci
642e5b6d6dSopenharmony_ci/*
652e5b6d6dSopenharmony_ci *   The following structs map exactly onto the raw data from ICU common data file.
662e5b6d6dSopenharmony_ci */
672e5b6d6dSopenharmony_cistruct RBBIDataHeader {
682e5b6d6dSopenharmony_ci    uint32_t         fMagic;           /*  == 0xbla0                                               */
692e5b6d6dSopenharmony_ci    UVersionInfo     fFormatVersion;   /* Data Format.  Same as the value in struct UDataInfo      */
702e5b6d6dSopenharmony_ci                                       /*   if there is one associated with this data.             */
712e5b6d6dSopenharmony_ci                                       /*     (version originates in rbbi, is copied to UDataInfo) */
722e5b6d6dSopenharmony_ci    uint32_t         fLength;          /*  Total length in bytes of this RBBI Data,                */
732e5b6d6dSopenharmony_ci                                       /*      including all sections, not just the header.        */
742e5b6d6dSopenharmony_ci    uint32_t         fCatCount;        /*  Number of character categories.                         */
752e5b6d6dSopenharmony_ci
762e5b6d6dSopenharmony_ci    /*                                                                        */
772e5b6d6dSopenharmony_ci    /*  Offsets and sizes of each of the subsections within the RBBI data.    */
782e5b6d6dSopenharmony_ci    /*  All offsets are bytes from the start of the RBBIDataHeader.           */
792e5b6d6dSopenharmony_ci    /*  All sizes are in bytes.                                               */
802e5b6d6dSopenharmony_ci    /*                                                                        */
812e5b6d6dSopenharmony_ci    uint32_t         fFTable;         /*  forward state transition table. */
822e5b6d6dSopenharmony_ci    uint32_t         fFTableLen;
832e5b6d6dSopenharmony_ci    uint32_t         fRTable;         /*  Offset to the reverse state transition table. */
842e5b6d6dSopenharmony_ci    uint32_t         fRTableLen;
852e5b6d6dSopenharmony_ci    uint32_t         fTrie;           /*  Offset to Trie data for character categories */
862e5b6d6dSopenharmony_ci    uint32_t         fTrieLen;
872e5b6d6dSopenharmony_ci    uint32_t         fRuleSource;     /*  Offset to the source for for the break */
882e5b6d6dSopenharmony_ci    uint32_t         fRuleSourceLen;  /*    rules.  Stored UChar *. */
892e5b6d6dSopenharmony_ci    uint32_t         fStatusTable;    /* Offset to the table of rule status values */
902e5b6d6dSopenharmony_ci    uint32_t         fStatusTableLen;
912e5b6d6dSopenharmony_ci
922e5b6d6dSopenharmony_ci    uint32_t         fReserved[6];    /*  Reserved for expansion */
932e5b6d6dSopenharmony_ci
942e5b6d6dSopenharmony_ci};
952e5b6d6dSopenharmony_ci
962e5b6d6dSopenharmony_ci
972e5b6d6dSopenharmony_ci
//  One row of a state transition table.  T is the integer type used for every
//  field of the row; this header instantiates it with uint8_t and uint16_t.
//  The struct describes raw table data, so the field order and the trailing
//  one-element array must be left exactly as they are.
template <typename T>
struct RBBIStateTableRowT {
    T               fAccepting;    //  Non-zero if this row is for an accepting state.
                                   //  Value 0: not an accepting state.
                                   //        1: (ACCEPTING_UNCONDITIONAL) Unconditional Accepting state.
                                   //       >1: Look-ahead match has completed.
                                   //           Actual boundary position happened earlier.
                                   //           Value here == fLookAhead in earlier
                                   //           state, at actual boundary pos.
    T               fLookAhead;    //  Non-zero if this row is for a state that
                                   //    corresponds to a '/' in the rule source.
                                   //    Value is the same as the fAccepting
                                   //    value for the rule (which will appear
                                   //    in a different state).
    T               fTagsIdx;      //  Non-zero if this row covers a {tagged} position
                                   //    from a rule.  Value is the index in the
                                   //    StatusTable of the set of matching
                                   //    tags (rule status values)
    T               fNextState[1]; //  Next State, indexed by char category.
                                   //    Variable-length array declared with length 1
                                   //    to disable bounds checkers.
                                   //    Array Size is actually fData->fHeader->fCatCount
                                   //    CAUTION:  see RBBITableBuilder::getTableSize()
                                   //              before changing anything here.
};
1232e5b6d6dSopenharmony_ci
1242e5b6d6dSopenharmony_citypedef RBBIStateTableRowT<uint8_t> RBBIStateTableRow8;
1252e5b6d6dSopenharmony_citypedef RBBIStateTableRowT<uint16_t> RBBIStateTableRow16;
1262e5b6d6dSopenharmony_ci
// Value constant for RBBIStateTableRow::fAccepting: an unconditional accepting state.
constexpr uint16_t ACCEPTING_UNCONDITIONAL = 1;
1282e5b6d6dSopenharmony_ci
1292e5b6d6dSopenharmony_ciunion RBBIStateTableRow {
1302e5b6d6dSopenharmony_ci  RBBIStateTableRow16 r16;
1312e5b6d6dSopenharmony_ci  RBBIStateTableRow8 r8;
1322e5b6d6dSopenharmony_ci};
1332e5b6d6dSopenharmony_ci
// Header of one state transition table, followed directly by the row data.
// The layout describes raw table data and must not be changed.
struct RBBIStateTable {
    uint32_t         fNumStates;            // Number of states.
    uint32_t         fRowLen;               // Length of a state table row, in bytes.
    uint32_t         fDictCategoriesStart;  // Char category number of the first dictionary
                                            //   char class, or the largest category number + 1
                                            //   if there are no dictionary categories.
    uint32_t         fLookAheadResultsSize; // Size of run-time array required for holding
                                            //   look-ahead results. Indexed by row.fLookAhead.
    uint32_t         fFlags;                // Option Flags for this state table.
                                            //   NOTE(review): presumably a combination of the
                                            //   RBBI_* flag constants in this header - confirm.
    char             fTableData[1];         // First RBBIStateTableRow begins here.
                                            //   Variable-length array declared with length 1
                                            //   to disable bounds checkers.
                                            //   (making it char[] simplifies ugly address
                                            //   arithmetic for indexing variable length rows.)
};
1492e5b6d6dSopenharmony_ci
// Single-bit flag values (1, 2, 4).
// NOTE(review): presumably the option bits stored in RBBIStateTable::fFlags;
// the meaning of each bit is not described in this header - confirm against
// the code that builds and reads the state tables.
constexpr uint32_t RBBI_LOOKAHEAD_HARD_BREAK = 1;
constexpr uint32_t RBBI_BOF_REQUIRED = 2;
constexpr uint32_t RBBI_8BITS_ROWS = 4;
1532e5b6d6dSopenharmony_ci
1542e5b6d6dSopenharmony_ci
1552e5b6d6dSopenharmony_ci/*                                        */
1562e5b6d6dSopenharmony_ci/*   The reference counting wrapper class */
1572e5b6d6dSopenharmony_ci/*                                        */
1582e5b6d6dSopenharmony_ciclass RBBIDataWrapper : public UMemory {
1592e5b6d6dSopenharmony_cipublic:
1602e5b6d6dSopenharmony_ci    enum EDontAdopt {
1612e5b6d6dSopenharmony_ci        kDontAdopt
1622e5b6d6dSopenharmony_ci    };
1632e5b6d6dSopenharmony_ci    RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
1642e5b6d6dSopenharmony_ci    RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
1652e5b6d6dSopenharmony_ci    RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
1662e5b6d6dSopenharmony_ci    ~RBBIDataWrapper();
1672e5b6d6dSopenharmony_ci
1682e5b6d6dSopenharmony_ci    static UBool          isDataVersionAcceptable(const UVersionInfo version);
1692e5b6d6dSopenharmony_ci
1702e5b6d6dSopenharmony_ci    void                  init0();
1712e5b6d6dSopenharmony_ci    void                  init(const RBBIDataHeader *data, UErrorCode &status);
1722e5b6d6dSopenharmony_ci    RBBIDataWrapper      *addReference();
1732e5b6d6dSopenharmony_ci    void                  removeReference();
1742e5b6d6dSopenharmony_ci    bool                  operator ==(const RBBIDataWrapper &other) const;
1752e5b6d6dSopenharmony_ci    int32_t               hashCode();
1762e5b6d6dSopenharmony_ci    const UnicodeString  &getRuleSourceString() const;
1772e5b6d6dSopenharmony_ci    void                  printData();
1782e5b6d6dSopenharmony_ci    void                  printTable(const char *heading, const RBBIStateTable *table);
1792e5b6d6dSopenharmony_ci
1802e5b6d6dSopenharmony_ci    /*                                     */
1812e5b6d6dSopenharmony_ci    /*   Pointers to items within the data */
1822e5b6d6dSopenharmony_ci    /*                                     */
1832e5b6d6dSopenharmony_ci    const RBBIDataHeader     *fHeader;
1842e5b6d6dSopenharmony_ci    const RBBIStateTable     *fForwardTable;
1852e5b6d6dSopenharmony_ci    const RBBIStateTable     *fReverseTable;
1862e5b6d6dSopenharmony_ci    const char               *fRuleSource;
1872e5b6d6dSopenharmony_ci    const int32_t            *fRuleStatusTable;
1882e5b6d6dSopenharmony_ci
1892e5b6d6dSopenharmony_ci    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
1902e5b6d6dSopenharmony_ci    int32_t             fStatusMaxIdx;
1912e5b6d6dSopenharmony_ci
1922e5b6d6dSopenharmony_ci    UCPTrie             *fTrie;
1932e5b6d6dSopenharmony_ci
1942e5b6d6dSopenharmony_ciprivate:
1952e5b6d6dSopenharmony_ci    u_atomic_int32_t    fRefCount;
1962e5b6d6dSopenharmony_ci    UDataMemory        *fUDataMem;
1972e5b6d6dSopenharmony_ci    UnicodeString       fRuleString;
1982e5b6d6dSopenharmony_ci    UBool               fDontFreeData;
1992e5b6d6dSopenharmony_ci
2002e5b6d6dSopenharmony_ci    RBBIDataWrapper(const RBBIDataWrapper &other) = delete; /*  forbid copying of this class */
2012e5b6d6dSopenharmony_ci    RBBIDataWrapper &operator=(const RBBIDataWrapper &other) = delete; /*  forbid copying of this class */
2022e5b6d6dSopenharmony_ci};
2032e5b6d6dSopenharmony_ci
2042e5b6d6dSopenharmony_ci
2052e5b6d6dSopenharmony_ci
2062e5b6d6dSopenharmony_ciU_NAMESPACE_END
2072e5b6d6dSopenharmony_ci
2082e5b6d6dSopenharmony_ciU_CFUNC UBool rbbi_cleanup(void);
2092e5b6d6dSopenharmony_ci
2102e5b6d6dSopenharmony_ci#endif /* C++ */
2112e5b6d6dSopenharmony_ci
2122e5b6d6dSopenharmony_ci#endif
213