1/**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#ifndef LIBPANDABASE_UTILS_BIT_TABLE_H
17#define LIBPANDABASE_UTILS_BIT_TABLE_H
18
19#include "utils/arena_containers.h"
20#include "utils/bit_memory_region.h"
21#include "utils/bit_memory_stream.h"
22#include "utils/bit_vector.h"
23#include "utils/hash.h"
24#include <iomanip>
25#include "securec.h"
26
27namespace panda {
28
29class VarintPack {
30public:
31    static constexpr size_t INLINE_BITS = 4;
32    static constexpr size_t INLINE_MAX = 11;
33
34    template <size_t N>
35    static std::array<uint32_t, N> Read(BitMemoryStreamIn *stream)
36    {
37        static_assert(sizeof(uint64_t) * BITS_PER_BYTE >= N * INLINE_BITS);
38        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
39        std::array<uint32_t, N> values;
40        auto data = stream->Read<uint64_t>(N * INLINE_BITS);
41        for (size_t i = 0; i < N; i++) {
42            values[i] = ExtractBits(data, i * INLINE_BITS, INLINE_BITS);
43        }
44        for (size_t i = 0; i < N; i++) {
45            if (UNLIKELY(values[i] > INLINE_MAX)) {
46                values[i] = stream->Read<uint32_t>((values[i] - INLINE_MAX) * BITS_PER_BYTE);
47            }
48        }
49        return values;
50    }
51
52    template <size_t N, typename Container>
53    static void Write(BitMemoryStreamOut<Container> &stream, const std::array<uint32_t, N> &data)
54    {
55        for (auto value : data) {
56            if (value > INLINE_MAX) {
57                stream.Write(INLINE_MAX + BitsToBytesRoundUp(MinimumBitsToStore(value)), INLINE_BITS);
58            } else {
59                stream.Write(value, INLINE_BITS);
60            }
61        }
62        for (auto value : data) {
63            if (value > INLINE_MAX) {
64                stream.Write(value, BitsToBytesRoundUp(MinimumBitsToStore(value)) * BITS_PER_BYTE);
65            }
66        }
67    }
68};
69
70template <typename>
71class BitTable;
72
73template <size_t NumColumns, typename Accessor, bool reverse_iteration = false>
74class BitTableRow : public std::iterator<std::random_access_iterator_tag, Accessor, int32_t, Accessor *, Accessor &> {
75    using BitTableType = BitTable<Accessor>;
76
77public:
78    static constexpr size_t NUM_COLUMNS = NumColumns;
79    static constexpr uint32_t NO_VALUE = BitTableType::NO_VALUE;
80
81    using Reversed = BitTableRow<NumColumns, Accessor, !reverse_iteration>;
82
83    BitTableRow() = default;
84    BitTableRow(const BitTableType *table, int row_index) : table_(table), row_index_(row_index) {}
85    explicit BitTableRow(Reversed *rhs) : table_(rhs->table_), row_index_(rhs->row_index_) {}
86
87    uint32_t GetRow() const
88    {
89        return row_index_;
90    }
91
92    std::string GetColumnStr(size_t column) const
93    {
94        if (Get(column) == NO_VALUE) {
95            return "-";
96        }
97        return std::to_string(Get(column));
98    }
99
100    Accessor GetAccessor()
101    {
102        return Accessor(this);
103    }
104
105    uint32_t Get(size_t index) const
106    {
107        return table_->ReadColumn(row_index_, index);
108    }
109
110    bool Has(size_t index) const
111    {
112        return Get(index) != BitTableType::NO_VALUE;
113    }
114
115    size_t ColumnsCount() const
116    {
117        return table_->GetColumnsCount();
118    }
119
120    bool IsValid() const
121    {
122        return row_index_ != -1;
123    }
124
125    bool operator==(const BitTableRow &rhs) const
126    {
127        return table_ == rhs.table_ && row_index_ == rhs.row_index_;
128    }
129
130    bool operator!=(const BitTableRow &rhs) const
131    {
132        return !(operator==(rhs));
133    }
134
135    ~BitTableRow() = default;
136
137    DEFAULT_COPY_SEMANTIC(BitTableRow);
138    DEFAULT_NOEXCEPT_MOVE_SEMANTIC(BitTableRow);
139
140protected:
141    auto GetTable() const
142    {
143        return table_;
144    }
145
146private:
147    const BitTableType *table_ {nullptr};
148    int row_index_ {-1};
149
150    friend Reversed;
151    template <typename, bool>
152    friend class BitTableIterator;
153};
154
155template <typename Accessor, bool reverse_iteration = false>
156class BitTableIterator
157    : public std::iterator<std::random_access_iterator_tag, Accessor, int32_t, Accessor *, Accessor &> {
158public:
159    BitTableIterator(const typename Accessor::BitTableType *table, int row_index) : row_(table, row_index) {}
160    explicit BitTableIterator(Accessor &row) : row_(row) {}
161
162    BitTableIterator &operator++()
163    {
164        if constexpr (reverse_iteration) {  // NOLINT
165            row_.row_index_--;
166        } else {  // NOLINT
167            row_.row_index_++;
168        }
169        return *this;
170    }
171
172    BitTableIterator &operator--()
173    {
174        if constexpr (reverse_iteration) {  // NOLINT
175            row_.row_index_++;
176        } else {  // NOLINT
177            row_.row_index_--;
178        }
179        return *this;
180    }
181
182    explicit operator bool() const
183    {
184        return row_.row_index_ >= 0 && helpers::ToUnsigned(row_.row_index_) < row_.table_->GetRowsCount();
185    }
186
187    bool operator==(const BitTableIterator &rhs) const
188    {
189        return row_ == rhs.row_;
190    }
191
192    bool operator!=(const BitTableIterator &rhs) const
193    {
194        return row_ != rhs.row_;
195    }
196
197    auto operator+(int32_t n) const
198    {
199        if constexpr (reverse_iteration) {  // NOLINT
200            return BitTableIterator(row_.table_, row_.row_index_ - n);
201        } else {  // NOLINT
202            return BitTableIterator(row_.table_, row_.row_index_ + n);
203        }
204    }
205    auto operator-(int32_t n) const
206    {
207        if constexpr (reverse_iteration) {  // NOLINT
208            return BitTableIterator(row_.table_, row_.row_index_ + n);
209        } else {  // NOLINT
210            return BitTableIterator(row_.table_, row_.row_index_ - n);
211        }
212    }
213    int32_t operator-(const BitTableIterator &rhs) const
214    {
215        if constexpr (reverse_iteration) {  // NOLINT
216            return rhs.row_.row_index_ - row_.row_index_;
217        } else {  // NOLINT
218            return row_.row_index_ - rhs.row_.row_index_;
219        }
220    }
221
222    auto &operator+=(int32_t n)
223    {
224        if constexpr (reverse_iteration) {  // NOLINT
225            row_.row_index_ -= n;
226        } else {  // NOLINT
227            row_.row_index_ += n;
228        }
229        return *this;
230    }
231
232    Accessor &operator*()
233    {
234        return row_;
235    }
236    Accessor *operator->()
237    {
238        return &row_;
239    }
240
241    ~BitTableIterator() = default;
242
243    DEFAULT_COPY_SEMANTIC(BitTableIterator);
244    DEFAULT_NOEXCEPT_MOVE_SEMANTIC(BitTableIterator);
245
246private:
247    Accessor row_;
248};
249
250template <size_t NumColumns>
251struct BitTableDefault : public BitTableRow<NumColumns, BitTableDefault<NumColumns>> {
252    using Base = BitTableRow<NumColumns, BitTableDefault<NumColumns>>;
253    using Base::Base;
254};
255
256// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
257#define BIT_TABLE_HEADER(columns, name)            \
258    using Base = BitTableRow<columns, name>;       \
259    using BitTableRow<columns, name>::BitTableRow; \
260    template <int Column, int Dummy>               \
261    struct ColumnName;                             \
262    static constexpr const char *TABLE_NAME = #name;
263
264// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
265#define BIT_TABLE_COLUMN(index, name, upname)                                \
266    static constexpr size_t COLUMN_##upname = index;                         \
267    static constexpr const char COLUMN_NAME_##upname[] = #name; /* NOLINT */ \
268    template <int Dummy>                                                     \
269    struct ColumnName<index, Dummy> {                                        \
270        static constexpr const char VALUE[] = #name; /* NOLINT */            \
271    };                                                                       \
272    uint32_t Get##name() const                                               \
273    {                                                                        \
274        return Get(index);                                                   \
275    }                                                                        \
276    bool Has##name() const                                                   \
277    {                                                                        \
278        return Get(index) != NO_VALUE;                                       \
279    }
280
281template <typename Accessor, size_t... Columns>
282// NOLINTNEXTLINE(readability-named-parameter)
283static const char *const *GetBitTableColumnNamesImpl(std::index_sequence<Columns...>)
284{
285    static const std::array<const char *, sizeof...(Columns)> names = {
286        Accessor::template ColumnName<Columns, 0>::VALUE...};
287    return names.data();
288}
289
290template <typename AccessorT>
291// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
292class BitTable {
293public:
294    static constexpr uint32_t NO_VALUE = -1;
295    static constexpr uint32_t NO_VALUE_DIFF = -1;
296
297    static constexpr size_t NUM_COLUMNS = AccessorT::NUM_COLUMNS;
298
299    using Accessor = AccessorT;
300    using RegionType = BitMemoryRegion<const uint8_t>;
301
302public:
303    template <bool Reversed>
304    class Range {
305    public:
306        using RangeIterator =
307            std::conditional_t<Reversed, BitTableIterator<Accessor, true>, BitTableIterator<Accessor, false>>;
308
309        Range(BitTable *table, int start, int end) : table_(table), start_(start), end_(end) {}
310        auto begin()
311        {
312            return RangeIterator(table_, start_);
313        }
314        auto end()
315        {
316            return RangeIterator(table_, end_);
317        }
318        Accessor operator[](size_t index)
319        {
320            return *(RangeIterator(table_, start_) + index);
321        }
322
323    private:
324        BitTable *table_;
325        int start_;
326        int end_;
327    };
328
329    auto begin() const
330    {
331        return BitTableIterator<Accessor, false>(this, 0);
332    }
333
334    auto begin()
335    {
336        return BitTableIterator<Accessor, false>(this, 0);
337    }
338
339    auto end() const
340    {
341        return BitTableIterator<Accessor, false>(this, GetRowsCount());
342    }
343
344    auto end()
345    {
346        return BitTableIterator<Accessor, false>(this, GetRowsCount());
347    }
348
349    auto GetRange(int start, int end)
350    {
351        return Range<false>(this, start, end);
352    }
353    auto GetRangeReversed(int start, int end)
354    {
355        return Range<true>(this, end - 1, start - 1);
356    }
357    auto GetRangeReversed()
358    {
359        if (GetRowsCount() == 0) {
360            return Range<true>(this, -1, -1);
361        }
362        return Range<true>(this, GetRowsCount() - 1, -1);
363    }
364
365    constexpr size_t GetColumnsCount() const
366    {
367        return NUM_COLUMNS;
368    }
369
370    size_t GetRowsCount() const
371    {
372        return rows_count_;
373    }
374
375    size_t GetRowSizeInBits() const
376    {
377        return columns_offsets_[NUM_COLUMNS];
378    }
379
380    size_t GetColumnWidth(size_t index) const
381    {
382        return columns_offsets_[index + 1] - columns_offsets_[index];
383    }
384
385    uint32_t ReadColumn(size_t row_index, size_t column) const
386    {
387        ASSERT(column < GetColumnsCount());
388        return region_.Read(row_index * GetRowSizeInBits() + columns_offsets_[column], GetColumnWidth(column)) +
389               NO_VALUE_DIFF;
390    }
391
392    auto GetRow(size_t index)
393    {
394        ASSERT(index < GetRowsCount());
395        return Accessor(this, index);
396    }
397
398    auto GetRow(size_t index) const
399    {
400        ASSERT(index < GetRowsCount());
401        return Accessor(this, index);
402    }
403
404    auto GetInvalidRow() const
405    {
406        return Accessor(this, -1);
407    }
408
409    RegionType GetBitMemoryRegion(uint32_t row) const
410    {
411        if (row == NO_VALUE) {
412            return RegionType();
413        }
414        size_t offset = row * GetRowSizeInBits() + columns_offsets_[0];
415        return region_.Subregion(offset, GetColumnWidth(0));
416    }
417
418    void Decode(BitMemoryStreamIn *stream)
419    {
420        auto columns = VarintPack::Read<NUM_COLUMNS + 1>(stream);
421        rows_count_ = columns[NUM_COLUMNS];
422
423        // Calculate offsets
424        columns_offsets_[0] = 0;
425        for (size_t i = 0; i < NUM_COLUMNS; i++) {
426            columns_offsets_[i + 1] = columns_offsets_[i] + columns[i];
427        }
428
429        region_ = stream->ReadRegion(GetRowsCount() * GetRowSizeInBits());
430    }
431
432    void Dump(std::ostream &out = std::cout) const
433    {
434        out << "BitTable: " << Accessor::TABLE_NAME << ", rows=" << GetRowsCount()
435            << ", row_size=" << GetRowSizeInBits() << std::endl;
436        static constexpr size_t INDENT = 4;
437        std::array<uint32_t, NUM_COLUMNS> width {};
438        out << "    ";
439        for (size_t i = 0; i < NUM_COLUMNS; i++) {
440            out << GetColumnName(i) << " ";
441            width[i] = strlen(GetColumnName(i)) + 1;
442        }
443        out << std::endl;
444        int index = 0;
445        for (auto row : *this) {
446            static constexpr size_t INDEX_WIDTH = 2;
447            out << std::right << std::setw(INDEX_WIDTH) << index++ << ": ";
448            for (size_t i = 0; i < NUM_COLUMNS; i++) {
449                std::string indent(INDENT, ' ');
450                out << std::left << std::setw(width[i]);
451                out << row.GetColumnStr(i);
452            }
453            out << std::endl;
454        }
455    }
456
457    const char *GetColumnName(size_t index) const
458    {
459        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
460        return GetBitTableColumnNamesImpl<Accessor>(std::make_index_sequence<NUM_COLUMNS>())[index];
461    }
462
463private:
464    BitMemoryRegion<const uint8_t> region_;
465    std::array<uint16_t, NUM_COLUMNS + 1> columns_offsets_ {};
466    uint16_t rows_count_ {0};
467};
468
469template <typename Accessor>
470class BitTableBuilder {
471public:
472    static constexpr size_t NUM_COLUMNS = Accessor::NUM_COLUMNS;
473    static constexpr uint32_t NO_VALUE = BitTable<Accessor>::NO_VALUE;
474    static constexpr uint32_t NO_VALUE_DIFF = BitTable<Accessor>::NO_VALUE_DIFF;
475
476    class Entry {
477    public:
478        Entry()
479        {
480            std::fill(data_.begin(), data_.end(), NO_VALUE);
481        }
482
483        Entry(std::initializer_list<uint32_t> data)
484        {
485            std::copy(data.begin(), data.end(), data_.begin());
486        }
487
488        uint32_t operator[](size_t index) const
489        {
490            return data_[index];
491        }
492
493        uint32_t &operator[](size_t index)
494        {
495            return data_[index];
496        }
497
498        bool operator==(const Entry &rhs) const
499        {
500            return data_ == rhs.data_;
501        }
502
503        void SetColumn(size_t index, uint32_t value)
504        {
505            data_[index] = value;
506        }
507
508        template <typename... Args>
509        void SetColumns(Args... values)
510        {
511            static_assert(sizeof...(Args) == NUM_COLUMNS);
512            int i = 0;
513            (SetColumn(i++, values), ...);
514        }
515
516        ~Entry() = default;
517
518        DEFAULT_COPY_SEMANTIC(Entry);
519        DEFAULT_NOEXCEPT_MOVE_SEMANTIC(Entry);
520
521    private:
522        std::array<uint32_t, NUM_COLUMNS> data_;
523    };
524
525public:
526    explicit BitTableBuilder(ArenaAllocator *allocator)
527        : entries_(allocator->Adapter()), dedup_map_(allocator->Adapter())
528    {
529    }
530
531    size_t GetRowsCount() const
532    {
533        return entries_.size();
534    }
535
536    const Entry &GetLast() const
537    {
538        return entries_.back();
539    }
540
541    const Entry &GetEntry(size_t index) const
542    {
543        return entries_[index];
544    }
545
546    size_t GetSize() const
547    {
548        return entries_.size();
549    }
550
551    ALWAYS_INLINE void Emplace(Entry entry)
552    {
553        entries_.emplace_back(entry);
554    }
555
556    ALWAYS_INLINE size_t Add(Entry entry)
557    {
558        return AddArray(Span(&entry, 1));
559    }
560
561    size_t AddArray(Span<Entry> entries)
562    {
563        uint32_t hash = FnvHash(entries.template SubSpan<uint32_t>(0, entries.size() * NUM_COLUMNS));
564        auto range = dedup_map_.equal_range(hash);
565        for (auto it = range.first; it != range.second; ++it) {
566            uint32_t row = it->second;
567            if (entries.size() + row <= entries_.size() &&
568                std::equal(entries.begin(), entries.end(), entries_.begin() + row)) {
569                return row;
570            }
571        }
572        uint32_t row = GetRowsCount();
573        entries_.insert(entries_.end(), entries.begin(), entries.end());
574        dedup_map_.emplace(hash, row);
575        return row;
576    }
577
578    void CalculateColumnsWidth(Span<uint32_t> columns_width)
579    {
580        std::fill(columns_width.begin(), columns_width.end(), 0);
581        for (const auto &entry : entries_) {
582            for (size_t i = 0; i < NUM_COLUMNS; i++) {
583                columns_width[i] |= entry[i] - NO_VALUE_DIFF;
584            }
585        }
586        for (auto &value : columns_width) {
587            value = MinimumBitsToStore(value);
588        }
589    }
590
591    template <typename Container>
592    void Encode(BitMemoryStreamOut<Container> &stream)
593    {
594        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
595        std::array<uint32_t, NUM_COLUMNS + 1> columns_width;
596        CalculateColumnsWidth(Span(columns_width.data(), NUM_COLUMNS));
597        columns_width[NUM_COLUMNS] = entries_.size();
598        VarintPack::Write(stream, columns_width);
599
600        for (const auto &entry : entries_) {
601            for (size_t i = 0; i < NUM_COLUMNS; i++) {
602                stream.Write(entry[i] - NO_VALUE_DIFF, columns_width[i]);
603            }
604        }
605    }
606
607    virtual ~BitTableBuilder() = default;
608
609    NO_COPY_SEMANTIC(BitTableBuilder);
610    NO_MOVE_SEMANTIC(BitTableBuilder);
611
612private:
613    ArenaDeque<Entry> entries_;
614    ArenaUnorderedMultiMap<uint32_t, uint32_t> dedup_map_;
615};
616
617class BitmapTableBuilder {
618public:
619    using Entry = std::pair<uint32_t *, uint32_t>;
620
621    explicit BitmapTableBuilder(ArenaAllocator *allocator)
622        : allocator_(allocator), rows_(allocator->Adapter()), dedup_map_(allocator->Adapter())
623    {
624    }
625
626    size_t GetRowsCount() const
627    {
628        return rows_.size();
629    }
630
631    auto GetEntry(size_t index) const
632    {
633        return BitMemoryRegion<uint32_t>(rows_[index].first, rows_[index].second);
634    }
635
636    size_t Add(BitVectorSpan vec)
637    {
638        if (vec.empty()) {
639            return BitTableDefault<1>::NO_VALUE;
640        }
641        uint32_t hash = FnvHash(vec.GetContainerDataSpan());
642        auto range = dedup_map_.equal_range(hash);
643        for (auto it = range.first; it != range.second; ++it) {
644            auto &row = rows_[it->second];
645            if (BitVectorSpan(row.first, row.second) == vec) {
646                return it->second;
647            }
648        }
649        size_t vec_size_in_bytes = BitsToBytesRoundUp(vec.size());
650        size_t data_size_in_bytes = RoundUp(vec_size_in_bytes, sizeof(uint32_t));
651        auto data = allocator_->AllocArray<uint32_t>(data_size_in_bytes / sizeof(uint32_t));
652        if (data_size_in_bytes != 0) {
653            errno_t res = memcpy_s(data, vec_size_in_bytes, vec.data(), vec_size_in_bytes);
654            if (res != EOK) {
655                UNREACHABLE();
656            }
657        }
658
659        // Clear extra bytes, which are not be covered by vector's data
660        size_t extra_bytes = data_size_in_bytes - vec_size_in_bytes;
661        if (extra_bytes != 0) {
662            // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic,-warnings-as-errors)
663            memset_s(reinterpret_cast<uint8_t *>(data) + vec_size_in_bytes, extra_bytes, 0, extra_bytes);
664        }
665        uint32_t index = rows_.size();
666        rows_.push_back({data, vec.size()});
667        dedup_map_.emplace(hash, index);
668        return index;
669    }
670
671    template <typename Container>
672    void Encode(BitMemoryStreamOut<Container> &stream)
673    {
674        static constexpr size_t COLUMNS_SIZE = 2;
675        std::array<uint32_t, COLUMNS_SIZE> columns = {0, static_cast<uint32_t>(rows_.size())};
676        std::for_each(rows_.begin(), rows_.end(),
677                      [&columns](const auto &row) { columns[0] = std::max(columns[0], row.second); });
678        VarintPack::Write(stream, columns);
679
680        for (const auto &row : rows_) {
681            stream.Write(row.first, row.second, columns[0]);
682        }
683    }
684
685    virtual ~BitmapTableBuilder() = default;
686
687    NO_COPY_SEMANTIC(BitmapTableBuilder);
688    NO_MOVE_SEMANTIC(BitmapTableBuilder);
689
690private:
691    ArenaAllocator *allocator_ {nullptr};
692    ArenaDeque<Entry> rows_;
693    ArenaUnorderedMultiMap<uint32_t, uint32_t> dedup_map_;
694};
695
696}  // namespace panda
697
698#endif  // LIBPANDABASE_UTILS_BIT_TABLE_H
699