1/** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16#ifndef LIBPANDABASE_UTILS_BIT_TABLE_H 17#define LIBPANDABASE_UTILS_BIT_TABLE_H 18 19#include "utils/arena_containers.h" 20#include "utils/bit_memory_region.h" 21#include "utils/bit_memory_stream.h" 22#include "utils/bit_vector.h" 23#include "utils/hash.h" 24#include <iomanip> 25#include "securec.h" 26 27namespace panda { 28 29class VarintPack { 30public: 31 static constexpr size_t INLINE_BITS = 4; 32 static constexpr size_t INLINE_MAX = 11; 33 34 template <size_t N> 35 static std::array<uint32_t, N> Read(BitMemoryStreamIn *stream) 36 { 37 static_assert(sizeof(uint64_t) * BITS_PER_BYTE >= N * INLINE_BITS); 38 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) 39 std::array<uint32_t, N> values; 40 auto data = stream->Read<uint64_t>(N * INLINE_BITS); 41 for (size_t i = 0; i < N; i++) { 42 values[i] = ExtractBits(data, i * INLINE_BITS, INLINE_BITS); 43 } 44 for (size_t i = 0; i < N; i++) { 45 if (UNLIKELY(values[i] > INLINE_MAX)) { 46 values[i] = stream->Read<uint32_t>((values[i] - INLINE_MAX) * BITS_PER_BYTE); 47 } 48 } 49 return values; 50 } 51 52 template <size_t N, typename Container> 53 static void Write(BitMemoryStreamOut<Container> &stream, const std::array<uint32_t, N> &data) 54 { 55 for (auto value : data) { 56 if (value > INLINE_MAX) { 57 stream.Write(INLINE_MAX + BitsToBytesRoundUp(MinimumBitsToStore(value)), INLINE_BITS); 58 } else { 59 stream.Write(value, INLINE_BITS); 60 } 61 } 62 for (auto value : data) { 63 if (value > INLINE_MAX) { 64 stream.Write(value, BitsToBytesRoundUp(MinimumBitsToStore(value)) * BITS_PER_BYTE); 65 } 66 } 67 } 68}; 69 70template <typename> 71class BitTable; 72 73template <size_t NumColumns, typename Accessor, bool reverse_iteration = false> 74class BitTableRow : public std::iterator<std::random_access_iterator_tag, Accessor, int32_t, Accessor *, Accessor &> { 75 using BitTableType = BitTable<Accessor>; 76 77public: 78 static constexpr size_t NUM_COLUMNS = NumColumns; 79 static constexpr uint32_t NO_VALUE = BitTableType::NO_VALUE; 80 81 using Reversed = BitTableRow<NumColumns, Accessor, !reverse_iteration>; 82 83 BitTableRow() = default; 84 BitTableRow(const BitTableType *table, int row_index) : table_(table), row_index_(row_index) {} 85 explicit BitTableRow(Reversed *rhs) : table_(rhs->table_), row_index_(rhs->row_index_) {} 86 87 uint32_t GetRow() const 88 { 89 return row_index_; 90 } 91 92 std::string GetColumnStr(size_t column) const 93 { 94 if (Get(column) == NO_VALUE) { 95 return "-"; 96 } 97 return std::to_string(Get(column)); 98 } 99 100 Accessor GetAccessor() 101 { 102 return Accessor(this); 103 } 104 105 uint32_t Get(size_t index) const 106 { 107 return table_->ReadColumn(row_index_, index); 108 } 109 110 bool Has(size_t index) const 111 { 112 return Get(index) != BitTableType::NO_VALUE; 113 } 114 115 size_t ColumnsCount() const 116 { 117 return table_->GetColumnsCount(); 118 } 119 120 bool IsValid() const 121 { 122 return row_index_ != -1; 123 } 124 125 bool operator==(const BitTableRow &rhs) const 126 { 127 return table_ == rhs.table_ && row_index_ == rhs.row_index_; 128 } 129 130 bool operator!=(const BitTableRow &rhs) const 131 { 132 return !(operator==(rhs)); 133 } 134 135 ~BitTableRow() = default; 136 137 DEFAULT_COPY_SEMANTIC(BitTableRow); 138 DEFAULT_NOEXCEPT_MOVE_SEMANTIC(BitTableRow); 139 140protected: 141 auto GetTable() const 142 { 143 return table_; 144 } 145 146private: 147 const BitTableType *table_ {nullptr}; 148 int row_index_ {-1}; 149 150 friend Reversed; 151 template <typename, bool> 152 friend class BitTableIterator; 153}; 154 155template <typename Accessor, bool reverse_iteration = false> 156class BitTableIterator 157 : public std::iterator<std::random_access_iterator_tag, Accessor, int32_t, Accessor *, Accessor &> { 158public: 159 BitTableIterator(const typename Accessor::BitTableType *table, int row_index) : row_(table, row_index) {} 160 explicit BitTableIterator(Accessor &row) : row_(row) {} 161 162 BitTableIterator &operator++() 163 { 164 if constexpr (reverse_iteration) { // NOLINT 165 row_.row_index_--; 166 } else { // NOLINT 167 row_.row_index_++; 168 } 169 return *this; 170 } 171 172 BitTableIterator &operator--() 173 { 174 if constexpr (reverse_iteration) { // NOLINT 175 row_.row_index_++; 176 } else { // NOLINT 177 row_.row_index_--; 178 } 179 return *this; 180 } 181 182 explicit operator bool() const 183 { 184 return row_.row_index_ >= 0 && helpers::ToUnsigned(row_.row_index_) < row_.table_->GetRowsCount(); 185 } 186 187 bool operator==(const BitTableIterator &rhs) const 188 { 189 return row_ == rhs.row_; 190 } 191 192 bool operator!=(const BitTableIterator &rhs) const 193 { 194 return row_ != rhs.row_; 195 } 196 197 auto operator+(int32_t n) const 198 { 199 if constexpr (reverse_iteration) { // NOLINT 200 return BitTableIterator(row_.table_, row_.row_index_ - n); 201 } else { // NOLINT 202 return BitTableIterator(row_.table_, row_.row_index_ + n); 203 } 204 } 205 auto operator-(int32_t n) const 206 { 207 if constexpr (reverse_iteration) { // NOLINT 208 return BitTableIterator(row_.table_, row_.row_index_ + n); 209 } else { // NOLINT 210 return BitTableIterator(row_.table_, row_.row_index_ - n); 211 } 212 } 213 int32_t operator-(const BitTableIterator &rhs) const 214 { 215 if constexpr (reverse_iteration) { // NOLINT 216 return rhs.row_.row_index_ - row_.row_index_; 217 } else { // NOLINT 218 return row_.row_index_ - rhs.row_.row_index_; 219 } 220 } 221 222 auto &operator+=(int32_t n) 223 { 224 if constexpr (reverse_iteration) { // NOLINT 225 row_.row_index_ -= n; 226 } else { // NOLINT 227 row_.row_index_ += n; 228 } 229 return *this; 230 } 231 232 Accessor &operator*() 233 { 234 return row_; 235 } 236 Accessor *operator->() 237 { 238 return &row_; 239 } 240 241 ~BitTableIterator() = default; 242 243 DEFAULT_COPY_SEMANTIC(BitTableIterator); 244 DEFAULT_NOEXCEPT_MOVE_SEMANTIC(BitTableIterator); 245 246private: 247 Accessor row_; 248}; 249 250template <size_t NumColumns> 251struct BitTableDefault : public BitTableRow<NumColumns, BitTableDefault<NumColumns>> { 252 using Base = BitTableRow<NumColumns, BitTableDefault<NumColumns>>; 253 using Base::Base; 254}; 255 256// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 257#define BIT_TABLE_HEADER(columns, name) \ 258 using Base = BitTableRow<columns, name>; \ 259 using BitTableRow<columns, name>::BitTableRow; \ 260 template <int Column, int Dummy> \ 261 struct ColumnName; \ 262 static constexpr const char *TABLE_NAME = #name; 263 264// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 265#define BIT_TABLE_COLUMN(index, name, upname) \ 266 static constexpr size_t COLUMN_##upname = index; \ 267 static constexpr const char COLUMN_NAME_##upname[] = #name; /* NOLINT */ \ 268 template <int Dummy> \ 269 struct ColumnName<index, Dummy> { \ 270 static constexpr const char VALUE[] = #name; /* NOLINT */ \ 271 }; \ 272 uint32_t Get##name() const \ 273 { \ 274 return Get(index); \ 275 } \ 276 bool Has##name() const \ 277 { \ 278 return Get(index) != NO_VALUE; \ 279 } 280 281template <typename Accessor, size_t... Columns> 282// NOLINTNEXTLINE(readability-named-parameter) 283static const char *const *GetBitTableColumnNamesImpl(std::index_sequence<Columns...>) 284{ 285 static const std::array<const char *, sizeof...(Columns)> names = { 286 Accessor::template ColumnName<Columns, 0>::VALUE...}; 287 return names.data(); 288} 289 290template <typename AccessorT> 291// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) 292class BitTable { 293public: 294 static constexpr uint32_t NO_VALUE = -1; 295 static constexpr uint32_t NO_VALUE_DIFF = -1; 296 297 static constexpr size_t NUM_COLUMNS = AccessorT::NUM_COLUMNS; 298 299 using Accessor = AccessorT; 300 using RegionType = BitMemoryRegion<const uint8_t>; 301 302public: 303 template <bool Reversed> 304 class Range { 305 public: 306 using RangeIterator = 307 std::conditional_t<Reversed, BitTableIterator<Accessor, true>, BitTableIterator<Accessor, false>>; 308 309 Range(BitTable *table, int start, int end) : table_(table), start_(start), end_(end) {} 310 auto begin() 311 { 312 return RangeIterator(table_, start_); 313 } 314 auto end() 315 { 316 return RangeIterator(table_, end_); 317 } 318 Accessor operator[](size_t index) 319 { 320 return *(RangeIterator(table_, start_) + index); 321 } 322 323 private: 324 BitTable *table_; 325 int start_; 326 int end_; 327 }; 328 329 auto begin() const 330 { 331 return BitTableIterator<Accessor, false>(this, 0); 332 } 333 334 auto begin() 335 { 336 return BitTableIterator<Accessor, false>(this, 0); 337 } 338 339 auto end() const 340 { 341 return BitTableIterator<Accessor, false>(this, GetRowsCount()); 342 } 343 344 auto end() 345 { 346 return BitTableIterator<Accessor, false>(this, GetRowsCount()); 347 } 348 349 auto GetRange(int start, int end) 350 { 351 return Range<false>(this, start, end); 352 } 353 auto GetRangeReversed(int start, int end) 354 { 355 return Range<true>(this, end - 1, start - 1); 356 } 357 auto GetRangeReversed() 358 { 359 if (GetRowsCount() == 0) { 360 return Range<true>(this, -1, -1); 361 } 362 return Range<true>(this, GetRowsCount() - 1, -1); 363 } 364 365 constexpr size_t GetColumnsCount() const 366 { 367 return NUM_COLUMNS; 368 } 369 370 size_t GetRowsCount() const 371 { 372 return rows_count_; 373 } 374 375 size_t GetRowSizeInBits() const 376 { 377 return columns_offsets_[NUM_COLUMNS]; 378 } 379 380 size_t GetColumnWidth(size_t index) const 381 { 382 return columns_offsets_[index + 1] - columns_offsets_[index]; 383 } 384 385 uint32_t ReadColumn(size_t row_index, size_t column) const 386 { 387 ASSERT(column < GetColumnsCount()); 388 return region_.Read(row_index * GetRowSizeInBits() + columns_offsets_[column], GetColumnWidth(column)) + 389 NO_VALUE_DIFF; 390 } 391 392 auto GetRow(size_t index) 393 { 394 ASSERT(index < GetRowsCount()); 395 return Accessor(this, index); 396 } 397 398 auto GetRow(size_t index) const 399 { 400 ASSERT(index < GetRowsCount()); 401 return Accessor(this, index); 402 } 403 404 auto GetInvalidRow() const 405 { 406 return Accessor(this, -1); 407 } 408 409 RegionType GetBitMemoryRegion(uint32_t row) const 410 { 411 if (row == NO_VALUE) { 412 return RegionType(); 413 } 414 size_t offset = row * GetRowSizeInBits() + columns_offsets_[0]; 415 return region_.Subregion(offset, GetColumnWidth(0)); 416 } 417 418 void Decode(BitMemoryStreamIn *stream) 419 { 420 auto columns = VarintPack::Read<NUM_COLUMNS + 1>(stream); 421 rows_count_ = columns[NUM_COLUMNS]; 422 423 // Calculate offsets 424 columns_offsets_[0] = 0; 425 for (size_t i = 0; i < NUM_COLUMNS; i++) { 426 columns_offsets_[i + 1] = columns_offsets_[i] + columns[i]; 427 } 428 429 region_ = stream->ReadRegion(GetRowsCount() * GetRowSizeInBits()); 430 } 431 432 void Dump(std::ostream &out = std::cout) const 433 { 434 out << "BitTable: " << Accessor::TABLE_NAME << ", rows=" << GetRowsCount() 435 << ", row_size=" << GetRowSizeInBits() << std::endl; 436 static constexpr size_t INDENT = 4; 437 std::array<uint32_t, NUM_COLUMNS> width {}; 438 out << " "; 439 for (size_t i = 0; i < NUM_COLUMNS; i++) { 440 out << GetColumnName(i) << " "; 441 width[i] = strlen(GetColumnName(i)) + 1; 442 } 443 out << std::endl; 444 int index = 0; 445 for (auto row : *this) { 446 static constexpr size_t INDEX_WIDTH = 2; 447 out << std::right << std::setw(INDEX_WIDTH) << index++ << ": "; 448 for (size_t i = 0; i < NUM_COLUMNS; i++) { 449 std::string indent(INDENT, ' '); 450 out << std::left << std::setw(width[i]); 451 out << row.GetColumnStr(i); 452 } 453 out << std::endl; 454 } 455 } 456 457 const char *GetColumnName(size_t index) const 458 { 459 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 460 return GetBitTableColumnNamesImpl<Accessor>(std::make_index_sequence<NUM_COLUMNS>())[index]; 461 } 462 463private: 464 BitMemoryRegion<const uint8_t> region_; 465 std::array<uint16_t, NUM_COLUMNS + 1> columns_offsets_ {}; 466 uint16_t rows_count_ {0}; 467}; 468 469template <typename Accessor> 470class BitTableBuilder { 471public: 472 static constexpr size_t NUM_COLUMNS = Accessor::NUM_COLUMNS; 473 static constexpr uint32_t NO_VALUE = BitTable<Accessor>::NO_VALUE; 474 static constexpr uint32_t NO_VALUE_DIFF = BitTable<Accessor>::NO_VALUE_DIFF; 475 476 class Entry { 477 public: 478 Entry() 479 { 480 std::fill(data_.begin(), data_.end(), NO_VALUE); 481 } 482 483 Entry(std::initializer_list<uint32_t> data) 484 { 485 std::copy(data.begin(), data.end(), data_.begin()); 486 } 487 488 uint32_t operator[](size_t index) const 489 { 490 return data_[index]; 491 } 492 493 uint32_t &operator[](size_t index) 494 { 495 return data_[index]; 496 } 497 498 bool operator==(const Entry &rhs) const 499 { 500 return data_ == rhs.data_; 501 } 502 503 void SetColumn(size_t index, uint32_t value) 504 { 505 data_[index] = value; 506 } 507 508 template <typename... Args> 509 void SetColumns(Args... values) 510 { 511 static_assert(sizeof...(Args) == NUM_COLUMNS); 512 int i = 0; 513 (SetColumn(i++, values), ...); 514 } 515 516 ~Entry() = default; 517 518 DEFAULT_COPY_SEMANTIC(Entry); 519 DEFAULT_NOEXCEPT_MOVE_SEMANTIC(Entry); 520 521 private: 522 std::array<uint32_t, NUM_COLUMNS> data_; 523 }; 524 525public: 526 explicit BitTableBuilder(ArenaAllocator *allocator) 527 : entries_(allocator->Adapter()), dedup_map_(allocator->Adapter()) 528 { 529 } 530 531 size_t GetRowsCount() const 532 { 533 return entries_.size(); 534 } 535 536 const Entry &GetLast() const 537 { 538 return entries_.back(); 539 } 540 541 const Entry &GetEntry(size_t index) const 542 { 543 return entries_[index]; 544 } 545 546 size_t GetSize() const 547 { 548 return entries_.size(); 549 } 550 551 ALWAYS_INLINE void Emplace(Entry entry) 552 { 553 entries_.emplace_back(entry); 554 } 555 556 ALWAYS_INLINE size_t Add(Entry entry) 557 { 558 return AddArray(Span(&entry, 1)); 559 } 560 561 size_t AddArray(Span<Entry> entries) 562 { 563 uint32_t hash = FnvHash(entries.template SubSpan<uint32_t>(0, entries.size() * NUM_COLUMNS)); 564 auto range = dedup_map_.equal_range(hash); 565 for (auto it = range.first; it != range.second; ++it) { 566 uint32_t row = it->second; 567 if (entries.size() + row <= entries_.size() && 568 std::equal(entries.begin(), entries.end(), entries_.begin() + row)) { 569 return row; 570 } 571 } 572 uint32_t row = GetRowsCount(); 573 entries_.insert(entries_.end(), entries.begin(), entries.end()); 574 dedup_map_.emplace(hash, row); 575 return row; 576 } 577 578 void CalculateColumnsWidth(Span<uint32_t> columns_width) 579 { 580 std::fill(columns_width.begin(), columns_width.end(), 0); 581 for (const auto &entry : entries_) { 582 for (size_t i = 0; i < NUM_COLUMNS; i++) { 583 columns_width[i] |= entry[i] - NO_VALUE_DIFF; 584 } 585 } 586 for (auto &value : columns_width) { 587 value = MinimumBitsToStore(value); 588 } 589 } 590 591 template <typename Container> 592 void Encode(BitMemoryStreamOut<Container> &stream) 593 { 594 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) 595 std::array<uint32_t, NUM_COLUMNS + 1> columns_width; 596 CalculateColumnsWidth(Span(columns_width.data(), NUM_COLUMNS)); 597 columns_width[NUM_COLUMNS] = entries_.size(); 598 VarintPack::Write(stream, columns_width); 599 600 for (const auto &entry : entries_) { 601 for (size_t i = 0; i < NUM_COLUMNS; i++) { 602 stream.Write(entry[i] - NO_VALUE_DIFF, columns_width[i]); 603 } 604 } 605 } 606 607 virtual ~BitTableBuilder() = default; 608 609 NO_COPY_SEMANTIC(BitTableBuilder); 610 NO_MOVE_SEMANTIC(BitTableBuilder); 611 612private: 613 ArenaDeque<Entry> entries_; 614 ArenaUnorderedMultiMap<uint32_t, uint32_t> dedup_map_; 615}; 616 617class BitmapTableBuilder { 618public: 619 using Entry = std::pair<uint32_t *, uint32_t>; 620 621 explicit BitmapTableBuilder(ArenaAllocator *allocator) 622 : allocator_(allocator), rows_(allocator->Adapter()), dedup_map_(allocator->Adapter()) 623 { 624 } 625 626 size_t GetRowsCount() const 627 { 628 return rows_.size(); 629 } 630 631 auto GetEntry(size_t index) const 632 { 633 return BitMemoryRegion<uint32_t>(rows_[index].first, rows_[index].second); 634 } 635 636 size_t Add(BitVectorSpan vec) 637 { 638 if (vec.empty()) { 639 return BitTableDefault<1>::NO_VALUE; 640 } 641 uint32_t hash = FnvHash(vec.GetContainerDataSpan()); 642 auto range = dedup_map_.equal_range(hash); 643 for (auto it = range.first; it != range.second; ++it) { 644 auto &row = rows_[it->second]; 645 if (BitVectorSpan(row.first, row.second) == vec) { 646 return it->second; 647 } 648 } 649 size_t vec_size_in_bytes = BitsToBytesRoundUp(vec.size()); 650 size_t data_size_in_bytes = RoundUp(vec_size_in_bytes, sizeof(uint32_t)); 651 auto data = allocator_->AllocArray<uint32_t>(data_size_in_bytes / sizeof(uint32_t)); 652 if (data_size_in_bytes != 0) { 653 errno_t res = memcpy_s(data, vec_size_in_bytes, vec.data(), vec_size_in_bytes); 654 if (res != EOK) { 655 UNREACHABLE(); 656 } 657 } 658 659 // Clear extra bytes, which are not be covered by vector's data 660 size_t extra_bytes = data_size_in_bytes - vec_size_in_bytes; 661 if (extra_bytes != 0) { 662 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic,-warnings-as-errors) 663 memset_s(reinterpret_cast<uint8_t *>(data) + vec_size_in_bytes, extra_bytes, 0, extra_bytes); 664 } 665 uint32_t index = rows_.size(); 666 rows_.push_back({data, vec.size()}); 667 dedup_map_.emplace(hash, index); 668 return index; 669 } 670 671 template <typename Container> 672 void Encode(BitMemoryStreamOut<Container> &stream) 673 { 674 static constexpr size_t COLUMNS_SIZE = 2; 675 std::array<uint32_t, COLUMNS_SIZE> columns = {0, static_cast<uint32_t>(rows_.size())}; 676 std::for_each(rows_.begin(), rows_.end(), 677 [&columns](const auto &row) { columns[0] = std::max(columns[0], row.second); }); 678 VarintPack::Write(stream, columns); 679 680 for (const auto &row : rows_) { 681 stream.Write(row.first, row.second, columns[0]); 682 } 683 } 684 685 virtual ~BitmapTableBuilder() = default; 686 687 NO_COPY_SEMANTIC(BitmapTableBuilder); 688 NO_MOVE_SEMANTIC(BitmapTableBuilder); 689 690private: 691 ArenaAllocator *allocator_ {nullptr}; 692 ArenaDeque<Entry> rows_; 693 ArenaUnorderedMultiMap<uint32_t, uint32_t> dedup_map_; 694}; 695 696} // namespace panda 697 698#endif // LIBPANDABASE_UTILS_BIT_TABLE_H 699