1// Copyright 2017 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_COMPILER_PERSISTENT_MAP_H_
6#define V8_COMPILER_PERSISTENT_MAP_H_
7
8#include <array>
9#include <tuple>
10
11#include "src/base/functional.h"
12#include "src/zone/zone-containers.h"
13
14namespace v8 {
15namespace internal {
16namespace compiler {
17
18// PersistentMap is a persistent map datastructure based on hash trees (a binary
19// tree using the bits of a hash value as addresses). The map is a conceptually
20// infinite: All keys are initially mapped to a default value, values are
21// deleted by overwriting them with the default value. The iterators produce
22// exactly the keys that are not the default value. The hash values should have
23// high variance in their high bits, so dense integers are a bad choice.
24// Complexity:
25// - Copy and assignment: O(1)
26// - access: O(log n)
27// - update: O(log n) time and space
28// - iteration: amortized O(1) per step
29// - Zip: O(n)
30// - equality check: O(n)
31// TODO(turbofan): Cache map transitions to avoid re-allocation of the same map.
32// TODO(turbofan): Implement an O(1) equality check based on hash consing or
33//              something similar.
34template <class Key, class Value, class Hasher = base::hash<Key>>
35class PersistentMap {
36 public:
37  using key_type = Key;
38  using mapped_type = Value;
39  using value_type = std::pair<Key, Value>;
40
41 private:
42  static constexpr size_t kHashBits = 32;
43  enum Bit : int { kLeft = 0, kRight = 1 };
44
45  // Access hash bits starting from the high bits and compare them according to
46  // their unsigned value. This way, the order in the hash tree is compatible
47  // with numeric hash comparisons.
48  class HashValue;
49
50  struct KeyValue : std::pair<Key, Value> {
51    const Key& key() const { return this->first; }
52    const Value& value() const { return this->second; }
53    using std::pair<Key, Value>::pair;
54  };
55
56  struct FocusedTree;
57
58 public:
59  // Depth of the last added element. This is a cheap estimate for the size of
60  // the hash tree.
61  size_t last_depth() const {
62    if (tree_) {
63      return tree_->length;
64    } else {
65      return 0;
66    }
67  }
68
69  const Value& Get(const Key& key) const {
70    HashValue key_hash = HashValue(Hasher()(key));
71    const FocusedTree* tree = FindHash(key_hash);
72    return GetFocusedValue(tree, key);
73  }
74
75  // Add or overwrite an existing key-value pair.
76  void Set(Key key, Value value);
77
78  bool operator==(const PersistentMap& other) const {
79    if (tree_ == other.tree_) return true;
80    if (def_value_ != other.def_value_) return false;
81    for (std::tuple<Key, Value, Value> triple : Zip(other)) {
82      if (std::get<1>(triple) != std::get<2>(triple)) return false;
83    }
84    return true;
85  }
86
87  bool operator!=(const PersistentMap& other) const {
88    return !(*this == other);
89  }
90
91  // The iterator produces key-value pairs in the lexicographical order of
92  // hash value and key. It produces exactly the key-value pairs where the value
93  // is not the default value.
94  class iterator;
95
96  iterator begin() const {
97    if (!tree_) return end();
98    return iterator::begin(tree_, def_value_);
99  }
100  iterator end() const { return iterator::end(def_value_); }
101
102  // Iterator to traverse two maps in lockstep, producing matching value pairs
103  // for each key where at least one value is different from the respective
104  // default.
105  class double_iterator;
106
107  // An iterable to iterate over the two maps in lockstep.
108  struct ZipIterable {
109    PersistentMap a;
110    PersistentMap b;
111    double_iterator begin() { return double_iterator(a.begin(), b.begin()); }
112    double_iterator end() { return double_iterator(a.end(), b.end()); }
113  };
114
115  ZipIterable Zip(const PersistentMap& other) const { return {*this, other}; }
116
117  explicit PersistentMap(Zone* zone, Value def_value = Value())
118      : PersistentMap(nullptr, zone, def_value) {}
119
120 private:
121  // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
122  const FocusedTree* FindHash(HashValue hash) const;
123
124  // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
125  // Output the path to this {FocusedTree} and its length. If no such
126  // {FocusedTree} exists, return {nullptr} and output the path to the last node
127  // with a matching hash prefix. Note that {length} is the length of the found
128  // path and may be less than the length of the found {FocusedTree}.
129  const FocusedTree* FindHash(HashValue hash,
130                              std::array<const FocusedTree*, kHashBits>* path,
131                              int* length) const;
132
133  // Load value from the leaf node on the focused path of {tree}.
134  const Value& GetFocusedValue(const FocusedTree* tree, const Key& key) const;
135
136  // Return the {FocusedTree} representing the left (bit==kLeft) or right
137  // (bit==kRight) child of the node on the path of {tree} at tree level
138  // {level}.
139  static const FocusedTree* GetChild(const FocusedTree* tree, int level,
140                                     Bit bit);
141
142  // Find the leftmost path in the tree, starting at the node at tree level
143  // {level} on the path of {start}. Output the level of the leaf to {level} and
144  // the path to {path}.
145  static const FocusedTree* FindLeftmost(
146      const FocusedTree* start, int* level,
147      std::array<const FocusedTree*, kHashBits>* path);
148
149  PersistentMap(const FocusedTree* tree, Zone* zone, Value def_value)
150      : tree_(tree), def_value_(def_value), zone_(zone) {}
151
152  const FocusedTree* tree_;
153  Value def_value_;
154  Zone* zone_;
155};
156
157// This structure represents a hash tree with one focused path to a specific
158// leaf. For the focused leaf, it stores key, value and key hash. The path is
159// defined by the hash bits of the focused leaf. In a traditional tree
160// datastructure, the nodes of a path form a linked list with the values being
161// the pointers outside of this path. Instead of storing all of these nodes,
162// we store an array of the pointers pointing outside of the path. This is
163// similar to the stack used when doing DFS traversal of a tree. The hash of
164// the leaf is used to know if the pointers point to the left or the
165// right of the path. As there is no explicit representation of a tree node,
166// this structure also represents all the nodes on its path. The intended node
167// depends on the tree depth, which is always clear from the referencing
168// context. So the pointer to a {FocusedTree} stored in the
169// {PersistentMap.tree_} always references the root, while a pointer from a
170// focused node of another {FocusedTree} always references to one tree level
171// lower than before.
172template <class Key, class Value, class Hasher>
173struct PersistentMap<Key, Value, Hasher>::FocusedTree {
174  KeyValue key_value;
175  // The depth of the focused path, that is, the number of pointers stored in
176  // this structure.
177  int8_t length;
178  HashValue key_hash;
179  // Out-of-line storage for hash collisions.
180  const ZoneMap<Key, Value>* more;
181  using more_iterator = typename ZoneMap<Key, Value>::const_iterator;
182  // {path_array} has to be the last member: To store an array inline, we
183  // over-allocate memory for this structure and access memory beyond
184  // {path_array}. This corresponds to a flexible array member as defined in
185  // C99.
186  const FocusedTree* path_array[1];
187  const FocusedTree*& path(int i) {
188    DCHECK(i < length);
189    return reinterpret_cast<const FocusedTree**>(
190        reinterpret_cast<byte*>(this) + offsetof(FocusedTree, path_array))[i];
191  }
192  const FocusedTree* path(int i) const {
193    DCHECK(i < length);
194    return reinterpret_cast<const FocusedTree* const*>(
195        reinterpret_cast<const byte*>(this) +
196        offsetof(FocusedTree, path_array))[i];
197  }
198};
199
200template <class Key, class Value, class Hasher>
201class PersistentMap<Key, Value, Hasher>::HashValue {
202 public:
203  explicit HashValue(size_t hash) : bits_(static_cast<uint32_t>(hash)) {}
204
205  Bit operator[](int pos) const {
206    DCHECK_LT(pos, kHashBits);
207    return bits_ & (static_cast<decltype(bits_)>(1) << (kHashBits - pos - 1))
208               ? kRight
209               : kLeft;
210  }
211
212  bool operator<(HashValue other) const { return bits_ < other.bits_; }
213  bool operator==(HashValue other) const { return bits_ == other.bits_; }
214  bool operator!=(HashValue other) const { return bits_ != other.bits_; }
215  HashValue operator^(HashValue other) const {
216    return HashValue(bits_ ^ other.bits_);
217  }
218
219 private:
220  static_assert(sizeof(uint32_t) * 8 == kHashBits, "wrong type for bits_");
221  uint32_t bits_;
222};
223
224template <class Key, class Value, class Hasher>
225class PersistentMap<Key, Value, Hasher>::iterator {
226 public:
227  const value_type operator*() const {
228    if (current_->more) {
229      return *more_iter_;
230    } else {
231      return current_->key_value;
232    }
233  }
234
235  iterator& operator++() {
236    do {
237      if (!current_) {
238        // Iterator is past the end.
239        return *this;
240      }
241      if (current_->more) {
242        DCHECK(more_iter_ != current_->more->end());
243        ++more_iter_;
244        if (more_iter_ != current_->more->end()) return *this;
245      }
246      if (level_ == 0) {
247        *this = end(def_value_);
248        return *this;
249      }
250      --level_;
251      while (current_->key_hash[level_] == kRight || path_[level_] == nullptr) {
252        if (level_ == 0) {
253          *this = end(def_value_);
254          return *this;
255        }
256        --level_;
257      }
258      const FocusedTree* first_right_alternative = path_[level_];
259      level_++;
260      current_ = FindLeftmost(first_right_alternative, &level_, &path_);
261      if (current_->more) {
262        more_iter_ = current_->more->begin();
263      }
264    } while (!((**this).second != def_value()));
265    return *this;
266  }
267
268  bool operator==(const iterator& other) const {
269    if (is_end()) return other.is_end();
270    if (other.is_end()) return false;
271    if (current_->key_hash != other.current_->key_hash) {
272      return false;
273    } else {
274      return (**this).first == (*other).first;
275    }
276  }
277  bool operator!=(const iterator& other) const { return !(*this == other); }
278
279  bool operator<(const iterator& other) const {
280    if (is_end()) return false;
281    if (other.is_end()) return true;
282    if (current_->key_hash == other.current_->key_hash) {
283      return (**this).first < (*other).first;
284    } else {
285      return current_->key_hash < other.current_->key_hash;
286    }
287  }
288
289  bool is_end() const { return current_ == nullptr; }
290
291  const Value& def_value() { return def_value_; }
292
293  static iterator begin(const FocusedTree* tree, Value def_value) {
294    iterator i(def_value);
295    i.current_ = FindLeftmost(tree, &i.level_, &i.path_);
296    if (i.current_->more) {
297      i.more_iter_ = i.current_->more->begin();
298    }
299    // Skip entries with default value. PersistentMap iterators must never point
300    // to a default value.
301    while (!i.is_end() && !((*i).second != def_value)) ++i;
302    return i;
303  }
304
305  static iterator end(Value def_value) { return iterator(def_value); }
306
307 private:
308  int level_;
309  typename FocusedTree::more_iterator more_iter_;
310  const FocusedTree* current_;
311  std::array<const FocusedTree*, kHashBits> path_;
312  Value def_value_;
313
314  explicit iterator(Value def_value)
315      : level_(0), current_(nullptr), def_value_(def_value) {}
316};
317
318template <class Key, class Value, class Hasher>
319class PersistentMap<Key, Value, Hasher>::double_iterator {
320 public:
321  std::tuple<Key, Value, Value> operator*() {
322    if (first_current_) {
323      auto pair = *first_;
324      return std::make_tuple(
325          pair.first, pair.second,
326          second_current_ ? (*second_).second : second_.def_value());
327    } else {
328      DCHECK(second_current_);
329      auto pair = *second_;
330      return std::make_tuple(pair.first, first_.def_value(), pair.second);
331    }
332  }
333
334  double_iterator& operator++() {
335#ifdef DEBUG
336    iterator old_first = first_;
337    iterator old_second = second_;
338#endif
339    if (first_current_) {
340      ++first_;
341      DCHECK(old_first < first_);
342    }
343    if (second_current_) {
344      ++second_;
345      DCHECK(old_second < second_);
346    }
347    return *this = double_iterator(first_, second_);
348  }
349
350  double_iterator(iterator first, iterator second)
351      : first_(first), second_(second) {
352    if (first_ == second_) {
353      first_current_ = second_current_ = true;
354    } else if (first_ < second_) {
355      first_current_ = true;
356      second_current_ = false;
357    } else {
358      DCHECK(second_ < first_);
359      first_current_ = false;
360      second_current_ = true;
361    }
362  }
363
364  bool operator!=(const double_iterator& other) {
365    return first_ != other.first_ || second_ != other.second_;
366  }
367
368  bool is_end() const { return first_.is_end() && second_.is_end(); }
369
370 private:
371  iterator first_;
372  iterator second_;
373  bool first_current_;
374  bool second_current_;
375};
376
377template <class Key, class Value, class Hasher>
378void PersistentMap<Key, Value, Hasher>::Set(Key key, Value value) {
379  HashValue key_hash = HashValue(Hasher()(key));
380  std::array<const FocusedTree*, kHashBits> path;
381  int length = 0;
382  const FocusedTree* old = FindHash(key_hash, &path, &length);
383  ZoneMap<Key, Value>* more = nullptr;
384  if (!(GetFocusedValue(old, key) != value)) return;
385  if (old && !(old->more == nullptr && old->key_value.key() == key)) {
386    more = zone_->New<ZoneMap<Key, Value>>(zone_);
387    if (old->more) {
388      *more = *old->more;
389    } else {
390      more->erase(old->key_value.key());
391      more->emplace(old->key_value.key(), old->key_value.value());
392    }
393    more->erase(key);
394    more->emplace(key, value);
395  }
396  size_t size = sizeof(FocusedTree) +
397                std::max(0, length - 1) * sizeof(const FocusedTree*);
398  FocusedTree* tree = new (zone_->Allocate<FocusedTree>(size))
399      FocusedTree{KeyValue(std::move(key), std::move(value)),
400                  static_cast<int8_t>(length),
401                  key_hash,
402                  more,
403                  {}};
404  for (int i = 0; i < length; ++i) {
405    tree->path(i) = path[i];
406  }
407  *this = PersistentMap(tree, zone_, def_value_);
408}
409
410template <class Key, class Value, class Hasher>
411const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
412PersistentMap<Key, Value, Hasher>::FindHash(HashValue hash) const {
413  const FocusedTree* tree = tree_;
414  int level = 0;
415  while (tree && hash != tree->key_hash) {
416    while ((hash ^ tree->key_hash)[level] == 0) {
417      ++level;
418    }
419    tree = level < tree->length ? tree->path(level) : nullptr;
420    ++level;
421  }
422  return tree;
423}
424
425template <class Key, class Value, class Hasher>
426const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
427PersistentMap<Key, Value, Hasher>::FindHash(
428    HashValue hash, std::array<const FocusedTree*, kHashBits>* path,
429    int* length) const {
430  const FocusedTree* tree = tree_;
431  int level = 0;
432  while (tree && hash != tree->key_hash) {
433    int map_length = tree->length;
434    while ((hash ^ tree->key_hash)[level] == 0) {
435      (*path)[level] = level < map_length ? tree->path(level) : nullptr;
436      ++level;
437    }
438    (*path)[level] = tree;
439    tree = level < tree->length ? tree->path(level) : nullptr;
440    ++level;
441  }
442  if (tree) {
443    while (level < tree->length) {
444      (*path)[level] = tree->path(level);
445      ++level;
446    }
447  }
448  *length = level;
449  return tree;
450}
451
452template <class Key, class Value, class Hasher>
453const Value& PersistentMap<Key, Value, Hasher>::GetFocusedValue(
454    const FocusedTree* tree, const Key& key) const {
455  if (!tree) {
456    return def_value_;
457  }
458  if (tree->more) {
459    auto it = tree->more->find(key);
460    if (it == tree->more->end())
461      return def_value_;
462    else
463      return it->second;
464  } else {
465    if (key == tree->key_value.key()) {
466      return tree->key_value.value();
467    } else {
468      return def_value_;
469    }
470  }
471}
472
473template <class Key, class Value, class Hasher>
474const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
475PersistentMap<Key, Value, Hasher>::GetChild(const FocusedTree* tree, int level,
476                                            Bit bit) {
477  if (tree->key_hash[level] == bit) {
478    return tree;
479  } else if (level < tree->length) {
480    return tree->path(level);
481  } else {
482    return nullptr;
483  }
484}
485
486template <class Key, class Value, class Hasher>
487const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
488PersistentMap<Key, Value, Hasher>::FindLeftmost(
489    const FocusedTree* start, int* level,
490    std::array<const FocusedTree*, kHashBits>* path) {
491  const FocusedTree* current = start;
492  while (*level < current->length) {
493    if (const FocusedTree* left_child = GetChild(current, *level, kLeft)) {
494      (*path)[*level] = GetChild(current, *level, kRight);
495      current = left_child;
496      ++*level;
497    } else if (const FocusedTree* right_child =
498                   GetChild(current, *level, kRight)) {
499      (*path)[*level] = GetChild(current, *level, kLeft);
500      current = right_child;
501      ++*level;
502    } else {
503      UNREACHABLE();
504    }
505  }
506  return current;
507}
508
509template <class Key, class Value, class Hasher>
510std::ostream& operator<<(std::ostream& os,
511                         const PersistentMap<Key, Value, Hasher>& map) {
512  os << "{";
513  bool first = true;
514  for (auto pair : map) {
515    if (!first) os << ", ";
516    first = false;
517    os << pair.first << ": " << pair.second;
518  }
519  return os << "}";
520}
521
522}  // namespace compiler
523}  // namespace internal
524}  // namespace v8
525
526#endif  // V8_COMPILER_PERSISTENT_MAP_H_
527