1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_COMPILER_PERSISTENT_MAP_H_
6 #define V8_COMPILER_PERSISTENT_MAP_H_
7
8 #include <array>
9 #include <tuple>
10
11 #include "src/base/functional.h"
12 #include "src/zone/zone-containers.h"
13
14 namespace v8 {
15 namespace internal {
16 namespace compiler {
17
18 // PersistentMap is a persistent map datastructure based on hash trees (a binary
19 // tree using the bits of a hash value as addresses). The map is a conceptually
20 // infinite: All keys are initially mapped to a default value, values are
21 // deleted by overwriting them with the default value. The iterators produce
22 // exactly the keys that are not the default value. The hash values should have
23 // high variance in their high bits, so dense integers are a bad choice.
24 // Complexity:
25 // - Copy and assignment: O(1)
26 // - access: O(log n)
27 // - update: O(log n) time and space
28 // - iteration: amortized O(1) per step
29 // - Zip: O(n)
30 // - equality check: O(n)
31 // TODO(turbofan): Cache map transitions to avoid re-allocation of the same map.
32 // TODO(turbofan): Implement an O(1) equality check based on hash consing or
33 // something similar.
34 template <class Key, class Value, class Hasher = base::hash<Key>>
35 class PersistentMap {
36 public:
37 using key_type = Key;
38 using mapped_type = Value;
39 using value_type = std::pair<Key, Value>;
40
41 private:
42 static constexpr size_t kHashBits = 32;
43 enum Bit : int { kLeft = 0, kRight = 1 };
44
45 // Access hash bits starting from the high bits and compare them according to
46 // their unsigned value. This way, the order in the hash tree is compatible
47 // with numeric hash comparisons.
48 class HashValue;
49
50 struct KeyValue : std::pair<Key, Value> {
keyv8::internal::compiler::PersistentMap::KeyValue51 const Key& key() const { return this->first; }
valuev8::internal::compiler::PersistentMap::KeyValue52 const Value& value() const { return this->second; }
53 using std::pair<Key, Value>::pair;
54 };
55
56 struct FocusedTree;
57
58 public:
59 // Depth of the last added element. This is a cheap estimate for the size of
60 // the hash tree.
last_depth() const61 size_t last_depth() const {
62 if (tree_) {
63 return tree_->length;
64 } else {
65 return 0;
66 }
67 }
68
Get(const Key& key) const69 const Value& Get(const Key& key) const {
70 HashValue key_hash = HashValue(Hasher()(key));
71 const FocusedTree* tree = FindHash(key_hash);
72 return GetFocusedValue(tree, key);
73 }
74
75 // Add or overwrite an existing key-value pair.
76 void Set(Key key, Value value);
77
operator ==(const PersistentMap& other) const78 bool operator==(const PersistentMap& other) const {
79 if (tree_ == other.tree_) return true;
80 if (def_value_ != other.def_value_) return false;
81 for (std::tuple<Key, Value, Value> triple : Zip(other)) {
82 if (std::get<1>(triple) != std::get<2>(triple)) return false;
83 }
84 return true;
85 }
86
operator !=(const PersistentMap& other) const87 bool operator!=(const PersistentMap& other) const {
88 return !(*this == other);
89 }
90
91 // The iterator produces key-value pairs in the lexicographical order of
92 // hash value and key. It produces exactly the key-value pairs where the value
93 // is not the default value.
94 class iterator;
95
begin() const96 iterator begin() const {
97 if (!tree_) return end();
98 return iterator::begin(tree_, def_value_);
99 }
end() const100 iterator end() const { return iterator::end(def_value_); }
101
102 // Iterator to traverse two maps in lockstep, producing matching value pairs
103 // for each key where at least one value is different from the respective
104 // default.
105 class double_iterator;
106
107 // An iterable to iterate over the two maps in lockstep.
108 struct ZipIterable {
109 PersistentMap a;
110 PersistentMap b;
beginv8::internal::compiler::PersistentMap::ZipIterable111 double_iterator begin() { return double_iterator(a.begin(), b.begin()); }
endv8::internal::compiler::PersistentMap::ZipIterable112 double_iterator end() { return double_iterator(a.end(), b.end()); }
113 };
114
Zip(const PersistentMap& other) const115 ZipIterable Zip(const PersistentMap& other) const { return {*this, other}; }
116
PersistentMap(Zone* zone, Value def_value = Value())117 explicit PersistentMap(Zone* zone, Value def_value = Value())
118 : PersistentMap(nullptr, zone, def_value) {}
119
120 private:
121 // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
122 const FocusedTree* FindHash(HashValue hash) const;
123
124 // Find the {FocusedTree} that contains a key-value pair with key hash {hash}.
125 // Output the path to this {FocusedTree} and its length. If no such
126 // {FocusedTree} exists, return {nullptr} and output the path to the last node
127 // with a matching hash prefix. Note that {length} is the length of the found
128 // path and may be less than the length of the found {FocusedTree}.
129 const FocusedTree* FindHash(HashValue hash,
130 std::array<const FocusedTree*, kHashBits>* path,
131 int* length) const;
132
133 // Load value from the leaf node on the focused path of {tree}.
134 const Value& GetFocusedValue(const FocusedTree* tree, const Key& key) const;
135
136 // Return the {FocusedTree} representing the left (bit==kLeft) or right
137 // (bit==kRight) child of the node on the path of {tree} at tree level
138 // {level}.
139 static const FocusedTree* GetChild(const FocusedTree* tree, int level,
140 Bit bit);
141
142 // Find the leftmost path in the tree, starting at the node at tree level
143 // {level} on the path of {start}. Output the level of the leaf to {level} and
144 // the path to {path}.
145 static const FocusedTree* FindLeftmost(
146 const FocusedTree* start, int* level,
147 std::array<const FocusedTree*, kHashBits>* path);
148
PersistentMap(const FocusedTree* tree, Zone* zone, Value def_value)149 PersistentMap(const FocusedTree* tree, Zone* zone, Value def_value)
150 : tree_(tree), def_value_(def_value), zone_(zone) {}
151
152 const FocusedTree* tree_;
153 Value def_value_;
154 Zone* zone_;
155 };
156
157 // This structure represents a hash tree with one focused path to a specific
158 // leaf. For the focused leaf, it stores key, value and key hash. The path is
159 // defined by the hash bits of the focused leaf. In a traditional tree
160 // datastructure, the nodes of a path form a linked list with the values being
161 // the pointers outside of this path. Instead of storing all of these nodes,
162 // we store an array of the pointers pointing outside of the path. This is
163 // similar to the stack used when doing DFS traversal of a tree. The hash of
164 // the leaf is used to know if the pointers point to the left or the
165 // right of the path. As there is no explicit representation of a tree node,
166 // this structure also represents all the nodes on its path. The intended node
167 // depends on the tree depth, which is always clear from the referencing
168 // context. So the pointer to a {FocusedTree} stored in the
169 // {PersistentMap.tree_} always references the root, while a pointer from a
170 // focused node of another {FocusedTree} always references to one tree level
171 // lower than before.
172 template <class Key, class Value, class Hasher>
173 struct PersistentMap<Key, Value, Hasher>::FocusedTree {
174 KeyValue key_value;
175 // The depth of the focused path, that is, the number of pointers stored in
176 // this structure.
177 int8_t length;
178 HashValue key_hash;
179 // Out-of-line storage for hash collisions.
180 const ZoneMap<Key, Value>* more;
181 using more_iterator = typename ZoneMap<Key, Value>::const_iterator;
182 // {path_array} has to be the last member: To store an array inline, we
183 // over-allocate memory for this structure and access memory beyond
184 // {path_array}. This corresponds to a flexible array member as defined in
185 // C99.
186 const FocusedTree* path_array[1];
pathv8::internal::compiler::PersistentMap::PersistentMap::FocusedTree187 const FocusedTree*& path(int i) {
188 DCHECK(i < length);
189 return reinterpret_cast<const FocusedTree**>(
190 reinterpret_cast<byte*>(this) + offsetof(FocusedTree, path_array))[i];
191 }
192 const FocusedTree* path(int i) const {
193 DCHECK(i < length);
194 return reinterpret_cast<const FocusedTree* const*>(
195 reinterpret_cast<const byte*>(this) +
196 offsetof(FocusedTree, path_array))[i];
197 }
198 };
199
200 template <class Key, class Value, class Hasher>
201 class PersistentMap<Key, Value, Hasher>::HashValue {
202 public:
203 explicit HashValue(size_t hash) : bits_(static_cast<uint32_t>(hash)) {}
204
205 Bit operator[](int pos) const {
206 DCHECK_LT(pos, kHashBits);
207 return bits_ & (static_cast<decltype(bits_)>(1) << (kHashBits - pos - 1))
208 ? kRight
209 : kLeft;
210 }
211
212 bool operator<(HashValue other) const { return bits_ < other.bits_; }
213 bool operator==(HashValue other) const { return bits_ == other.bits_; }
214 bool operator!=(HashValue other) const { return bits_ != other.bits_; }
215 HashValue operator^(HashValue other) const {
216 return HashValue(bits_ ^ other.bits_);
217 }
218
219 private:
220 static_assert(sizeof(uint32_t) * 8 == kHashBits, "wrong type for bits_");
221 uint32_t bits_;
222 };
223
224 template <class Key, class Value, class Hasher>
225 class PersistentMap<Key, Value, Hasher>::iterator {
226 public:
227 const value_type operator*() const {
228 if (current_->more) {
229 return *more_iter_;
230 } else {
231 return current_->key_value;
232 }
233 }
234
235 iterator& operator++() {
236 do {
237 if (!current_) {
238 // Iterator is past the end.
239 return *this;
240 }
241 if (current_->more) {
242 DCHECK(more_iter_ != current_->more->end());
243 ++more_iter_;
244 if (more_iter_ != current_->more->end()) return *this;
245 }
246 if (level_ == 0) {
247 *this = end(def_value_);
248 return *this;
249 }
250 --level_;
251 while (current_->key_hash[level_] == kRight || path_[level_] == nullptr) {
252 if (level_ == 0) {
253 *this = end(def_value_);
254 return *this;
255 }
256 --level_;
257 }
258 const FocusedTree* first_right_alternative = path_[level_];
259 level_++;
260 current_ = FindLeftmost(first_right_alternative, &level_, &path_);
261 if (current_->more) {
262 more_iter_ = current_->more->begin();
263 }
264 } while (!((**this).second != def_value()));
265 return *this;
266 }
267
operator ==(const iterator& other) const268 bool operator==(const iterator& other) const {
269 if (is_end()) return other.is_end();
270 if (other.is_end()) return false;
271 if (current_->key_hash != other.current_->key_hash) {
272 return false;
273 } else {
274 return (**this).first == (*other).first;
275 }
276 }
operator !=(const iterator& other) const277 bool operator!=(const iterator& other) const { return !(*this == other); }
278
operator <(const iterator& other) const279 bool operator<(const iterator& other) const {
280 if (is_end()) return false;
281 if (other.is_end()) return true;
282 if (current_->key_hash == other.current_->key_hash) {
283 return (**this).first < (*other).first;
284 } else {
285 return current_->key_hash < other.current_->key_hash;
286 }
287 }
288
is_end() const289 bool is_end() const { return current_ == nullptr; }
290
def_value()291 const Value& def_value() { return def_value_; }
292
begin(const FocusedTree* tree, Value def_value)293 static iterator begin(const FocusedTree* tree, Value def_value) {
294 iterator i(def_value);
295 i.current_ = FindLeftmost(tree, &i.level_, &i.path_);
296 if (i.current_->more) {
297 i.more_iter_ = i.current_->more->begin();
298 }
299 // Skip entries with default value. PersistentMap iterators must never point
300 // to a default value.
301 while (!i.is_end() && !((*i).second != def_value)) ++i;
302 return i;
303 }
304
end(Value def_value)305 static iterator end(Value def_value) { return iterator(def_value); }
306
307 private:
308 int level_;
309 typename FocusedTree::more_iterator more_iter_;
310 const FocusedTree* current_;
311 std::array<const FocusedTree*, kHashBits> path_;
312 Value def_value_;
313
iterator(Value def_value)314 explicit iterator(Value def_value)
315 : level_(0), current_(nullptr), def_value_(def_value) {}
316 };
317
318 template <class Key, class Value, class Hasher>
319 class PersistentMap<Key, Value, Hasher>::double_iterator {
320 public:
operator *()321 std::tuple<Key, Value, Value> operator*() {
322 if (first_current_) {
323 auto pair = *first_;
324 return std::make_tuple(
325 pair.first, pair.second,
326 second_current_ ? (*second_).second : second_.def_value());
327 } else {
328 DCHECK(second_current_);
329 auto pair = *second_;
330 return std::make_tuple(pair.first, first_.def_value(), pair.second);
331 }
332 }
333
operator ++()334 double_iterator& operator++() {
335 #ifdef DEBUG
336 iterator old_first = first_;
337 iterator old_second = second_;
338 #endif
339 if (first_current_) {
340 ++first_;
341 DCHECK(old_first < first_);
342 }
343 if (second_current_) {
344 ++second_;
345 DCHECK(old_second < second_);
346 }
347 return *this = double_iterator(first_, second_);
348 }
349
350 double_iterator(iterator first, iterator second)
351 : first_(first), second_(second) {
352 if (first_ == second_) {
353 first_current_ = second_current_ = true;
354 } else if (first_ < second_) {
355 first_current_ = true;
356 second_current_ = false;
357 } else {
358 DCHECK(second_ < first_);
359 first_current_ = false;
360 second_current_ = true;
361 }
362 }
363
364 bool operator!=(const double_iterator& other) {
365 return first_ != other.first_ || second_ != other.second_;
366 }
367
368 bool is_end() const { return first_.is_end() && second_.is_end(); }
369
370 private:
371 iterator first_;
372 iterator second_;
373 bool first_current_;
374 bool second_current_;
375 };
376
377 template <class Key, class Value, class Hasher>
378 void PersistentMap<Key, Value, Hasher>::Set(Key key, Value value) {
379 HashValue key_hash = HashValue(Hasher()(key));
380 std::array<const FocusedTree*, kHashBits> path;
381 int length = 0;
382 const FocusedTree* old = FindHash(key_hash, &path, &length);
383 ZoneMap<Key, Value>* more = nullptr;
384 if (!(GetFocusedValue(old, key) != value)) return;
385 if (old && !(old->more == nullptr && old->key_value.key() == key)) {
386 more = zone_->New<ZoneMap<Key, Value>>(zone_);
387 if (old->more) {
388 *more = *old->more;
389 } else {
390 more->erase(old->key_value.key());
391 more->emplace(old->key_value.key(), old->key_value.value());
392 }
393 more->erase(key);
394 more->emplace(key, value);
395 }
396 size_t size = sizeof(FocusedTree) +
397 std::max(0, length - 1) * sizeof(const FocusedTree*);
398 FocusedTree* tree = new (zone_->Allocate<FocusedTree>(size))
399 FocusedTree{KeyValue(std::move(key), std::move(value)),
400 static_cast<int8_t>(length),
401 key_hash,
402 more,
403 {}};
404 for (int i = 0; i < length; ++i) {
405 tree->path(i) = path[i];
406 }
407 *this = PersistentMap(tree, zone_, def_value_);
408 }
409
410 template <class Key, class Value, class Hasher>
411 const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
FindHash(HashValue hash) const412 PersistentMap<Key, Value, Hasher>::FindHash(HashValue hash) const {
413 const FocusedTree* tree = tree_;
414 int level = 0;
415 while (tree && hash != tree->key_hash) {
416 while ((hash ^ tree->key_hash)[level] == 0) {
417 ++level;
418 }
419 tree = level < tree->length ? tree->path(level) : nullptr;
420 ++level;
421 }
422 return tree;
423 }
424
425 template <class Key, class Value, class Hasher>
426 const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
FindHash( HashValue hash, std::array<const FocusedTree*, kHashBits>* path, int* length) const427 PersistentMap<Key, Value, Hasher>::FindHash(
428 HashValue hash, std::array<const FocusedTree*, kHashBits>* path,
429 int* length) const {
430 const FocusedTree* tree = tree_;
431 int level = 0;
432 while (tree && hash != tree->key_hash) {
433 int map_length = tree->length;
434 while ((hash ^ tree->key_hash)[level] == 0) {
435 (*path)[level] = level < map_length ? tree->path(level) : nullptr;
436 ++level;
437 }
438 (*path)[level] = tree;
439 tree = level < tree->length ? tree->path(level) : nullptr;
440 ++level;
441 }
442 if (tree) {
443 while (level < tree->length) {
444 (*path)[level] = tree->path(level);
445 ++level;
446 }
447 }
448 *length = level;
449 return tree;
450 }
451
452 template <class Key, class Value, class Hasher>
GetFocusedValue( const FocusedTree* tree, const Key& key) const453 const Value& PersistentMap<Key, Value, Hasher>::GetFocusedValue(
454 const FocusedTree* tree, const Key& key) const {
455 if (!tree) {
456 return def_value_;
457 }
458 if (tree->more) {
459 auto it = tree->more->find(key);
460 if (it == tree->more->end())
461 return def_value_;
462 else
463 return it->second;
464 } else {
465 if (key == tree->key_value.key()) {
466 return tree->key_value.value();
467 } else {
468 return def_value_;
469 }
470 }
471 }
472
473 template <class Key, class Value, class Hasher>
474 const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
GetChild(const FocusedTree* tree, int level, Bit bit)475 PersistentMap<Key, Value, Hasher>::GetChild(const FocusedTree* tree, int level,
476 Bit bit) {
477 if (tree->key_hash[level] == bit) {
478 return tree;
479 } else if (level < tree->length) {
480 return tree->path(level);
481 } else {
482 return nullptr;
483 }
484 }
485
486 template <class Key, class Value, class Hasher>
487 const typename PersistentMap<Key, Value, Hasher>::FocusedTree*
FindLeftmost( const FocusedTree* start, int* level, std::array<const FocusedTree*, kHashBits>* path)488 PersistentMap<Key, Value, Hasher>::FindLeftmost(
489 const FocusedTree* start, int* level,
490 std::array<const FocusedTree*, kHashBits>* path) {
491 const FocusedTree* current = start;
492 while (*level < current->length) {
493 if (const FocusedTree* left_child = GetChild(current, *level, kLeft)) {
494 (*path)[*level] = GetChild(current, *level, kRight);
495 current = left_child;
496 ++*level;
497 } else if (const FocusedTree* right_child =
498 GetChild(current, *level, kRight)) {
499 (*path)[*level] = GetChild(current, *level, kLeft);
500 current = right_child;
501 ++*level;
502 } else {
503 UNREACHABLE();
504 }
505 }
506 return current;
507 }
508
509 template <class Key, class Value, class Hasher>
operator <<(std::ostream& os, const PersistentMap<Key, Value, Hasher>& map)510 std::ostream& operator<<(std::ostream& os,
511 const PersistentMap<Key, Value, Hasher>& map) {
512 os << "{";
513 bool first = true;
514 for (auto pair : map) {
515 if (!first) os << ", ";
516 first = false;
517 os << pair.first << ": " << pair.second;
518 }
519 return os << "}";
520 }
521
522 } // namespace compiler
523 } // namespace internal
524 } // namespace v8
525
526 #endif // V8_COMPILER_PERSISTENT_MAP_H_
527