1#include "Python.h" 2 3#include "pycore_bitutils.h" // _Py_popcount32 4#include "pycore_hamt.h" 5#include "pycore_initconfig.h" // _PyStatus_OK() 6#include "pycore_object.h" // _PyObject_GC_TRACK() 7#include <stddef.h> // offsetof() 8 9/* 10This file provides an implementation of an immutable mapping using the 11Hash Array Mapped Trie (or HAMT) datastructure. 12 13This design allows to have: 14 151. Efficient copy: immutable mappings can be copied by reference, 16 making it an O(1) operation. 17 182. Efficient mutations: due to structural sharing, only a portion of 19 the trie needs to be copied when the collection is mutated. The 20 cost of set/delete operations is O(log N). 21 223. Efficient lookups: O(log N). 23 24(where N is number of key/value items in the immutable mapping.) 25 26 27HAMT 28==== 29 30The core idea of HAMT is that the shape of the trie is encoded into the 31hashes of keys. 32 33Say we want to store a K/V pair in our mapping. First, we calculate the 34hash of K, let's say it's 19830128, or in binary: 35 36 0b1001011101001010101110000 = 19830128 37 38Now let's partition this bit representation of the hash into blocks of 395 bits each: 40 41 0b00_00000_10010_11101_00101_01011_10000 = 19830128 42 (6) (5) (4) (3) (2) (1) 43 44Each block of 5 bits represents a number between 0 and 31. So if we have 45a tree that consists of nodes, each of which is an array of 32 pointers, 46those 5-bit blocks will encode a position on a single tree level. 47 48For example, storing the key K with hash 19830128, results in the following 49tree structure: 50 51 (array of 32 pointers) 52 +---+ -- +----+----+----+ -- +----+ 53 root node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b10000 = 16 (1) 54 (level 1) +---+ -- +----+----+----+ -- +----+ 55 | 56 +---+ -- +----+----+----+ -- +----+ 57 a 2nd level node | 0 | .. | 10 | 11 | 12 | .. | 31 | 0b01011 = 11 (2) 58 +---+ -- +----+----+----+ -- +----+ 59 | 60 +---+ -- +----+----+----+ -- +----+ 61 a 3rd level node | 0 | .. | 04 | 05 | 06 | .. | 31 | 0b00101 = 5 (3) 62 +---+ -- +----+----+----+ -- +----+ 63 | 64 +---+ -- +----+----+----+----+ 65 a 4th level node | 0 | .. | 04 | 29 | 30 | 31 | 0b11101 = 29 (4) 66 +---+ -- +----+----+----+----+ 67 | 68 +---+ -- +----+----+----+ -- +----+ 69 a 5th level node | 0 | .. | 17 | 18 | 19 | .. | 31 | 0b10010 = 18 (5) 70 +---+ -- +----+----+----+ -- +----+ 71 | 72 +--------------+ 73 | 74 +---+ -- +----+----+----+ -- +----+ 75 a 6th level node | 0 | .. | 15 | 16 | 17 | .. | 31 | 0b00000 = 0 (6) 76 +---+ -- +----+----+----+ -- +----+ 77 | 78 V -- our value (or collision) 79 80To rehash: for a K/V pair, the hash of K encodes where in the tree V will 81be stored. 82 83To optimize memory footprint and handle hash collisions, our implementation 84uses three different types of nodes: 85 86 * A Bitmap node; 87 * An Array node; 88 * A Collision node. 89 90Because we implement an immutable dictionary, our nodes are also 91immutable. Therefore, when we need to modify a node, we copy it, and 92do that modification to the copy. 93 94 95Array Nodes 96----------- 97 98These nodes are very simple. Essentially they are arrays of 32 pointers 99we used to illustrate the high-level idea in the previous section. 100 101We use Array nodes only when we need to store more than 16 pointers 102in a single node. 103 104Array nodes do not store key objects or value objects. They are used 105only as an indirection level - their pointers point to other nodes in 106the tree. 107 108 109Bitmap Node 110----------- 111 112Allocating a new 32-pointers array for every node of our tree would be 113very expensive. Unless we store millions of keys, most of tree nodes would 114be very sparse. 115 116When we have less than 16 elements in a node, we don't want to use the 117Array node, that would mean that we waste a lot of memory. Instead, 118we can use bitmap compression and can have just as many pointers 119as we need! 120 121Bitmap nodes consist of two fields: 122 1231. An array of pointers. If a Bitmap node holds N elements, the 124 array will be of N pointers. 125 1262. A 32bit integer -- a bitmap field. If an N-th bit is set in the 127 bitmap, it means that the node has an N-th element. 128 129For example, say we need to store a 3 elements sparse array: 130 131 +---+ -- +---+ -- +----+ -- +----+ 132 | 0 | .. | 4 | .. | 11 | .. | 17 | 133 +---+ -- +---+ -- +----+ -- +----+ 134 | | | 135 o1 o2 o3 136 137We allocate a three-pointer Bitmap node. Its bitmap field will be 138then set to: 139 140 0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4) 141 142To check if our Bitmap node has an I-th element we can do: 143 144 bitmap & (1 << I) 145 146 147And here's a formula to calculate a position in our pointer array 148which would correspond to an I-th element: 149 150 popcount(bitmap & ((1 << I) - 1)) 151 152 153Let's break it down: 154 155 * `popcount` is a function that returns a number of bits set to 1; 156 157 * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits 158 set to the *right* of our bit. 159 160 161So for our 17, 11, and 4 indexes: 162 163 * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2. 164 165 * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1. 166 167 * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0. 168 169 170To conclude: Bitmap nodes are just like Array nodes -- they can store 171a number of pointers, but use bitmap compression to eliminate unused 172pointers. 173 174 175Bitmap nodes have two pointers for each item: 176 177 +----+----+----+----+ -- +----+----+ 178 | k1 | v1 | k2 | v2 | .. | kN | vN | 179 +----+----+----+----+ -- +----+----+ 180 181When kI == NULL, vI points to another tree level. 182 183When kI != NULL, the actual key object is stored in kI, and its 184value is stored in vI. 185 186 187Collision Nodes 188--------------- 189 190Collision nodes are simple arrays of pointers -- two pointers per 191key/value. When there's a hash collision, say for k1/v1 and k2/v2 192we have `hash(k1)==hash(k2)`. Then our collision node will be: 193 194 +----+----+----+----+ 195 | k1 | v1 | k2 | v2 | 196 +----+----+----+----+ 197 198 199Tree Structure 200-------------- 201 202All nodes are PyObjects. 203 204The `PyHamtObject` object has a pointer to the root node (h_root), 205and has a length field (h_count). 206 207High-level functions accept a PyHamtObject object and dispatch to 208lower-level functions depending on what kind of node h_root points to. 209 210 211Operations 212========== 213 214There are three fundamental operations on an immutable dictionary: 215 2161. "o.assoc(k, v)" will return a new immutable dictionary, that will be 217 a copy of "o", but with the "k/v" item set. 218 219 Functions in this file: 220 221 hamt_node_assoc, hamt_node_bitmap_assoc, 222 hamt_node_array_assoc, hamt_node_collision_assoc 223 224 `hamt_node_assoc` function accepts a node object, and calls 225 other functions depending on its actual type. 226 2272. "o.find(k)" will lookup key "k" in "o". 228 229 Functions: 230 231 hamt_node_find, hamt_node_bitmap_find, 232 hamt_node_array_find, hamt_node_collision_find 233 2343. "o.without(k)" will return a new immutable dictionary, that will be 235 a copy of "o", buth without the "k" key. 236 237 Functions: 238 239 hamt_node_without, hamt_node_bitmap_without, 240 hamt_node_array_without, hamt_node_collision_without 241 242 243Further Reading 244=============== 245 2461. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html 247 2482. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html 249 2503. Clojure's PersistentHashMap implementation: 251 https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java 252 253 254Debug 255===== 256 257The HAMT datatype is accessible for testing purposes under the 258`_testcapi` module: 259 260 >>> from _testcapi import hamt 261 >>> h = hamt() 262 >>> h2 = h.set('a', 2) 263 >>> h3 = h2.set('b', 3) 264 >>> list(h3) 265 ['a', 'b'] 266 267When CPython is built in debug mode, a '__dump__()' method is available 268to introspect the tree: 269 270 >>> print(h3.__dump__()) 271 HAMT(len=2): 272 BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8): 273 'a': 2 274 'b': 3 275*/ 276 277 278#define IS_ARRAY_NODE(node) Py_IS_TYPE(node, &_PyHamt_ArrayNode_Type) 279#define IS_BITMAP_NODE(node) Py_IS_TYPE(node, &_PyHamt_BitmapNode_Type) 280#define IS_COLLISION_NODE(node) Py_IS_TYPE(node, &_PyHamt_CollisionNode_Type) 281 282 283/* Return type for 'find' (lookup a key) functions. 284 285 * F_ERROR - an error occurred; 286 * F_NOT_FOUND - the key was not found; 287 * F_FOUND - the key was found. 288*/ 289typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t; 290 291 292/* Return type for 'without' (delete a key) functions. 293 294 * W_ERROR - an error occurred; 295 * W_NOT_FOUND - the key was not found: there's nothing to delete; 296 * W_EMPTY - the key was found: the node/tree would be empty 297 if the key is deleted; 298 * W_NEWNODE - the key was found: a new node/tree is returned 299 without that key. 300*/ 301typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t; 302 303 304/* Low-level iterator protocol type. 305 306 * I_ITEM - a new item has been yielded; 307 * I_END - the whole tree was visited (similar to StopIteration). 308*/ 309typedef enum {I_ITEM, I_END} hamt_iter_t; 310 311 312#define HAMT_ARRAY_NODE_SIZE 32 313 314 315typedef struct { 316 PyObject_HEAD 317 PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE]; 318 Py_ssize_t a_count; 319} PyHamtNode_Array; 320 321 322typedef struct { 323 PyObject_VAR_HEAD 324 uint32_t b_bitmap; 325 PyObject *b_array[1]; 326} PyHamtNode_Bitmap; 327 328 329typedef struct { 330 PyObject_VAR_HEAD 331 int32_t c_hash; 332 PyObject *c_array[1]; 333} PyHamtNode_Collision; 334 335 336static PyHamtNode_Bitmap *_empty_bitmap_node; 337static PyHamtObject *_empty_hamt; 338 339 340static PyHamtObject * 341hamt_alloc(void); 342 343static PyHamtNode * 344hamt_node_assoc(PyHamtNode *node, 345 uint32_t shift, int32_t hash, 346 PyObject *key, PyObject *val, int* added_leaf); 347 348static hamt_without_t 349hamt_node_without(PyHamtNode *node, 350 uint32_t shift, int32_t hash, 351 PyObject *key, 352 PyHamtNode **new_node); 353 354static hamt_find_t 355hamt_node_find(PyHamtNode *node, 356 uint32_t shift, int32_t hash, 357 PyObject *key, PyObject **val); 358 359#ifdef Py_DEBUG 360static int 361hamt_node_dump(PyHamtNode *node, 362 _PyUnicodeWriter *writer, int level); 363#endif 364 365static PyHamtNode * 366hamt_node_array_new(Py_ssize_t); 367 368static PyHamtNode * 369hamt_node_collision_new(int32_t hash, Py_ssize_t size); 370 371static inline Py_ssize_t 372hamt_node_collision_count(PyHamtNode_Collision *node); 373 374 375#ifdef Py_DEBUG 376static void 377_hamt_node_array_validate(void *obj_raw) 378{ 379 PyObject *obj = _PyObject_CAST(obj_raw); 380 assert(IS_ARRAY_NODE(obj)); 381 PyHamtNode_Array *node = (PyHamtNode_Array*)obj; 382 Py_ssize_t i = 0, count = 0; 383 for (; i < HAMT_ARRAY_NODE_SIZE; i++) { 384 if (node->a_array[i] != NULL) { 385 count++; 386 } 387 } 388 assert(count == node->a_count); 389} 390 391#define VALIDATE_ARRAY_NODE(NODE) \ 392 do { _hamt_node_array_validate(NODE); } while (0); 393#else 394#define VALIDATE_ARRAY_NODE(NODE) 395#endif 396 397 398/* Returns -1 on error */ 399static inline int32_t 400hamt_hash(PyObject *o) 401{ 402 Py_hash_t hash = PyObject_Hash(o); 403 404#if SIZEOF_PY_HASH_T <= 4 405 return hash; 406#else 407 if (hash == -1) { 408 /* exception */ 409 return -1; 410 } 411 412 /* While it's somewhat suboptimal to reduce Python's 64 bit hash to 413 32 bits via XOR, it seems that the resulting hash function 414 is good enough (this is also how Long type is hashed in Java.) 415 Storing 10, 100, 1000 Python strings results in a relatively 416 shallow and uniform tree structure. 417 418 Also it's worth noting that it would be possible to adapt the tree 419 structure to 64 bit hashes, but that would increase memory pressure 420 and provide little to no performance benefits for collections with 421 fewer than billions of key/value pairs. 422 423 Important: do not change this hash reducing function. There are many 424 tests that need an exact tree shape to cover all code paths and 425 we do that by specifying concrete values for test data's `__hash__`. 426 If this function is changed most of the regression tests would 427 become useless. 428 */ 429 int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32); 430 return xored == -1 ? -2 : xored; 431#endif 432} 433 434static inline uint32_t 435hamt_mask(int32_t hash, uint32_t shift) 436{ 437 return (((uint32_t)hash >> shift) & 0x01f); 438} 439 440static inline uint32_t 441hamt_bitpos(int32_t hash, uint32_t shift) 442{ 443 return (uint32_t)1 << hamt_mask(hash, shift); 444} 445 446static inline uint32_t 447hamt_bitindex(uint32_t bitmap, uint32_t bit) 448{ 449 return (uint32_t)_Py_popcount32(bitmap & (bit - 1)); 450} 451 452 453/////////////////////////////////// Dump Helpers 454#ifdef Py_DEBUG 455 456static int 457_hamt_dump_ident(_PyUnicodeWriter *writer, int level) 458{ 459 /* Write `' ' * level` to the `writer` */ 460 PyObject *str = NULL; 461 PyObject *num = NULL; 462 PyObject *res = NULL; 463 int ret = -1; 464 465 str = PyUnicode_FromString(" "); 466 if (str == NULL) { 467 goto error; 468 } 469 470 num = PyLong_FromLong((long)level); 471 if (num == NULL) { 472 goto error; 473 } 474 475 res = PyNumber_Multiply(str, num); 476 if (res == NULL) { 477 goto error; 478 } 479 480 ret = _PyUnicodeWriter_WriteStr(writer, res); 481 482error: 483 Py_XDECREF(res); 484 Py_XDECREF(str); 485 Py_XDECREF(num); 486 return ret; 487} 488 489static int 490_hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...) 491{ 492 /* A convenient helper combining _PyUnicodeWriter_WriteStr and 493 PyUnicode_FromFormatV. 494 */ 495 PyObject* msg; 496 int ret; 497 498 va_list vargs; 499#ifdef HAVE_STDARG_PROTOTYPES 500 va_start(vargs, format); 501#else 502 va_start(vargs); 503#endif 504 msg = PyUnicode_FromFormatV(format, vargs); 505 va_end(vargs); 506 507 if (msg == NULL) { 508 return -1; 509 } 510 511 ret = _PyUnicodeWriter_WriteStr(writer, msg); 512 Py_DECREF(msg); 513 return ret; 514} 515 516#endif /* Py_DEBUG */ 517/////////////////////////////////// Bitmap Node 518 519 520static PyHamtNode * 521hamt_node_bitmap_new(Py_ssize_t size) 522{ 523 /* Create a new bitmap node of size 'size' */ 524 525 PyHamtNode_Bitmap *node; 526 Py_ssize_t i; 527 528 assert(size >= 0); 529 assert(size % 2 == 0); 530 531 if (size == 0 && _empty_bitmap_node != NULL) { 532 Py_INCREF(_empty_bitmap_node); 533 return (PyHamtNode *)_empty_bitmap_node; 534 } 535 536 /* No freelist; allocate a new bitmap node */ 537 node = PyObject_GC_NewVar( 538 PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size); 539 if (node == NULL) { 540 return NULL; 541 } 542 543 Py_SET_SIZE(node, size); 544 545 for (i = 0; i < size; i++) { 546 node->b_array[i] = NULL; 547 } 548 549 node->b_bitmap = 0; 550 551 _PyObject_GC_TRACK(node); 552 553 if (size == 0 && _empty_bitmap_node == NULL) { 554 /* Since bitmap nodes are immutable, we can cache the instance 555 for size=0 and reuse it whenever we need an empty bitmap node. 556 */ 557 _empty_bitmap_node = node; 558 Py_INCREF(_empty_bitmap_node); 559 } 560 561 return (PyHamtNode *)node; 562} 563 564static inline Py_ssize_t 565hamt_node_bitmap_count(PyHamtNode_Bitmap *node) 566{ 567 return Py_SIZE(node) / 2; 568} 569 570static PyHamtNode_Bitmap * 571hamt_node_bitmap_clone(PyHamtNode_Bitmap *node) 572{ 573 /* Clone a bitmap node; return a new one with the same child notes. */ 574 575 PyHamtNode_Bitmap *clone; 576 Py_ssize_t i; 577 578 clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node)); 579 if (clone == NULL) { 580 return NULL; 581 } 582 583 for (i = 0; i < Py_SIZE(node); i++) { 584 Py_XINCREF(node->b_array[i]); 585 clone->b_array[i] = node->b_array[i]; 586 } 587 588 clone->b_bitmap = node->b_bitmap; 589 return clone; 590} 591 592static PyHamtNode_Bitmap * 593hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit) 594{ 595 assert(bit & o->b_bitmap); 596 assert(hamt_node_bitmap_count(o) > 1); 597 598 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new( 599 Py_SIZE(o) - 2); 600 if (new == NULL) { 601 return NULL; 602 } 603 604 uint32_t idx = hamt_bitindex(o->b_bitmap, bit); 605 uint32_t key_idx = 2 * idx; 606 uint32_t val_idx = key_idx + 1; 607 uint32_t i; 608 609 for (i = 0; i < key_idx; i++) { 610 Py_XINCREF(o->b_array[i]); 611 new->b_array[i] = o->b_array[i]; 612 } 613 614 assert(Py_SIZE(o) >= 0 && Py_SIZE(o) <= 32); 615 for (i = val_idx + 1; i < (uint32_t)Py_SIZE(o); i++) { 616 Py_XINCREF(o->b_array[i]); 617 new->b_array[i - 2] = o->b_array[i]; 618 } 619 620 new->b_bitmap = o->b_bitmap & ~bit; 621 return new; 622} 623 624static PyHamtNode * 625hamt_node_new_bitmap_or_collision(uint32_t shift, 626 PyObject *key1, PyObject *val1, 627 int32_t key2_hash, 628 PyObject *key2, PyObject *val2) 629{ 630 /* Helper method. Creates a new node for key1/val and key2/val2 631 pairs. 632 633 If key1 hash is equal to the hash of key2, a Collision node 634 will be created. If they are not equal, a Bitmap node is 635 created. 636 */ 637 638 int32_t key1_hash = hamt_hash(key1); 639 if (key1_hash == -1) { 640 return NULL; 641 } 642 643 if (key1_hash == key2_hash) { 644 PyHamtNode_Collision *n; 645 n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4); 646 if (n == NULL) { 647 return NULL; 648 } 649 650 Py_INCREF(key1); 651 n->c_array[0] = key1; 652 Py_INCREF(val1); 653 n->c_array[1] = val1; 654 655 Py_INCREF(key2); 656 n->c_array[2] = key2; 657 Py_INCREF(val2); 658 n->c_array[3] = val2; 659 660 return (PyHamtNode *)n; 661 } 662 else { 663 int added_leaf = 0; 664 PyHamtNode *n = hamt_node_bitmap_new(0); 665 if (n == NULL) { 666 return NULL; 667 } 668 669 PyHamtNode *n2 = hamt_node_assoc( 670 n, shift, key1_hash, key1, val1, &added_leaf); 671 Py_DECREF(n); 672 if (n2 == NULL) { 673 return NULL; 674 } 675 676 n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf); 677 Py_DECREF(n2); 678 if (n == NULL) { 679 return NULL; 680 } 681 682 return n; 683 } 684} 685 686static PyHamtNode * 687hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self, 688 uint32_t shift, int32_t hash, 689 PyObject *key, PyObject *val, int* added_leaf) 690{ 691 /* assoc operation for bitmap nodes. 692 693 Return: a new node, or self if key/val already is in the 694 collection. 695 696 'added_leaf' is later used in '_PyHamt_Assoc' to determine if 697 `hamt.set(key, val)` increased the size of the collection. 698 */ 699 700 uint32_t bit = hamt_bitpos(hash, shift); 701 uint32_t idx = hamt_bitindex(self->b_bitmap, bit); 702 703 /* Bitmap node layout: 704 705 +------+------+------+------+ --- +------+------+ 706 | key1 | val1 | key2 | val2 | ... | keyN | valN | 707 +------+------+------+------+ --- +------+------+ 708 where `N < Py_SIZE(node)`. 709 710 The `node->b_bitmap` field is a bitmap. For a given 711 `(shift, hash)` pair we can determine: 712 713 - If this node has the corresponding key/val slots. 714 - The index of key/val slots. 715 */ 716 717 if (self->b_bitmap & bit) { 718 /* The key is set in this node */ 719 720 uint32_t key_idx = 2 * idx; 721 uint32_t val_idx = key_idx + 1; 722 723 assert(val_idx < (size_t)Py_SIZE(self)); 724 725 PyObject *key_or_null = self->b_array[key_idx]; 726 PyObject *val_or_node = self->b_array[val_idx]; 727 728 if (key_or_null == NULL) { 729 /* key is NULL. This means that we have a few keys 730 that have the same (hash, shift) pair. */ 731 732 assert(val_or_node != NULL); 733 734 PyHamtNode *sub_node = hamt_node_assoc( 735 (PyHamtNode *)val_or_node, 736 shift + 5, hash, key, val, added_leaf); 737 if (sub_node == NULL) { 738 return NULL; 739 } 740 741 if (val_or_node == (PyObject *)sub_node) { 742 Py_DECREF(sub_node); 743 Py_INCREF(self); 744 return (PyHamtNode *)self; 745 } 746 747 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self); 748 if (ret == NULL) { 749 return NULL; 750 } 751 Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node); 752 return (PyHamtNode *)ret; 753 } 754 755 assert(key != NULL); 756 /* key is not NULL. This means that we have only one other 757 key in this collection that matches our hash for this shift. */ 758 759 int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ); 760 if (comp_err < 0) { /* exception in __eq__ */ 761 return NULL; 762 } 763 if (comp_err == 1) { /* key == key_or_null */ 764 if (val == val_or_node) { 765 /* we already have the same key/val pair; return self. */ 766 Py_INCREF(self); 767 return (PyHamtNode *)self; 768 } 769 770 /* We're setting a new value for the key we had before. 771 Make a new bitmap node with a replaced value, and return it. */ 772 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self); 773 if (ret == NULL) { 774 return NULL; 775 } 776 Py_INCREF(val); 777 Py_SETREF(ret->b_array[val_idx], val); 778 return (PyHamtNode *)ret; 779 } 780 781 /* It's a new key, and it has the same index as *one* another key. 782 We have a collision. We need to create a new node which will 783 combine the existing key and the key we're adding. 784 785 `hamt_node_new_bitmap_or_collision` will either create a new 786 Collision node if the keys have identical hashes, or 787 a new Bitmap node. 788 */ 789 PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision( 790 shift + 5, 791 key_or_null, val_or_node, /* existing key/val */ 792 hash, 793 key, val /* new key/val */ 794 ); 795 if (sub_node == NULL) { 796 return NULL; 797 } 798 799 PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self); 800 if (ret == NULL) { 801 Py_DECREF(sub_node); 802 return NULL; 803 } 804 Py_SETREF(ret->b_array[key_idx], NULL); 805 Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node); 806 807 *added_leaf = 1; 808 return (PyHamtNode *)ret; 809 } 810 else { 811 /* There was no key before with the same (shift,hash). */ 812 813 uint32_t n = (uint32_t)_Py_popcount32(self->b_bitmap); 814 815 if (n >= 16) { 816 /* When we have a situation where we want to store more 817 than 16 nodes at one level of the tree, we no longer 818 want to use the Bitmap node with bitmap encoding. 819 820 Instead we start using an Array node, which has 821 simpler (faster) implementation at the expense of 822 having preallocated 32 pointers for its keys/values 823 pairs. 824 825 Small hamt objects (<30 keys) usually don't have any 826 Array nodes at all. Between ~30 and ~400 keys hamt 827 objects usually have one Array node, and usually it's 828 a root node. 829 */ 830 831 uint32_t jdx = hamt_mask(hash, shift); 832 /* 'jdx' is the index of where the new key should be added 833 in the new Array node we're about to create. */ 834 835 PyHamtNode *empty = NULL; 836 PyHamtNode_Array *new_node = NULL; 837 PyHamtNode *res = NULL; 838 839 /* Create a new Array node. */ 840 new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1); 841 if (new_node == NULL) { 842 goto fin; 843 } 844 845 /* Create an empty bitmap node for the next 846 hamt_node_assoc call. */ 847 empty = hamt_node_bitmap_new(0); 848 if (empty == NULL) { 849 goto fin; 850 } 851 852 /* Make a new bitmap node for the key/val we're adding. 853 Set that bitmap node to new-array-node[jdx]. */ 854 new_node->a_array[jdx] = hamt_node_assoc( 855 empty, shift + 5, hash, key, val, added_leaf); 856 if (new_node->a_array[jdx] == NULL) { 857 goto fin; 858 } 859 860 /* Copy existing key/value pairs from the current Bitmap 861 node to the new Array node we've just created. */ 862 Py_ssize_t i, j; 863 for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) { 864 if (((self->b_bitmap >> i) & 1) != 0) { 865 /* Ensure we don't accidentally override `jdx` element 866 we set few lines above. 867 */ 868 assert(new_node->a_array[i] == NULL); 869 870 if (self->b_array[j] == NULL) { 871 new_node->a_array[i] = 872 (PyHamtNode *)self->b_array[j + 1]; 873 Py_INCREF(new_node->a_array[i]); 874 } 875 else { 876 int32_t rehash = hamt_hash(self->b_array[j]); 877 if (rehash == -1) { 878 goto fin; 879 } 880 881 new_node->a_array[i] = hamt_node_assoc( 882 empty, shift + 5, 883 rehash, 884 self->b_array[j], 885 self->b_array[j + 1], 886 added_leaf); 887 888 if (new_node->a_array[i] == NULL) { 889 goto fin; 890 } 891 } 892 j += 2; 893 } 894 } 895 896 VALIDATE_ARRAY_NODE(new_node) 897 898 /* That's it! */ 899 res = (PyHamtNode *)new_node; 900 901 fin: 902 Py_XDECREF(empty); 903 if (res == NULL) { 904 Py_XDECREF(new_node); 905 } 906 return res; 907 } 908 else { 909 /* We have less than 16 keys at this level; let's just 910 create a new bitmap node out of this node with the 911 new key/val pair added. */ 912 913 uint32_t key_idx = 2 * idx; 914 uint32_t val_idx = key_idx + 1; 915 uint32_t i; 916 917 *added_leaf = 1; 918 919 /* Allocate new Bitmap node which can have one more key/val 920 pair in addition to what we have already. */ 921 PyHamtNode_Bitmap *new_node = 922 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1)); 923 if (new_node == NULL) { 924 return NULL; 925 } 926 927 /* Copy all keys/values that will be before the new key/value 928 we are adding. */ 929 for (i = 0; i < key_idx; i++) { 930 Py_XINCREF(self->b_array[i]); 931 new_node->b_array[i] = self->b_array[i]; 932 } 933 934 /* Set the new key/value to the new Bitmap node. */ 935 Py_INCREF(key); 936 new_node->b_array[key_idx] = key; 937 Py_INCREF(val); 938 new_node->b_array[val_idx] = val; 939 940 /* Copy all keys/values that will be after the new key/value 941 we are adding. */ 942 assert(Py_SIZE(self) >= 0 && Py_SIZE(self) <= 32); 943 for (i = key_idx; i < (uint32_t)Py_SIZE(self); i++) { 944 Py_XINCREF(self->b_array[i]); 945 new_node->b_array[i + 2] = self->b_array[i]; 946 } 947 948 new_node->b_bitmap = self->b_bitmap | bit; 949 return (PyHamtNode *)new_node; 950 } 951 } 952} 953 954static hamt_without_t 955hamt_node_bitmap_without(PyHamtNode_Bitmap *self, 956 uint32_t shift, int32_t hash, 957 PyObject *key, 958 PyHamtNode **new_node) 959{ 960 uint32_t bit = hamt_bitpos(hash, shift); 961 if ((self->b_bitmap & bit) == 0) { 962 return W_NOT_FOUND; 963 } 964 965 uint32_t idx = hamt_bitindex(self->b_bitmap, bit); 966 967 uint32_t key_idx = 2 * idx; 968 uint32_t val_idx = key_idx + 1; 969 970 PyObject *key_or_null = self->b_array[key_idx]; 971 PyObject *val_or_node = self->b_array[val_idx]; 972 973 if (key_or_null == NULL) { 974 /* key == NULL means that 'value' is another tree node. */ 975 976 PyHamtNode *sub_node = NULL; 977 978 hamt_without_t res = hamt_node_without( 979 (PyHamtNode *)val_or_node, 980 shift + 5, hash, key, &sub_node); 981 982 switch (res) { 983 case W_EMPTY: 984 /* It's impossible for us to receive a W_EMPTY here: 985 986 - Array nodes are converted to Bitmap nodes when 987 we delete 16th item from them; 988 989 - Collision nodes are converted to Bitmap when 990 there is one item in them; 991 992 - Bitmap node's without() inlines single-item 993 sub-nodes. 994 995 So in no situation we can have a single-item 996 Bitmap child of another Bitmap node. 997 */ 998 Py_UNREACHABLE(); 999 1000 case W_NEWNODE: { 1001 assert(sub_node != NULL); 1002 1003 if (IS_BITMAP_NODE(sub_node)) { 1004 PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node; 1005 if (hamt_node_bitmap_count(sub_tree) == 1 && 1006 sub_tree->b_array[0] != NULL) 1007 { 1008 /* A bitmap node with one key/value pair. Just 1009 merge it into this node. 1010 1011 Note that we don't inline Bitmap nodes that 1012 have a NULL key -- those nodes point to another 1013 tree level, and we cannot simply move tree levels 1014 up or down. 1015 */ 1016 1017 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self); 1018 if (clone == NULL) { 1019 Py_DECREF(sub_node); 1020 return W_ERROR; 1021 } 1022 1023 PyObject *key = sub_tree->b_array[0]; 1024 PyObject *val = sub_tree->b_array[1]; 1025 1026 Py_INCREF(key); 1027 Py_XSETREF(clone->b_array[key_idx], key); 1028 Py_INCREF(val); 1029 Py_SETREF(clone->b_array[val_idx], val); 1030 1031 Py_DECREF(sub_tree); 1032 1033 *new_node = (PyHamtNode *)clone; 1034 return W_NEWNODE; 1035 } 1036 } 1037 1038#ifdef Py_DEBUG 1039 /* Ensure that Collision.without implementation 1040 converts to Bitmap nodes itself. 1041 */ 1042 if (IS_COLLISION_NODE(sub_node)) { 1043 assert(hamt_node_collision_count( 1044 (PyHamtNode_Collision*)sub_node) > 1); 1045 } 1046#endif 1047 1048 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self); 1049 if (clone == NULL) { 1050 return W_ERROR; 1051 } 1052 1053 Py_SETREF(clone->b_array[val_idx], 1054 (PyObject *)sub_node); /* borrow */ 1055 1056 *new_node = (PyHamtNode *)clone; 1057 return W_NEWNODE; 1058 } 1059 1060 case W_ERROR: 1061 case W_NOT_FOUND: 1062 assert(sub_node == NULL); 1063 return res; 1064 1065 default: 1066 Py_UNREACHABLE(); 1067 } 1068 } 1069 else { 1070 /* We have a regular key/value pair */ 1071 1072 int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ); 1073 if (cmp < 0) { 1074 return W_ERROR; 1075 } 1076 if (cmp == 0) { 1077 return W_NOT_FOUND; 1078 } 1079 1080 if (hamt_node_bitmap_count(self) == 1) { 1081 return W_EMPTY; 1082 } 1083 1084 *new_node = (PyHamtNode *) 1085 hamt_node_bitmap_clone_without(self, bit); 1086 if (*new_node == NULL) { 1087 return W_ERROR; 1088 } 1089 1090 return W_NEWNODE; 1091 } 1092} 1093 1094static hamt_find_t 1095hamt_node_bitmap_find(PyHamtNode_Bitmap *self, 1096 uint32_t shift, int32_t hash, 1097 PyObject *key, PyObject **val) 1098{ 1099 /* Lookup a key in a Bitmap node. */ 1100 1101 uint32_t bit = hamt_bitpos(hash, shift); 1102 uint32_t idx; 1103 uint32_t key_idx; 1104 uint32_t val_idx; 1105 PyObject *key_or_null; 1106 PyObject *val_or_node; 1107 int comp_err; 1108 1109 if ((self->b_bitmap & bit) == 0) { 1110 return F_NOT_FOUND; 1111 } 1112 1113 idx = hamt_bitindex(self->b_bitmap, bit); 1114 key_idx = idx * 2; 1115 val_idx = key_idx + 1; 1116 1117 assert(val_idx < (size_t)Py_SIZE(self)); 1118 1119 key_or_null = self->b_array[key_idx]; 1120 val_or_node = self->b_array[val_idx]; 1121 1122 if (key_or_null == NULL) { 1123 /* There are a few keys that have the same hash at the current shift 1124 that match our key. Dispatch the lookup further down the tree. */ 1125 assert(val_or_node != NULL); 1126 return hamt_node_find((PyHamtNode *)val_or_node, 1127 shift + 5, hash, key, val); 1128 } 1129 1130 /* We have only one key -- a potential match. Let's compare if the 1131 key we are looking at is equal to the key we are looking for. */ 1132 assert(key != NULL); 1133 comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ); 1134 if (comp_err < 0) { /* exception in __eq__ */ 1135 return F_ERROR; 1136 } 1137 if (comp_err == 1) { /* key == key_or_null */ 1138 *val = val_or_node; 1139 return F_FOUND; 1140 } 1141 1142 return F_NOT_FOUND; 1143} 1144 1145static int 1146hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg) 1147{ 1148 /* Bitmap's tp_traverse */ 1149 1150 Py_ssize_t i; 1151 1152 for (i = Py_SIZE(self); --i >= 0; ) { 1153 Py_VISIT(self->b_array[i]); 1154 } 1155 1156 return 0; 1157} 1158 1159static void 1160hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self) 1161{ 1162 /* Bitmap's tp_dealloc */ 1163 1164 Py_ssize_t len = Py_SIZE(self); 1165 Py_ssize_t i; 1166 1167 PyObject_GC_UnTrack(self); 1168 Py_TRASHCAN_BEGIN(self, hamt_node_bitmap_dealloc) 1169 1170 if (len > 0) { 1171 i = len; 1172 while (--i >= 0) { 1173 Py_XDECREF(self->b_array[i]); 1174 } 1175 } 1176 1177 Py_TYPE(self)->tp_free((PyObject *)self); 1178 Py_TRASHCAN_END 1179} 1180 1181#ifdef Py_DEBUG 1182static int 1183hamt_node_bitmap_dump(PyHamtNode_Bitmap *node, 1184 _PyUnicodeWriter *writer, int level) 1185{ 1186 /* Debug build: __dump__() method implementation for Bitmap nodes. */ 1187 1188 Py_ssize_t i; 1189 PyObject *tmp1; 1190 PyObject *tmp2; 1191 1192 if (_hamt_dump_ident(writer, level + 1)) { 1193 goto error; 1194 } 1195 1196 if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ", 1197 Py_SIZE(node), Py_SIZE(node) / 2)) 1198 { 1199 goto error; 1200 } 1201 1202 tmp1 = PyLong_FromUnsignedLong(node->b_bitmap); 1203 if (tmp1 == NULL) { 1204 goto error; 1205 } 1206 tmp2 = _PyLong_Format(tmp1, 2); 1207 Py_DECREF(tmp1); 1208 if (tmp2 == NULL) { 1209 goto error; 1210 } 1211 if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) { 1212 Py_DECREF(tmp2); 1213 goto error; 1214 } 1215 Py_DECREF(tmp2); 1216 1217 for (i = 0; i < Py_SIZE(node); i += 2) { 1218 PyObject *key_or_null = node->b_array[i]; 1219 PyObject *val_or_node = node->b_array[i + 1]; 1220 1221 if (_hamt_dump_ident(writer, level + 2)) { 1222 goto error; 1223 } 1224 1225 if (key_or_null == NULL) { 1226 if (_hamt_dump_format(writer, "NULL:\n")) { 1227 goto error; 1228 } 1229 1230 if (hamt_node_dump((PyHamtNode *)val_or_node, 1231 writer, level + 2)) 1232 { 1233 goto error; 1234 } 1235 } 1236 else { 1237 if (_hamt_dump_format(writer, "%R: %R", key_or_null, 1238 val_or_node)) 1239 { 1240 goto error; 1241 } 1242 } 1243 1244 if (_hamt_dump_format(writer, "\n")) { 1245 goto error; 1246 } 1247 } 1248 1249 return 0; 1250error: 1251 return -1; 1252} 1253#endif /* Py_DEBUG */ 1254 1255 1256/////////////////////////////////// Collision Node 1257 1258 1259static PyHamtNode * 1260hamt_node_collision_new(int32_t hash, Py_ssize_t size) 1261{ 1262 /* Create a new Collision node. */ 1263 1264 PyHamtNode_Collision *node; 1265 Py_ssize_t i; 1266 1267 assert(size >= 4); 1268 assert(size % 2 == 0); 1269 1270 node = PyObject_GC_NewVar( 1271 PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size); 1272 if (node == NULL) { 1273 return NULL; 1274 } 1275 1276 for (i = 0; i < size; i++) { 1277 node->c_array[i] = NULL; 1278 } 1279 1280 Py_SET_SIZE(node, size); 1281 node->c_hash = hash; 1282 1283 _PyObject_GC_TRACK(node); 1284 1285 return (PyHamtNode *)node; 1286} 1287 1288static hamt_find_t 1289hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key, 1290 Py_ssize_t *idx) 1291{ 1292 /* Lookup `key` in the Collision node `self`. Set the index of the 1293 found key to 'idx'. */ 1294 1295 Py_ssize_t i; 1296 PyObject *el; 1297 1298 for (i = 0; i < Py_SIZE(self); i += 2) { 1299 el = self->c_array[i]; 1300 1301 assert(el != NULL); 1302 int cmp = PyObject_RichCompareBool(key, el, Py_EQ); 1303 if (cmp < 0) { 1304 return F_ERROR; 1305 } 1306 if (cmp == 1) { 1307 *idx = i; 1308 return F_FOUND; 1309 } 1310 } 1311 1312 return F_NOT_FOUND; 1313} 1314 1315static PyHamtNode * 1316hamt_node_collision_assoc(PyHamtNode_Collision *self, 1317 uint32_t shift, int32_t hash, 1318 PyObject *key, PyObject *val, int* added_leaf) 1319{ 1320 /* Set a new key to this level (currently a Collision node) 1321 of the tree. */ 1322 1323 if (hash == self->c_hash) { 1324 /* The hash of the 'key' we are adding matches the hash of 1325 other keys in this Collision node. */ 1326 1327 Py_ssize_t key_idx = -1; 1328 hamt_find_t found; 1329 PyHamtNode_Collision *new_node; 1330 Py_ssize_t i; 1331 1332 /* Let's try to lookup the new 'key', maybe we already have it. */ 1333 found = hamt_node_collision_find_index(self, key, &key_idx); 1334 switch (found) { 1335 case F_ERROR: 1336 /* Exception. */ 1337 return NULL; 1338 1339 case F_NOT_FOUND: 1340 /* This is a totally new key. Clone the current node, 1341 add a new key/value to the cloned node. */ 1342 1343 new_node = (PyHamtNode_Collision *)hamt_node_collision_new( 1344 self->c_hash, Py_SIZE(self) + 2); 1345 if (new_node == NULL) { 1346 return NULL; 1347 } 1348 1349 for (i = 0; i < Py_SIZE(self); i++) { 1350 Py_INCREF(self->c_array[i]); 1351 new_node->c_array[i] = self->c_array[i]; 1352 } 1353 1354 Py_INCREF(key); 1355 new_node->c_array[i] = key; 1356 Py_INCREF(val); 1357 new_node->c_array[i + 1] = val; 1358 1359 *added_leaf = 1; 1360 return (PyHamtNode *)new_node; 1361 1362 case F_FOUND: 1363 /* There's a key which is equal to the key we are adding. */ 1364 1365 assert(key_idx >= 0); 1366 assert(key_idx < Py_SIZE(self)); 1367 Py_ssize_t val_idx = key_idx + 1; 1368 1369 if (self->c_array[val_idx] == val) { 1370 /* We're setting a key/value pair that's already set. */ 1371 Py_INCREF(self); 1372 return (PyHamtNode *)self; 1373 } 1374 1375 /* We need to replace old value for the key 1376 with a new value. Create a new Collision node.*/ 1377 new_node = (PyHamtNode_Collision *)hamt_node_collision_new( 1378 self->c_hash, Py_SIZE(self)); 1379 if (new_node == NULL) { 1380 return NULL; 1381 } 1382 1383 /* Copy all elements of the old node to the new one. */ 1384 for (i = 0; i < Py_SIZE(self); i++) { 1385 Py_INCREF(self->c_array[i]); 1386 new_node->c_array[i] = self->c_array[i]; 1387 } 1388 1389 /* Replace the old value with the new value for the our key. */ 1390 Py_DECREF(new_node->c_array[val_idx]); 1391 Py_INCREF(val); 1392 new_node->c_array[val_idx] = val; 1393 1394 return (PyHamtNode *)new_node; 1395 1396 default: 1397 Py_UNREACHABLE(); 1398 } 1399 } 1400 else { 1401 /* The hash of the new key is different from the hash that 1402 all keys of this Collision node have. 1403 1404 Create a Bitmap node inplace with two children: 1405 key/value pair that we're adding, and the Collision node 1406 we're replacing on this tree level. 1407 */ 1408 1409 PyHamtNode_Bitmap *new_node; 1410 PyHamtNode *assoc_res; 1411 1412 new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2); 1413 if (new_node == NULL) { 1414 return NULL; 1415 } 1416 new_node->b_bitmap = hamt_bitpos(self->c_hash, shift); 1417 Py_INCREF(self); 1418 new_node->b_array[1] = (PyObject*) self; 1419 1420 assoc_res = hamt_node_bitmap_assoc( 1421 new_node, shift, hash, key, val, added_leaf); 1422 Py_DECREF(new_node); 1423 return assoc_res; 1424 } 1425} 1426 1427static inline Py_ssize_t 1428hamt_node_collision_count(PyHamtNode_Collision *node) 1429{ 1430 return Py_SIZE(node) / 2; 1431} 1432 1433static hamt_without_t 1434hamt_node_collision_without(PyHamtNode_Collision *self, 1435 uint32_t shift, int32_t hash, 1436 PyObject *key, 1437 PyHamtNode **new_node) 1438{ 1439 if (hash != self->c_hash) { 1440 return W_NOT_FOUND; 1441 } 1442 1443 Py_ssize_t key_idx = -1; 1444 hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx); 1445 1446 switch (found) { 1447 case F_ERROR: 1448 return W_ERROR; 1449 1450 case F_NOT_FOUND: 1451 return W_NOT_FOUND; 1452 1453 case F_FOUND: 1454 assert(key_idx >= 0); 1455 assert(key_idx < Py_SIZE(self)); 1456 1457 Py_ssize_t new_count = hamt_node_collision_count(self) - 1; 1458 1459 if (new_count == 0) { 1460 /* The node has only one key/value pair and it's for the 1461 key we're trying to delete. So a new node will be empty 1462 after the removal. 1463 */ 1464 return W_EMPTY; 1465 } 1466 1467 if (new_count == 1) { 1468 /* The node has two keys, and after deletion the 1469 new Collision node would have one. Collision nodes 1470 with one key shouldn't exist, so convert it to a 1471 Bitmap node. 1472 */ 1473 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *) 1474 hamt_node_bitmap_new(2); 1475 if (node == NULL) { 1476 return W_ERROR; 1477 } 1478 1479 if (key_idx == 0) { 1480 Py_INCREF(self->c_array[2]); 1481 node->b_array[0] = self->c_array[2]; 1482 Py_INCREF(self->c_array[3]); 1483 node->b_array[1] = self->c_array[3]; 1484 } 1485 else { 1486 assert(key_idx == 2); 1487 Py_INCREF(self->c_array[0]); 1488 node->b_array[0] = self->c_array[0]; 1489 Py_INCREF(self->c_array[1]); 1490 node->b_array[1] = self->c_array[1]; 1491 } 1492 1493 node->b_bitmap = hamt_bitpos(hash, shift); 1494 1495 *new_node = (PyHamtNode *)node; 1496 return W_NEWNODE; 1497 } 1498 1499 /* Allocate a new Collision node with capacity for one 1500 less key/value pair */ 1501 PyHamtNode_Collision *new = (PyHamtNode_Collision *) 1502 hamt_node_collision_new( 1503 self->c_hash, Py_SIZE(self) - 2); 1504 if (new == NULL) { 1505 return W_ERROR; 1506 } 1507 1508 /* Copy all other keys from `self` to `new` */ 1509 Py_ssize_t i; 1510 for (i = 0; i < key_idx; i++) { 1511 Py_INCREF(self->c_array[i]); 1512 new->c_array[i] = self->c_array[i]; 1513 } 1514 for (i = key_idx + 2; i < Py_SIZE(self); i++) { 1515 Py_INCREF(self->c_array[i]); 1516 new->c_array[i - 2] = self->c_array[i]; 1517 } 1518 1519 *new_node = (PyHamtNode*)new; 1520 return W_NEWNODE; 1521 1522 default: 1523 Py_UNREACHABLE(); 1524 } 1525} 1526 1527static hamt_find_t 1528hamt_node_collision_find(PyHamtNode_Collision *self, 1529 uint32_t shift, int32_t hash, 1530 PyObject *key, PyObject **val) 1531{ 1532 /* Lookup `key` in the Collision node `self`. Set the value 1533 for the found key to 'val'. */ 1534 1535 Py_ssize_t idx = -1; 1536 hamt_find_t res; 1537 1538 res = hamt_node_collision_find_index(self, key, &idx); 1539 if (res == F_ERROR || res == F_NOT_FOUND) { 1540 return res; 1541 } 1542 1543 assert(idx >= 0); 1544 assert(idx + 1 < Py_SIZE(self)); 1545 1546 *val = self->c_array[idx + 1]; 1547 assert(*val != NULL); 1548 1549 return F_FOUND; 1550} 1551 1552 1553static int 1554hamt_node_collision_traverse(PyHamtNode_Collision *self, 1555 visitproc visit, void *arg) 1556{ 1557 /* Collision's tp_traverse */ 1558 1559 Py_ssize_t i; 1560 1561 for (i = Py_SIZE(self); --i >= 0; ) { 1562 Py_VISIT(self->c_array[i]); 1563 } 1564 1565 return 0; 1566} 1567 1568static void 1569hamt_node_collision_dealloc(PyHamtNode_Collision *self) 1570{ 1571 /* Collision's tp_dealloc */ 1572 1573 Py_ssize_t len = Py_SIZE(self); 1574 1575 PyObject_GC_UnTrack(self); 1576 Py_TRASHCAN_BEGIN(self, hamt_node_collision_dealloc) 1577 1578 if (len > 0) { 1579 1580 while (--len >= 0) { 1581 Py_XDECREF(self->c_array[len]); 1582 } 1583 } 1584 1585 Py_TYPE(self)->tp_free((PyObject *)self); 1586 Py_TRASHCAN_END 1587} 1588 1589#ifdef Py_DEBUG 1590static int 1591hamt_node_collision_dump(PyHamtNode_Collision *node, 1592 _PyUnicodeWriter *writer, int level) 1593{ 1594 /* Debug build: __dump__() method implementation for Collision nodes. */ 1595 1596 Py_ssize_t i; 1597 1598 if (_hamt_dump_ident(writer, level + 1)) { 1599 goto error; 1600 } 1601 1602 if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n", 1603 Py_SIZE(node), node)) 1604 { 1605 goto error; 1606 } 1607 1608 for (i = 0; i < Py_SIZE(node); i += 2) { 1609 PyObject *key = node->c_array[i]; 1610 PyObject *val = node->c_array[i + 1]; 1611 1612 if (_hamt_dump_ident(writer, level + 2)) { 1613 goto error; 1614 } 1615 1616 if (_hamt_dump_format(writer, "%R: %R\n", key, val)) { 1617 goto error; 1618 } 1619 } 1620 1621 return 0; 1622error: 1623 return -1; 1624} 1625#endif /* Py_DEBUG */ 1626 1627 1628/////////////////////////////////// Array Node 1629 1630 1631static PyHamtNode * 1632hamt_node_array_new(Py_ssize_t count) 1633{ 1634 Py_ssize_t i; 1635 1636 PyHamtNode_Array *node = PyObject_GC_New( 1637 PyHamtNode_Array, &_PyHamt_ArrayNode_Type); 1638 if (node == NULL) { 1639 return NULL; 1640 } 1641 1642 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) { 1643 node->a_array[i] = NULL; 1644 } 1645 1646 node->a_count = count; 1647 1648 _PyObject_GC_TRACK(node); 1649 return (PyHamtNode *)node; 1650} 1651 1652static PyHamtNode_Array * 1653hamt_node_array_clone(PyHamtNode_Array *node) 1654{ 1655 PyHamtNode_Array *clone; 1656 Py_ssize_t i; 1657 1658 VALIDATE_ARRAY_NODE(node) 1659 1660 /* Create a new Array node. */ 1661 clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count); 1662 if (clone == NULL) { 1663 return NULL; 1664 } 1665 1666 /* Copy all elements from the current Array node to the new one. */ 1667 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) { 1668 Py_XINCREF(node->a_array[i]); 1669 clone->a_array[i] = node->a_array[i]; 1670 } 1671 1672 VALIDATE_ARRAY_NODE(clone) 1673 return clone; 1674} 1675 1676static PyHamtNode * 1677hamt_node_array_assoc(PyHamtNode_Array *self, 1678 uint32_t shift, int32_t hash, 1679 PyObject *key, PyObject *val, int* added_leaf) 1680{ 1681 /* Set a new key to this level (currently a Collision node) 1682 of the tree. 1683 1684 Array nodes don't store values, they can only point to 1685 other nodes. They are simple arrays of 32 BaseNode pointers/ 1686 */ 1687 1688 uint32_t idx = hamt_mask(hash, shift); 1689 PyHamtNode *node = self->a_array[idx]; 1690 PyHamtNode *child_node; 1691 PyHamtNode_Array *new_node; 1692 Py_ssize_t i; 1693 1694 if (node == NULL) { 1695 /* There's no child node for the given hash. Create a new 1696 Bitmap node for this key. */ 1697 1698 PyHamtNode_Bitmap *empty = NULL; 1699 1700 /* Get an empty Bitmap node to work with. */ 1701 empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0); 1702 if (empty == NULL) { 1703 return NULL; 1704 } 1705 1706 /* Set key/val to the newly created empty Bitmap, thus 1707 creating a new Bitmap node with our key/value pair. */ 1708 child_node = hamt_node_bitmap_assoc( 1709 empty, 1710 shift + 5, hash, key, val, added_leaf); 1711 Py_DECREF(empty); 1712 if (child_node == NULL) { 1713 return NULL; 1714 } 1715 1716 /* Create a new Array node. */ 1717 new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1); 1718 if (new_node == NULL) { 1719 Py_DECREF(child_node); 1720 return NULL; 1721 } 1722 1723 /* Copy all elements from the current Array node to the 1724 new one. */ 1725 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) { 1726 Py_XINCREF(self->a_array[i]); 1727 new_node->a_array[i] = self->a_array[i]; 1728 } 1729 1730 assert(new_node->a_array[idx] == NULL); 1731 new_node->a_array[idx] = child_node; /* borrow */ 1732 VALIDATE_ARRAY_NODE(new_node) 1733 } 1734 else { 1735 /* There's a child node for the given hash. 1736 Set the key to it./ */ 1737 child_node = hamt_node_assoc( 1738 node, shift + 5, hash, key, val, added_leaf); 1739 if (child_node == NULL) { 1740 return NULL; 1741 } 1742 else if (child_node == (PyHamtNode *)self) { 1743 Py_DECREF(child_node); 1744 return (PyHamtNode *)self; 1745 } 1746 1747 new_node = hamt_node_array_clone(self); 1748 if (new_node == NULL) { 1749 Py_DECREF(child_node); 1750 return NULL; 1751 } 1752 1753 Py_SETREF(new_node->a_array[idx], child_node); /* borrow */ 1754 VALIDATE_ARRAY_NODE(new_node) 1755 } 1756 1757 return (PyHamtNode *)new_node; 1758} 1759 1760static hamt_without_t 1761hamt_node_array_without(PyHamtNode_Array *self, 1762 uint32_t shift, int32_t hash, 1763 PyObject *key, 1764 PyHamtNode **new_node) 1765{ 1766 uint32_t idx = hamt_mask(hash, shift); 1767 PyHamtNode *node = self->a_array[idx]; 1768 1769 if (node == NULL) { 1770 return W_NOT_FOUND; 1771 } 1772 1773 PyHamtNode *sub_node = NULL; 1774 hamt_without_t res = hamt_node_without( 1775 (PyHamtNode *)node, 1776 shift + 5, hash, key, &sub_node); 1777 1778 switch (res) { 1779 case W_NOT_FOUND: 1780 case W_ERROR: 1781 assert(sub_node == NULL); 1782 return res; 1783 1784 case W_NEWNODE: { 1785 /* We need to replace a node at the `idx` index. 1786 Clone this node and replace. 1787 */ 1788 assert(sub_node != NULL); 1789 1790 PyHamtNode_Array *clone = hamt_node_array_clone(self); 1791 if (clone == NULL) { 1792 Py_DECREF(sub_node); 1793 return W_ERROR; 1794 } 1795 1796 Py_SETREF(clone->a_array[idx], sub_node); /* borrow */ 1797 *new_node = (PyHamtNode*)clone; /* borrow */ 1798 return W_NEWNODE; 1799 } 1800 1801 case W_EMPTY: { 1802 assert(sub_node == NULL); 1803 /* We need to remove a node at the `idx` index. 1804 Calculate the size of the replacement Array node. 1805 */ 1806 Py_ssize_t new_count = self->a_count - 1; 1807 1808 if (new_count == 0) { 1809 return W_EMPTY; 1810 } 1811 1812 if (new_count >= 16) { 1813 /* We convert Bitmap nodes to Array nodes, when a 1814 Bitmap node needs to store more than 15 key/value 1815 pairs. So we will create a new Array node if we 1816 the number of key/values after deletion is still 1817 greater than 15. 1818 */ 1819 1820 PyHamtNode_Array *new = hamt_node_array_clone(self); 1821 if (new == NULL) { 1822 return W_ERROR; 1823 } 1824 new->a_count = new_count; 1825 Py_CLEAR(new->a_array[idx]); 1826 1827 *new_node = (PyHamtNode*)new; /* borrow */ 1828 return W_NEWNODE; 1829 } 1830 1831 /* New Array node would have less than 16 key/value 1832 pairs. We need to create a replacement Bitmap node. */ 1833 1834 Py_ssize_t bitmap_size = new_count * 2; 1835 uint32_t bitmap = 0; 1836 1837 PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *) 1838 hamt_node_bitmap_new(bitmap_size); 1839 if (new == NULL) { 1840 return W_ERROR; 1841 } 1842 1843 Py_ssize_t new_i = 0; 1844 for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) { 1845 if (i == idx) { 1846 /* Skip the node we are deleting. */ 1847 continue; 1848 } 1849 1850 PyHamtNode *node = self->a_array[i]; 1851 if (node == NULL) { 1852 /* Skip any missing nodes. */ 1853 continue; 1854 } 1855 1856 bitmap |= 1U << i; 1857 1858 if (IS_BITMAP_NODE(node)) { 1859 PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node; 1860 1861 if (hamt_node_bitmap_count(child) == 1 && 1862 child->b_array[0] != NULL) 1863 { 1864 /* node is a Bitmap with one key/value pair, just 1865 merge it into the new Bitmap node we're building. 1866 1867 Note that we don't inline Bitmap nodes that 1868 have a NULL key -- those nodes point to another 1869 tree level, and we cannot simply move tree levels 1870 up or down. 1871 */ 1872 PyObject *key = child->b_array[0]; 1873 PyObject *val = child->b_array[1]; 1874 1875 Py_INCREF(key); 1876 new->b_array[new_i] = key; 1877 Py_INCREF(val); 1878 new->b_array[new_i + 1] = val; 1879 } 1880 else { 1881 new->b_array[new_i] = NULL; 1882 Py_INCREF(node); 1883 new->b_array[new_i + 1] = (PyObject*)node; 1884 } 1885 } 1886 else { 1887 1888#ifdef Py_DEBUG 1889 if (IS_COLLISION_NODE(node)) { 1890 Py_ssize_t child_count = hamt_node_collision_count( 1891 (PyHamtNode_Collision*)node); 1892 assert(child_count > 1); 1893 } 1894 else if (IS_ARRAY_NODE(node)) { 1895 assert(((PyHamtNode_Array*)node)->a_count >= 16); 1896 } 1897#endif 1898 1899 /* Just copy the node into our new Bitmap */ 1900 new->b_array[new_i] = NULL; 1901 Py_INCREF(node); 1902 new->b_array[new_i + 1] = (PyObject*)node; 1903 } 1904 1905 new_i += 2; 1906 } 1907 1908 new->b_bitmap = bitmap; 1909 *new_node = (PyHamtNode*)new; /* borrow */ 1910 return W_NEWNODE; 1911 } 1912 1913 default: 1914 Py_UNREACHABLE(); 1915 } 1916} 1917 1918static hamt_find_t 1919hamt_node_array_find(PyHamtNode_Array *self, 1920 uint32_t shift, int32_t hash, 1921 PyObject *key, PyObject **val) 1922{ 1923 /* Lookup `key` in the Array node `self`. Set the value 1924 for the found key to 'val'. */ 1925 1926 uint32_t idx = hamt_mask(hash, shift); 1927 PyHamtNode *node; 1928 1929 node = self->a_array[idx]; 1930 if (node == NULL) { 1931 return F_NOT_FOUND; 1932 } 1933 1934 /* Dispatch to the generic hamt_node_find */ 1935 return hamt_node_find(node, shift + 5, hash, key, val); 1936} 1937 1938static int 1939hamt_node_array_traverse(PyHamtNode_Array *self, 1940 visitproc visit, void *arg) 1941{ 1942 /* Array's tp_traverse */ 1943 1944 Py_ssize_t i; 1945 1946 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) { 1947 Py_VISIT(self->a_array[i]); 1948 } 1949 1950 return 0; 1951} 1952 1953static void 1954hamt_node_array_dealloc(PyHamtNode_Array *self) 1955{ 1956 /* Array's tp_dealloc */ 1957 1958 Py_ssize_t i; 1959 1960 PyObject_GC_UnTrack(self); 1961 Py_TRASHCAN_BEGIN(self, hamt_node_array_dealloc) 1962 1963 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) { 1964 Py_XDECREF(self->a_array[i]); 1965 } 1966 1967 Py_TYPE(self)->tp_free((PyObject *)self); 1968 Py_TRASHCAN_END 1969} 1970 1971#ifdef Py_DEBUG 1972static int 1973hamt_node_array_dump(PyHamtNode_Array *node, 1974 _PyUnicodeWriter *writer, int level) 1975{ 1976 /* Debug build: __dump__() method implementation for Array nodes. */ 1977 1978 Py_ssize_t i; 1979 1980 if (_hamt_dump_ident(writer, level + 1)) { 1981 goto error; 1982 } 1983 1984 if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) { 1985 goto error; 1986 } 1987 1988 for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) { 1989 if (node->a_array[i] == NULL) { 1990 continue; 1991 } 1992 1993 if (_hamt_dump_ident(writer, level + 2)) { 1994 goto error; 1995 } 1996 1997 if (_hamt_dump_format(writer, "%zd::\n", i)) { 1998 goto error; 1999 } 2000 2001 if (hamt_node_dump(node->a_array[i], writer, level + 1)) { 2002 goto error; 2003 } 2004 2005 if (_hamt_dump_format(writer, "\n")) { 2006 goto error; 2007 } 2008 } 2009 2010 return 0; 2011error: 2012 return -1; 2013} 2014#endif /* Py_DEBUG */ 2015 2016 2017/////////////////////////////////// Node Dispatch 2018 2019 2020static PyHamtNode * 2021hamt_node_assoc(PyHamtNode *node, 2022 uint32_t shift, int32_t hash, 2023 PyObject *key, PyObject *val, int* added_leaf) 2024{ 2025 /* Set key/value to the 'node' starting with the given shift/hash. 2026 Return a new node, or the same node if key/value already 2027 set. 2028 2029 added_leaf will be set to 1 if key/value wasn't in the 2030 tree before. 2031 2032 This method automatically dispatches to the suitable 2033 hamt_node_{nodetype}_assoc method. 2034 */ 2035 2036 if (IS_BITMAP_NODE(node)) { 2037 return hamt_node_bitmap_assoc( 2038 (PyHamtNode_Bitmap *)node, 2039 shift, hash, key, val, added_leaf); 2040 } 2041 else if (IS_ARRAY_NODE(node)) { 2042 return hamt_node_array_assoc( 2043 (PyHamtNode_Array *)node, 2044 shift, hash, key, val, added_leaf); 2045 } 2046 else { 2047 assert(IS_COLLISION_NODE(node)); 2048 return hamt_node_collision_assoc( 2049 (PyHamtNode_Collision *)node, 2050 shift, hash, key, val, added_leaf); 2051 } 2052} 2053 2054static hamt_without_t 2055hamt_node_without(PyHamtNode *node, 2056 uint32_t shift, int32_t hash, 2057 PyObject *key, 2058 PyHamtNode **new_node) 2059{ 2060 if (IS_BITMAP_NODE(node)) { 2061 return hamt_node_bitmap_without( 2062 (PyHamtNode_Bitmap *)node, 2063 shift, hash, key, 2064 new_node); 2065 } 2066 else if (IS_ARRAY_NODE(node)) { 2067 return hamt_node_array_without( 2068 (PyHamtNode_Array *)node, 2069 shift, hash, key, 2070 new_node); 2071 } 2072 else { 2073 assert(IS_COLLISION_NODE(node)); 2074 return hamt_node_collision_without( 2075 (PyHamtNode_Collision *)node, 2076 shift, hash, key, 2077 new_node); 2078 } 2079} 2080 2081static hamt_find_t 2082hamt_node_find(PyHamtNode *node, 2083 uint32_t shift, int32_t hash, 2084 PyObject *key, PyObject **val) 2085{ 2086 /* Find the key in the node starting with the given shift/hash. 2087 2088 If a value is found, the result will be set to F_FOUND, and 2089 *val will point to the found value object. 2090 2091 If a value wasn't found, the result will be set to F_NOT_FOUND. 2092 2093 If an exception occurs during the call, the result will be F_ERROR. 2094 2095 This method automatically dispatches to the suitable 2096 hamt_node_{nodetype}_find method. 2097 */ 2098 2099 if (IS_BITMAP_NODE(node)) { 2100 return hamt_node_bitmap_find( 2101 (PyHamtNode_Bitmap *)node, 2102 shift, hash, key, val); 2103 2104 } 2105 else if (IS_ARRAY_NODE(node)) { 2106 return hamt_node_array_find( 2107 (PyHamtNode_Array *)node, 2108 shift, hash, key, val); 2109 } 2110 else { 2111 assert(IS_COLLISION_NODE(node)); 2112 return hamt_node_collision_find( 2113 (PyHamtNode_Collision *)node, 2114 shift, hash, key, val); 2115 } 2116} 2117 2118#ifdef Py_DEBUG 2119static int 2120hamt_node_dump(PyHamtNode *node, 2121 _PyUnicodeWriter *writer, int level) 2122{ 2123 /* Debug build: __dump__() method implementation for a node. 2124 2125 This method automatically dispatches to the suitable 2126 hamt_node_{nodetype})_dump method. 2127 */ 2128 2129 if (IS_BITMAP_NODE(node)) { 2130 return hamt_node_bitmap_dump( 2131 (PyHamtNode_Bitmap *)node, writer, level); 2132 } 2133 else if (IS_ARRAY_NODE(node)) { 2134 return hamt_node_array_dump( 2135 (PyHamtNode_Array *)node, writer, level); 2136 } 2137 else { 2138 assert(IS_COLLISION_NODE(node)); 2139 return hamt_node_collision_dump( 2140 (PyHamtNode_Collision *)node, writer, level); 2141 } 2142} 2143#endif /* Py_DEBUG */ 2144 2145 2146/////////////////////////////////// Iterators: Machinery 2147 2148 2149static hamt_iter_t 2150hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val); 2151 2152 2153static void 2154hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root) 2155{ 2156 for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) { 2157 iter->i_nodes[i] = NULL; 2158 iter->i_pos[i] = 0; 2159 } 2160 2161 iter->i_level = 0; 2162 2163 /* Note: we don't incref/decref nodes in i_nodes. */ 2164 iter->i_nodes[0] = root; 2165} 2166 2167static hamt_iter_t 2168hamt_iterator_bitmap_next(PyHamtIteratorState *iter, 2169 PyObject **key, PyObject **val) 2170{ 2171 int8_t level = iter->i_level; 2172 2173 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]); 2174 Py_ssize_t pos = iter->i_pos[level]; 2175 2176 if (pos + 1 >= Py_SIZE(node)) { 2177#ifdef Py_DEBUG 2178 assert(iter->i_level >= 0); 2179 iter->i_nodes[iter->i_level] = NULL; 2180#endif 2181 iter->i_level--; 2182 return hamt_iterator_next(iter, key, val); 2183 } 2184 2185 if (node->b_array[pos] == NULL) { 2186 iter->i_pos[level] = pos + 2; 2187 2188 int8_t next_level = level + 1; 2189 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH); 2190 iter->i_level = next_level; 2191 iter->i_pos[next_level] = 0; 2192 iter->i_nodes[next_level] = (PyHamtNode *) 2193 node->b_array[pos + 1]; 2194 2195 return hamt_iterator_next(iter, key, val); 2196 } 2197 2198 *key = node->b_array[pos]; 2199 *val = node->b_array[pos + 1]; 2200 iter->i_pos[level] = pos + 2; 2201 return I_ITEM; 2202} 2203 2204static hamt_iter_t 2205hamt_iterator_collision_next(PyHamtIteratorState *iter, 2206 PyObject **key, PyObject **val) 2207{ 2208 int8_t level = iter->i_level; 2209 2210 PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]); 2211 Py_ssize_t pos = iter->i_pos[level]; 2212 2213 if (pos + 1 >= Py_SIZE(node)) { 2214#ifdef Py_DEBUG 2215 assert(iter->i_level >= 0); 2216 iter->i_nodes[iter->i_level] = NULL; 2217#endif 2218 iter->i_level--; 2219 return hamt_iterator_next(iter, key, val); 2220 } 2221 2222 *key = node->c_array[pos]; 2223 *val = node->c_array[pos + 1]; 2224 iter->i_pos[level] = pos + 2; 2225 return I_ITEM; 2226} 2227 2228static hamt_iter_t 2229hamt_iterator_array_next(PyHamtIteratorState *iter, 2230 PyObject **key, PyObject **val) 2231{ 2232 int8_t level = iter->i_level; 2233 2234 PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]); 2235 Py_ssize_t pos = iter->i_pos[level]; 2236 2237 if (pos >= HAMT_ARRAY_NODE_SIZE) { 2238#ifdef Py_DEBUG 2239 assert(iter->i_level >= 0); 2240 iter->i_nodes[iter->i_level] = NULL; 2241#endif 2242 iter->i_level--; 2243 return hamt_iterator_next(iter, key, val); 2244 } 2245 2246 for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) { 2247 if (node->a_array[i] != NULL) { 2248 iter->i_pos[level] = i + 1; 2249 2250 int8_t next_level = level + 1; 2251 assert(next_level < _Py_HAMT_MAX_TREE_DEPTH); 2252 iter->i_pos[next_level] = 0; 2253 iter->i_nodes[next_level] = node->a_array[i]; 2254 iter->i_level = next_level; 2255 2256 return hamt_iterator_next(iter, key, val); 2257 } 2258 } 2259 2260#ifdef Py_DEBUG 2261 assert(iter->i_level >= 0); 2262 iter->i_nodes[iter->i_level] = NULL; 2263#endif 2264 2265 iter->i_level--; 2266 return hamt_iterator_next(iter, key, val); 2267} 2268 2269static hamt_iter_t 2270hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val) 2271{ 2272 if (iter->i_level < 0) { 2273 return I_END; 2274 } 2275 2276 assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH); 2277 2278 PyHamtNode *current = iter->i_nodes[iter->i_level]; 2279 2280 if (IS_BITMAP_NODE(current)) { 2281 return hamt_iterator_bitmap_next(iter, key, val); 2282 } 2283 else if (IS_ARRAY_NODE(current)) { 2284 return hamt_iterator_array_next(iter, key, val); 2285 } 2286 else { 2287 assert(IS_COLLISION_NODE(current)); 2288 return hamt_iterator_collision_next(iter, key, val); 2289 } 2290} 2291 2292 2293/////////////////////////////////// HAMT high-level functions 2294 2295 2296PyHamtObject * 2297_PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val) 2298{ 2299 int32_t key_hash; 2300 int added_leaf = 0; 2301 PyHamtNode *new_root; 2302 PyHamtObject *new_o; 2303 2304 key_hash = hamt_hash(key); 2305 if (key_hash == -1) { 2306 return NULL; 2307 } 2308 2309 new_root = hamt_node_assoc( 2310 (PyHamtNode *)(o->h_root), 2311 0, key_hash, key, val, &added_leaf); 2312 if (new_root == NULL) { 2313 return NULL; 2314 } 2315 2316 if (new_root == o->h_root) { 2317 Py_DECREF(new_root); 2318 Py_INCREF(o); 2319 return o; 2320 } 2321 2322 new_o = hamt_alloc(); 2323 if (new_o == NULL) { 2324 Py_DECREF(new_root); 2325 return NULL; 2326 } 2327 2328 new_o->h_root = new_root; /* borrow */ 2329 new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count; 2330 2331 return new_o; 2332} 2333 2334PyHamtObject * 2335_PyHamt_Without(PyHamtObject *o, PyObject *key) 2336{ 2337 int32_t key_hash = hamt_hash(key); 2338 if (key_hash == -1) { 2339 return NULL; 2340 } 2341 2342 PyHamtNode *new_root = NULL; 2343 2344 hamt_without_t res = hamt_node_without( 2345 (PyHamtNode *)(o->h_root), 2346 0, key_hash, key, 2347 &new_root); 2348 2349 switch (res) { 2350 case W_ERROR: 2351 return NULL; 2352 case W_EMPTY: 2353 return _PyHamt_New(); 2354 case W_NOT_FOUND: 2355 Py_INCREF(o); 2356 return o; 2357 case W_NEWNODE: { 2358 assert(new_root != NULL); 2359 2360 PyHamtObject *new_o = hamt_alloc(); 2361 if (new_o == NULL) { 2362 Py_DECREF(new_root); 2363 return NULL; 2364 } 2365 2366 new_o->h_root = new_root; /* borrow */ 2367 new_o->h_count = o->h_count - 1; 2368 assert(new_o->h_count >= 0); 2369 return new_o; 2370 } 2371 default: 2372 Py_UNREACHABLE(); 2373 } 2374} 2375 2376static hamt_find_t 2377hamt_find(PyHamtObject *o, PyObject *key, PyObject **val) 2378{ 2379 if (o->h_count == 0) { 2380 return F_NOT_FOUND; 2381 } 2382 2383 int32_t key_hash = hamt_hash(key); 2384 if (key_hash == -1) { 2385 return F_ERROR; 2386 } 2387 2388 return hamt_node_find(o->h_root, 0, key_hash, key, val); 2389} 2390 2391 2392int 2393_PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val) 2394{ 2395 hamt_find_t res = hamt_find(o, key, val); 2396 switch (res) { 2397 case F_ERROR: 2398 return -1; 2399 case F_NOT_FOUND: 2400 return 0; 2401 case F_FOUND: 2402 return 1; 2403 default: 2404 Py_UNREACHABLE(); 2405 } 2406} 2407 2408 2409int 2410_PyHamt_Eq(PyHamtObject *v, PyHamtObject *w) 2411{ 2412 if (v == w) { 2413 return 1; 2414 } 2415 2416 if (v->h_count != w->h_count) { 2417 return 0; 2418 } 2419 2420 PyHamtIteratorState iter; 2421 hamt_iter_t iter_res; 2422 hamt_find_t find_res; 2423 PyObject *v_key; 2424 PyObject *v_val; 2425 PyObject *w_val; 2426 2427 hamt_iterator_init(&iter, v->h_root); 2428 2429 do { 2430 iter_res = hamt_iterator_next(&iter, &v_key, &v_val); 2431 if (iter_res == I_ITEM) { 2432 find_res = hamt_find(w, v_key, &w_val); 2433 switch (find_res) { 2434 case F_ERROR: 2435 return -1; 2436 2437 case F_NOT_FOUND: 2438 return 0; 2439 2440 case F_FOUND: { 2441 int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ); 2442 if (cmp < 0) { 2443 return -1; 2444 } 2445 if (cmp == 0) { 2446 return 0; 2447 } 2448 } 2449 } 2450 } 2451 } while (iter_res != I_END); 2452 2453 return 1; 2454} 2455 2456Py_ssize_t 2457_PyHamt_Len(PyHamtObject *o) 2458{ 2459 return o->h_count; 2460} 2461 2462static PyHamtObject * 2463hamt_alloc(void) 2464{ 2465 PyHamtObject *o; 2466 o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type); 2467 if (o == NULL) { 2468 return NULL; 2469 } 2470 o->h_count = 0; 2471 o->h_root = NULL; 2472 o->h_weakreflist = NULL; 2473 PyObject_GC_Track(o); 2474 return o; 2475} 2476 2477PyHamtObject * 2478_PyHamt_New(void) 2479{ 2480 if (_empty_hamt != NULL) { 2481 /* HAMT is an immutable object so we can easily cache an 2482 empty instance. */ 2483 Py_INCREF(_empty_hamt); 2484 return _empty_hamt; 2485 } 2486 2487 PyHamtObject *o = hamt_alloc(); 2488 if (o == NULL) { 2489 return NULL; 2490 } 2491 2492 o->h_root = hamt_node_bitmap_new(0); 2493 if (o->h_root == NULL) { 2494 Py_DECREF(o); 2495 return NULL; 2496 } 2497 2498 o->h_count = 0; 2499 2500 if (_empty_hamt == NULL) { 2501 Py_INCREF(o); 2502 _empty_hamt = o; 2503 } 2504 2505 return o; 2506} 2507 2508#ifdef Py_DEBUG 2509static PyObject * 2510hamt_dump(PyHamtObject *self) 2511{ 2512 _PyUnicodeWriter writer; 2513 2514 _PyUnicodeWriter_Init(&writer); 2515 2516 if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) { 2517 goto error; 2518 } 2519 2520 if (hamt_node_dump(self->h_root, &writer, 0)) { 2521 goto error; 2522 } 2523 2524 return _PyUnicodeWriter_Finish(&writer); 2525 2526error: 2527 _PyUnicodeWriter_Dealloc(&writer); 2528 return NULL; 2529} 2530#endif /* Py_DEBUG */ 2531 2532 2533/////////////////////////////////// Iterators: Shared Iterator Implementation 2534 2535 2536static int 2537hamt_baseiter_tp_clear(PyHamtIterator *it) 2538{ 2539 Py_CLEAR(it->hi_obj); 2540 return 0; 2541} 2542 2543static void 2544hamt_baseiter_tp_dealloc(PyHamtIterator *it) 2545{ 2546 PyObject_GC_UnTrack(it); 2547 (void)hamt_baseiter_tp_clear(it); 2548 PyObject_GC_Del(it); 2549} 2550 2551static int 2552hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg) 2553{ 2554 Py_VISIT(it->hi_obj); 2555 return 0; 2556} 2557 2558static PyObject * 2559hamt_baseiter_tp_iternext(PyHamtIterator *it) 2560{ 2561 PyObject *key; 2562 PyObject *val; 2563 hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val); 2564 2565 switch (res) { 2566 case I_END: 2567 PyErr_SetNone(PyExc_StopIteration); 2568 return NULL; 2569 2570 case I_ITEM: { 2571 return (*(it->hi_yield))(key, val); 2572 } 2573 2574 default: { 2575 Py_UNREACHABLE(); 2576 } 2577 } 2578} 2579 2580static Py_ssize_t 2581hamt_baseiter_tp_len(PyHamtIterator *it) 2582{ 2583 return it->hi_obj->h_count; 2584} 2585 2586static PyMappingMethods PyHamtIterator_as_mapping = { 2587 (lenfunc)hamt_baseiter_tp_len, 2588}; 2589 2590static PyObject * 2591hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o) 2592{ 2593 PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type); 2594 if (it == NULL) { 2595 return NULL; 2596 } 2597 2598 Py_INCREF(o); 2599 it->hi_obj = o; 2600 it->hi_yield = yield; 2601 2602 hamt_iterator_init(&it->hi_iter, o->h_root); 2603 2604 return (PyObject*)it; 2605} 2606 2607#define ITERATOR_TYPE_SHARED_SLOTS \ 2608 .tp_basicsize = sizeof(PyHamtIterator), \ 2609 .tp_itemsize = 0, \ 2610 .tp_as_mapping = &PyHamtIterator_as_mapping, \ 2611 .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc, \ 2612 .tp_getattro = PyObject_GenericGetAttr, \ 2613 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, \ 2614 .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse, \ 2615 .tp_clear = (inquiry)hamt_baseiter_tp_clear, \ 2616 .tp_iter = PyObject_SelfIter, \ 2617 .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext, 2618 2619 2620/////////////////////////////////// _PyHamtItems_Type 2621 2622 2623PyTypeObject _PyHamtItems_Type = { 2624 PyVarObject_HEAD_INIT(NULL, 0) 2625 "items", 2626 ITERATOR_TYPE_SHARED_SLOTS 2627}; 2628 2629static PyObject * 2630hamt_iter_yield_items(PyObject *key, PyObject *val) 2631{ 2632 return PyTuple_Pack(2, key, val); 2633} 2634 2635PyObject * 2636_PyHamt_NewIterItems(PyHamtObject *o) 2637{ 2638 return hamt_baseiter_new( 2639 &_PyHamtItems_Type, hamt_iter_yield_items, o); 2640} 2641 2642 2643/////////////////////////////////// _PyHamtKeys_Type 2644 2645 2646PyTypeObject _PyHamtKeys_Type = { 2647 PyVarObject_HEAD_INIT(NULL, 0) 2648 "keys", 2649 ITERATOR_TYPE_SHARED_SLOTS 2650}; 2651 2652static PyObject * 2653hamt_iter_yield_keys(PyObject *key, PyObject *val) 2654{ 2655 Py_INCREF(key); 2656 return key; 2657} 2658 2659PyObject * 2660_PyHamt_NewIterKeys(PyHamtObject *o) 2661{ 2662 return hamt_baseiter_new( 2663 &_PyHamtKeys_Type, hamt_iter_yield_keys, o); 2664} 2665 2666 2667/////////////////////////////////// _PyHamtValues_Type 2668 2669 2670PyTypeObject _PyHamtValues_Type = { 2671 PyVarObject_HEAD_INIT(NULL, 0) 2672 "values", 2673 ITERATOR_TYPE_SHARED_SLOTS 2674}; 2675 2676static PyObject * 2677hamt_iter_yield_values(PyObject *key, PyObject *val) 2678{ 2679 Py_INCREF(val); 2680 return val; 2681} 2682 2683PyObject * 2684_PyHamt_NewIterValues(PyHamtObject *o) 2685{ 2686 return hamt_baseiter_new( 2687 &_PyHamtValues_Type, hamt_iter_yield_values, o); 2688} 2689 2690 2691/////////////////////////////////// _PyHamt_Type 2692 2693 2694#ifdef Py_DEBUG 2695static PyObject * 2696hamt_dump(PyHamtObject *self); 2697#endif 2698 2699 2700static PyObject * 2701hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 2702{ 2703 return (PyObject*)_PyHamt_New(); 2704} 2705 2706static int 2707hamt_tp_clear(PyHamtObject *self) 2708{ 2709 Py_CLEAR(self->h_root); 2710 return 0; 2711} 2712 2713 2714static int 2715hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg) 2716{ 2717 Py_VISIT(self->h_root); 2718 return 0; 2719} 2720 2721static void 2722hamt_tp_dealloc(PyHamtObject *self) 2723{ 2724 PyObject_GC_UnTrack(self); 2725 if (self->h_weakreflist != NULL) { 2726 PyObject_ClearWeakRefs((PyObject*)self); 2727 } 2728 (void)hamt_tp_clear(self); 2729 Py_TYPE(self)->tp_free(self); 2730} 2731 2732 2733static PyObject * 2734hamt_tp_richcompare(PyObject *v, PyObject *w, int op) 2735{ 2736 if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) { 2737 Py_RETURN_NOTIMPLEMENTED; 2738 } 2739 2740 int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w); 2741 if (res < 0) { 2742 return NULL; 2743 } 2744 2745 if (op == Py_NE) { 2746 res = !res; 2747 } 2748 2749 if (res) { 2750 Py_RETURN_TRUE; 2751 } 2752 else { 2753 Py_RETURN_FALSE; 2754 } 2755} 2756 2757static int 2758hamt_tp_contains(PyHamtObject *self, PyObject *key) 2759{ 2760 PyObject *val; 2761 return _PyHamt_Find(self, key, &val); 2762} 2763 2764static PyObject * 2765hamt_tp_subscript(PyHamtObject *self, PyObject *key) 2766{ 2767 PyObject *val; 2768 hamt_find_t res = hamt_find(self, key, &val); 2769 switch (res) { 2770 case F_ERROR: 2771 return NULL; 2772 case F_FOUND: 2773 Py_INCREF(val); 2774 return val; 2775 case F_NOT_FOUND: 2776 PyErr_SetObject(PyExc_KeyError, key); 2777 return NULL; 2778 default: 2779 Py_UNREACHABLE(); 2780 } 2781} 2782 2783static Py_ssize_t 2784hamt_tp_len(PyHamtObject *self) 2785{ 2786 return _PyHamt_Len(self); 2787} 2788 2789static PyObject * 2790hamt_tp_iter(PyHamtObject *self) 2791{ 2792 return _PyHamt_NewIterKeys(self); 2793} 2794 2795static PyObject * 2796hamt_py_set(PyHamtObject *self, PyObject *args) 2797{ 2798 PyObject *key; 2799 PyObject *val; 2800 2801 if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) { 2802 return NULL; 2803 } 2804 2805 return (PyObject *)_PyHamt_Assoc(self, key, val); 2806} 2807 2808static PyObject * 2809hamt_py_get(PyHamtObject *self, PyObject *args) 2810{ 2811 PyObject *key; 2812 PyObject *def = NULL; 2813 2814 if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) { 2815 return NULL; 2816 } 2817 2818 PyObject *val = NULL; 2819 hamt_find_t res = hamt_find(self, key, &val); 2820 switch (res) { 2821 case F_ERROR: 2822 return NULL; 2823 case F_FOUND: 2824 Py_INCREF(val); 2825 return val; 2826 case F_NOT_FOUND: 2827 if (def == NULL) { 2828 Py_RETURN_NONE; 2829 } 2830 Py_INCREF(def); 2831 return def; 2832 default: 2833 Py_UNREACHABLE(); 2834 } 2835} 2836 2837static PyObject * 2838hamt_py_delete(PyHamtObject *self, PyObject *key) 2839{ 2840 return (PyObject *)_PyHamt_Without(self, key); 2841} 2842 2843static PyObject * 2844hamt_py_items(PyHamtObject *self, PyObject *args) 2845{ 2846 return _PyHamt_NewIterItems(self); 2847} 2848 2849static PyObject * 2850hamt_py_values(PyHamtObject *self, PyObject *args) 2851{ 2852 return _PyHamt_NewIterValues(self); 2853} 2854 2855static PyObject * 2856hamt_py_keys(PyHamtObject *self, PyObject *Py_UNUSED(args)) 2857{ 2858 return _PyHamt_NewIterKeys(self); 2859} 2860 2861#ifdef Py_DEBUG 2862static PyObject * 2863hamt_py_dump(PyHamtObject *self, PyObject *Py_UNUSED(args)) 2864{ 2865 return hamt_dump(self); 2866} 2867#endif 2868 2869 2870static PyMethodDef PyHamt_methods[] = { 2871 {"set", _PyCFunction_CAST(hamt_py_set), METH_VARARGS, NULL}, 2872 {"get", _PyCFunction_CAST(hamt_py_get), METH_VARARGS, NULL}, 2873 {"delete", _PyCFunction_CAST(hamt_py_delete), METH_O, NULL}, 2874 {"items", _PyCFunction_CAST(hamt_py_items), METH_NOARGS, NULL}, 2875 {"keys", _PyCFunction_CAST(hamt_py_keys), METH_NOARGS, NULL}, 2876 {"values", _PyCFunction_CAST(hamt_py_values), METH_NOARGS, NULL}, 2877#ifdef Py_DEBUG 2878 {"__dump__", _PyCFunction_CAST(hamt_py_dump), METH_NOARGS, NULL}, 2879#endif 2880 {NULL, NULL} 2881}; 2882 2883static PySequenceMethods PyHamt_as_sequence = { 2884 0, /* sq_length */ 2885 0, /* sq_concat */ 2886 0, /* sq_repeat */ 2887 0, /* sq_item */ 2888 0, /* sq_slice */ 2889 0, /* sq_ass_item */ 2890 0, /* sq_ass_slice */ 2891 (objobjproc)hamt_tp_contains, /* sq_contains */ 2892 0, /* sq_inplace_concat */ 2893 0, /* sq_inplace_repeat */ 2894}; 2895 2896static PyMappingMethods PyHamt_as_mapping = { 2897 (lenfunc)hamt_tp_len, /* mp_length */ 2898 (binaryfunc)hamt_tp_subscript, /* mp_subscript */ 2899}; 2900 2901PyTypeObject _PyHamt_Type = { 2902 PyVarObject_HEAD_INIT(&PyType_Type, 0) 2903 "hamt", 2904 sizeof(PyHamtObject), 2905 .tp_methods = PyHamt_methods, 2906 .tp_as_mapping = &PyHamt_as_mapping, 2907 .tp_as_sequence = &PyHamt_as_sequence, 2908 .tp_iter = (getiterfunc)hamt_tp_iter, 2909 .tp_dealloc = (destructor)hamt_tp_dealloc, 2910 .tp_getattro = PyObject_GenericGetAttr, 2911 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, 2912 .tp_richcompare = hamt_tp_richcompare, 2913 .tp_traverse = (traverseproc)hamt_tp_traverse, 2914 .tp_clear = (inquiry)hamt_tp_clear, 2915 .tp_new = hamt_tp_new, 2916 .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist), 2917 .tp_hash = PyObject_HashNotImplemented, 2918}; 2919 2920 2921/////////////////////////////////// Tree Node Types 2922 2923 2924PyTypeObject _PyHamt_ArrayNode_Type = { 2925 PyVarObject_HEAD_INIT(&PyType_Type, 0) 2926 "hamt_array_node", 2927 sizeof(PyHamtNode_Array), 2928 0, 2929 .tp_dealloc = (destructor)hamt_node_array_dealloc, 2930 .tp_getattro = PyObject_GenericGetAttr, 2931 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, 2932 .tp_traverse = (traverseproc)hamt_node_array_traverse, 2933 .tp_free = PyObject_GC_Del, 2934 .tp_hash = PyObject_HashNotImplemented, 2935}; 2936 2937PyTypeObject _PyHamt_BitmapNode_Type = { 2938 PyVarObject_HEAD_INIT(&PyType_Type, 0) 2939 "hamt_bitmap_node", 2940 sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *), 2941 sizeof(PyObject *), 2942 .tp_dealloc = (destructor)hamt_node_bitmap_dealloc, 2943 .tp_getattro = PyObject_GenericGetAttr, 2944 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, 2945 .tp_traverse = (traverseproc)hamt_node_bitmap_traverse, 2946 .tp_free = PyObject_GC_Del, 2947 .tp_hash = PyObject_HashNotImplemented, 2948}; 2949 2950PyTypeObject _PyHamt_CollisionNode_Type = { 2951 PyVarObject_HEAD_INIT(&PyType_Type, 0) 2952 "hamt_collision_node", 2953 sizeof(PyHamtNode_Collision) - sizeof(PyObject *), 2954 sizeof(PyObject *), 2955 .tp_dealloc = (destructor)hamt_node_collision_dealloc, 2956 .tp_getattro = PyObject_GenericGetAttr, 2957 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, 2958 .tp_traverse = (traverseproc)hamt_node_collision_traverse, 2959 .tp_free = PyObject_GC_Del, 2960 .tp_hash = PyObject_HashNotImplemented, 2961}; 2962 2963 2964void 2965_PyHamt_Fini(PyInterpreterState *interp) 2966{ 2967 Py_CLEAR(_empty_hamt); 2968 Py_CLEAR(_empty_bitmap_node); 2969} 2970